Slides: fix remarks after test presentation
This commit is contained in:
parent
9961ced06f
commit
a901b04298
7 changed files with 197 additions and 171 deletions
BIN
slides/img/dw_spec.png
Normal file
BIN
slides/img/dw_spec.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 602 KiB |
BIN
slides/img/stack/call_stack.png
Normal file
BIN
slides/img/stack/call_stack.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 25 KiB |
BIN
slides/img/stack/call_stack.xcf
Normal file
BIN
slides/img/stack/call_stack.xcf
Normal file
Binary file not shown.
|
@ -11,6 +11,7 @@
|
|||
\usepackage{booktabs}
|
||||
\usepackage{makecell}
|
||||
\usepackage{ifthen}
|
||||
\usepackage{colortbl}
|
||||
|
||||
\usepackage{../shared/my_listings}
|
||||
%\usepackage{../shared/my_hyperref}
|
||||
|
@ -18,17 +19,35 @@
|
|||
\usepackage{../shared/common}
|
||||
\usepackage{../shared/todo}
|
||||
|
||||
\usepackage{inconsolata}
|
||||
\lstset{basicstyle=\footnotesize\ttfamily}
|
||||
|
||||
\renewcommand\theadalign{c}
|
||||
\renewcommand\theadfont{\scriptsize\bfseries}
|
||||
|
||||
\setbeamertemplate{navigation symbols}{}
|
||||
\setbeamertemplate{headline}{}
|
||||
|
||||
\newcommand{\thenalert}[1]{\only<1>{#1}\only<2>{\alert{#1}}}
|
||||
\newcommand{\slidecountline}{
|
||||
\ifthenelse{\theframenumber = 0}
|
||||
{}
|
||||
{\insertframenumber/\inserttotalframenumber}}
|
||||
\newcommand{\sectionline}{
|
||||
\ifthenelse{\thesection = 0}
|
||||
{}
|
||||
{\Roman{section}~-- \insertsection}}
|
||||
|
||||
\AtBeginSection[]{
|
||||
\begin{frame}
|
||||
\vfill
|
||||
\centering
|
||||
\begin{beamercolorbox}[sep=8pt,center,shadow=true,rounded=true]{title}
|
||||
\usebeamerfont{title}\insertsectionhead\par%
|
||||
\end{beamercolorbox}
|
||||
\vfill
|
||||
\end{frame}
|
||||
}
|
||||
|
||||
\lstdefinelanguage{gdb}{
|
||||
morekeywords={gdb},
|
||||
|
@ -38,8 +57,8 @@
|
|||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\author[\slidecountline]{Théophile \textsc{Bastian} \\
|
||||
\small{Under supervision of Francesco Zappa Nardelli}}
|
||||
\title[DWARF unwinding data compilation]
|
||||
{Speeding up stack unwinding by compiling DWARF debugging data}
|
||||
\title[\sectionline]
|
||||
{Speeding up stack unwinding by compiling DWARF debug data}
|
||||
\date{March\ --\ August 2018}
|
||||
%\subject{}
|
||||
%\logo{}
|
||||
|
@ -51,16 +70,18 @@
|
|||
\addtocounter{framenumber}{-1}
|
||||
\titlepage{}
|
||||
|
||||
\vspace{-1em}
|
||||
\vspace{-2em}
|
||||
\begin{center}
|
||||
Slides: \url{https://tobast.fr/m2/slides.pdf} \\
|
||||
Report: \url{https://tobast.fr/m2/report.pdf}
|
||||
\begin{align*}
|
||||
\text{Slides: } &\text{\url{https://tobast.fr/m2/slides.pdf}} \\
|
||||
\text{Report: } &\text{\url{https://tobast.fr/m2/report.pdf}}
|
||||
\end{align*}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\begin{frame}{~}
|
||||
\addtocounter{framenumber}{-1}
|
||||
\tableofcontents
|
||||
\tableofcontents[hideallsubsections]
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
@ -70,26 +91,36 @@
|
|||
\subsection{Introduction}
|
||||
|
||||
\begin{frame}[fragile]{We often use stack unwinding!}
|
||||
\begin{lstlisting}[language=gdb, numbers=none, escapechar=|]
|
||||
Program received signal SIGSEGV, Segmentation fault.
|
||||
0x0000555555554625 in fct_b (m=0x5c) at segfault.c:5
|
||||
5 printf("%l\n", *m);
|
||||
|\pause| (gdb) backtrace
|
||||
#0 0x0000555555554625 in fct_b (m=0x5c) at segfault.c:5
|
||||
#1 0x0000555555554663 in fct_a (n=42) at segfault.c:10
|
||||
#2 0x0000555555554674 in main () at segfault.c:14
|
||||
|\pause| (gdb) frame 1
|
||||
#1 0x0000555555554663 in fct_a (n=42) at segfault.c:10
|
||||
10 fct_b((int*)(some_fct_a_var + 8));
|
||||
|\pause| (gdb) print some_fct_a_var
|
||||
$1 = 84
|
||||
\end{lstlisting}
|
||||
\begin{columns}[c]
|
||||
\begin{column}{0.70\textwidth}
|
||||
\begin{lstlisting}[language=gdb, numbers=none, escapechar=|]
|
||||
Program received signal SIGSEGV.
|
||||
0x54625 in fct_b at segfault.c:5
|
||||
5 printf("%l\n", *b);
|
||||
|
||||
\pause{}
|
||||
\begin{center}
|
||||
\textbf{\Large How does it work?!}
|
||||
\end{center}
|
||||
\vspace{1em}
|
||||
|\pause| (gdb) backtrace
|
||||
#0 0x54625 in fct_b at segfault.c:5
|
||||
#1 0x54663 in fct_a at segfault.c:10
|
||||
#2 0x54674 in main at segfault.c:14
|
||||
|
||||
|\pause| (gdb) frame 1
|
||||
#1 0x54663 in fct_a at segfault.c:10
|
||||
10 fct_b((int*) a);
|
||||
|
||||
|\pause| (gdb) print a
|
||||
$1 = 84
|
||||
\end{lstlisting}
|
||||
\vspace{-1em}
|
||||
\pause{}
|
||||
\begin{center}
|
||||
\textbf{\Large How does it work?!}
|
||||
\end{center}
|
||||
\end{column}
|
||||
\begin{column}{0.35\textwidth}
|
||||
\pause{}
|
||||
\includegraphics[width=0.95\linewidth]{img/stack/call_stack}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
@ -97,85 +128,64 @@ $1 = 84
|
|||
|
||||
\begin{frame}{Call stack and registers}
|
||||
\begin{columns}[c]
|
||||
\begin{column}{0.65\textwidth}
|
||||
\begin{itemize}
|
||||
\item Programs use a \alert{call stack}
|
||||
\item Organized in \alert{stack frames}
|
||||
\begin{itemize}
|
||||
\item Local variables
|
||||
\item Function parameters
|
||||
\item Keep track of nesting, registers and ``return
|
||||
point''
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\begin{column}{0.55\textwidth}
|
||||
\begin{center}
|
||||
\large\bf
|
||||
How do we get the grandparent RA\@?
|
||||
|
||||
Common registers:
|
||||
\begin{itemize}
|
||||
\item \reg{rip}: program counter (PC)
|
||||
\medskip
|
||||
|
||||
\item \reg{rsp}: stack pointer
|
||||
Isn't it as trivial as \texttt{pop()}?
|
||||
|
||||
\item \reg{rbp}: base pointer
|
||||
\begin{itemize}
|
||||
\item Saves \reg{rsp}
|
||||
\item Easy access
|
||||
\item Wastes a register
|
||||
\item Not often used (x86\_64)
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\vspace{2em}
|
||||
|
||||
\only<2>{We only have \reg{rsp} and \reg{rip}.}
|
||||
|
||||
\end{center}
|
||||
\end{column}
|
||||
\begin{column}{0.35\textwidth}
|
||||
\includegraphics[width=0.95\linewidth]{../shared/imgs/call_stack}
|
||||
\begin{column}{0.45\textwidth}
|
||||
\includegraphics[width=0.95\linewidth]{img/stack/call_stack}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Isn't it as trivial as \texttt{pop()}?}
|
||||
\begin{itemize}
|
||||
\item This is only a \alert{blob of binary data} without mandatory
|
||||
structure
|
||||
\item We ignore \alert{which registers were saved}
|
||||
\item We ignore \alert{whether \reg{rbp} was used}
|
||||
\item We ignore \alert{where the return address is stored}
|
||||
\item We ignore \alert{where the previous frame begins}
|
||||
\end{itemize}
|
||||
|
||||
\medskip
|
||||
|
||||
But\ldots{} if we know how to \alert{unwind one}, we can \alert{recurse}!
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{DWARF tables}
|
||||
|
||||
\begin{frame}[fragile, shrink]{DWARF unwinding data}
|
||||
\begin{lstlisting}[numbers=none, language=]
|
||||
00009b30 48 009b34 FDE cie=0000 pc=0084950..0084b37
|
||||
LOC CFA rbx rbp r12 r13 r14 r15 ra
|
||||
0084950 rsp+8 u u u u u u c-8
|
||||
0084952 rsp+16 u u u u u c-16 c-8
|
||||
0084954 rsp+24 u u u u c-24 c-16 c-8
|
||||
0084956 rsp+32 u u u c-32 c-24 c-16 c-8
|
||||
0084958 rsp+40 u u c-40 c-32 c-24 c-16 c-8
|
||||
0084959 rsp+48 u c-48 c-40 c-32 c-24 c-16 c-8
|
||||
008495a rsp+56 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
||||
0084962 rsp+64 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
||||
0084a19 rsp+56 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
||||
0084a1d rsp+48 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
||||
0084a1e rsp+40 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
||||
0084a20 rsp+32 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
||||
0084a22 rsp+24 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
||||
0084a24 rsp+16 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
||||
0084a26 rsp+8 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
||||
0084a30 rsp+64 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
||||
\end{lstlisting}
|
||||
\newcolumntype{a}{>{\columncolor{RedOrange}}l}
|
||||
|
||||
\begin{frame}{DWARF unwinding data}
|
||||
\vspace{2em}
|
||||
\tt \footnotesize
|
||||
\begin{tabular}{
|
||||
>{\columncolor{YellowGreen}}l
|
||||
>{\columncolor{Thistle}}l
|
||||
l l l l l l
|
||||
>{\columncolor{Apricot}}l}
|
||||
~LOC & CFA & rbx & rbp & r12 & r13 & r14 & r15 & ra \\
|
||||
0084950 & rsp+8 & u & u & u & u & u & u & c-8 \\
|
||||
0084952 & rsp+16 & u & u & u & u & u & c-16 & c-8 \\
|
||||
0084954 & rsp+24 & u & u & u & u & c-24 & c-16 & c-8 \\
|
||||
0084956 & rsp+32 & u & u & u & c-32 & c-24 & c-16 & c-8 \\
|
||||
0084958 & rsp+40 & u & u & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
||||
0084959 & rsp+48 & u & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
||||
\rowcolor{Aquamarine} 008495a & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
||||
0084962 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
||||
0084a19 & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
||||
0084a1d & rsp+48 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
||||
0084a1e & rsp+40 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
||||
0084a20 & rsp+32 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
||||
0084a22 & rsp+24 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
||||
0084a24 & rsp+16 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
||||
0084a26 & rsp+8 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
||||
0084a30 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
|
||||
\end{tabular}
|
||||
|
||||
\pause{}
|
||||
|
||||
\vspace{-4cm}
|
||||
\vspace{-3cm}
|
||||
\hfill\includegraphics[height=3cm, angle=45, origin=c]{img/dwarf_logo}
|
||||
\hspace{-1cm}
|
||||
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{The real DWARF}
|
||||
|
@ -194,22 +204,31 @@ $1 = 84
|
|||
DW_CFA_def_cfa_offset: 40
|
||||
DW_CFA_offset: r12 (r12) at cfa-40
|
||||
DW_CFA_advance_loc: 1 to 0000000000084959
|
||||
DW_CFA_def_cfa_offset: 48
|
||||
DW_CFA_offset: r6 (rbp) at cfa-48
|
||||
DW_CFA_advance_loc: 1 to 000000000008495a
|
||||
[...]
|
||||
\end{lstlisting}
|
||||
|
||||
\begin{itemize}
|
||||
\item[\textbf{$\longrightarrow$}] \textbf{\alert{constructed} on-demand
|
||||
by a \alert{Turing-complete, slow bytecode}!}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Why does slow matter?}
|
||||
\textbf{Do we really care about speed for unwinding?}
|
||||
\begin{itemize}
|
||||
|
||||
\item{} After all, we're talking about \alert{debugging procedures} run
|
||||
by a \alert{human being} (slower than the machine).
|
||||
|
||||
\ldots{}or are we?
|
||||
\end{itemize}
|
||||
|
||||
\pause{}
|
||||
\begin{center}
|
||||
\textbf{\Large{}No!}
|
||||
\end{center}
|
||||
|
||||
\begin{itemize}
|
||||
\pause{}\item{} Pretty much any \alert{program analysis tool}
|
||||
\pause{}\item{} \alert{Profiling} with polling profilers
|
||||
|
||||
\pause{}\item{} \alert{Exception handling} in C++
|
||||
|
@ -224,7 +243,25 @@ $1 = 84
|
|||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Compiling DWARF}
|
||||
\section{Compiling stack unwinding data ahead-of-time}
|
||||
|
||||
\subsection*{}
|
||||
|
||||
\begin{frame}{Compilation overview}
|
||||
\begin{itemize}
|
||||
\item Compiled to \alert{C code}
|
||||
\item C code then \alert{compiled to native binary} (gcc)
|
||||
\begin{itemize}
|
||||
\item[$\leadsto$] gcc optimisations for free
|
||||
\end{itemize}
|
||||
\item Compiled as \alert{separate \texttt{.so} files}, called \ehelfs{}
|
||||
\bigskip{}
|
||||
\item Morally a \alert{monolithic switch} on IPs
|
||||
\item Each case contains assembly that computes a \alert{row of the
|
||||
table}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Example}
|
||||
|
@ -241,40 +278,24 @@ $1 = 84
|
|||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Compilation Strategy}
|
||||
|
||||
\begin{frame}{Compilation overview}
|
||||
\begin{itemize}
|
||||
\item Compiled to \alert{C code}
|
||||
\item C code then \alert{compiled to native binary} (gcc)
|
||||
\begin{itemize}
|
||||
\item[$\leadsto$] gcc optimisations for free
|
||||
\end{itemize}
|
||||
\item Compiled as \alert{separate \texttt{.so} files}, called \ehelfs{}
|
||||
\bigskip{}
|
||||
\item Morally a \alert{monolithic switch} on IPs
|
||||
\item Each case fills the context structure
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Compilation choices}
|
||||
\textbf{In order to keep the compiler \alert{simple} and \alert{easily
|
||||
testable}, the whole DWARF5 instruction set is not supported.}
|
||||
|
||||
\begin{itemize}
|
||||
\item Tailored for \alert{x86\_64} (while DWARF is
|
||||
architecture-agnostic)
|
||||
\item Only supports \alert{unwinding registers}: \reg{rip}, \reg{rsp},
|
||||
\reg{rbp}, \reg{rbx}
|
||||
\item Focus on \alert{x86\_64}
|
||||
\item Focus on unwinding return address \\
|
||||
\vspace{0.3ex}
|
||||
$\leadsto$ \textit{Allows building a backtrace}
|
||||
\begin{itemize}
|
||||
\item[$\leadsto$] suitable for perf, not for gdb
|
||||
\end{itemize}
|
||||
\item Supports the \alert{wide majority} ($> 99.9\%$) of instructions
|
||||
used (see later)
|
||||
\begin{itemize}
|
||||
\item Only supports a few common expressions: already $\sim 90\,\%$
|
||||
of expressions used
|
||||
\end{itemize}
|
||||
\item Among \alert{4000} randomly sampled files, only \alert{24}
|
||||
containing unsupported instructions
|
||||
\item \alert{suitable for perf, not for gdb}
|
||||
\item Only supports \alert{unwinding registers}: \reg{rip}, \reg{rsp},
|
||||
\reg{rbp}, \reg{rbx}
|
||||
\item Supports the \alert{wide majority} ($> 99.9\%$) of instructions
|
||||
used
|
||||
\item Among \alert{4000} randomly sampled files, only \alert{24}
|
||||
containing unsupported instructions
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
|
@ -282,13 +303,15 @@ $1 = 84
|
|||
\begin{itemize}
|
||||
\item \alert{libunwind}: \textit{de facto} standard library for
|
||||
unwinding
|
||||
\item Uses DWARF in background
|
||||
\item Relies on DWARF
|
||||
|
||||
\bigskip{}
|
||||
|
||||
\item \texttt{libunwind-eh\_elf}: alternative implementation using
|
||||
\ehelfs{}
|
||||
|
||||
\item{} Result: \alert{alternative implementation} of libunwind, nearly
|
||||
plug-and-play for existing projects!
|
||||
\item[$\leadsto$] \alert{alternative implementation} of libunwind,
|
||||
almost plug-and-play for existing projects!
|
||||
\begin{itemize}
|
||||
\item[$\leadsto$] It is \alert{easy} to use \ehelfs{}: just
|
||||
link against the right library!
|
||||
|
@ -310,24 +333,20 @@ $1 = 84
|
|||
|
||||
\item Remark: a lot of lines appear often.
|
||||
\begin{itemize}
|
||||
\item[$\leadsto$] \emph{outline} them!
|
||||
\item[$\leadsto$] \textbf{\emph{outline} them!}
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
\pause{}
|
||||
|
||||
\textbf{Outlining:}
|
||||
\begin{itemize}
|
||||
\item On libc, $20\,827$ rows $\rightarrow$ $302$ outlined ($1.5\,\%$)
|
||||
\item Turn the big switch into a binary search \alert{if/else tree}
|
||||
\item \alert{Extract} the conditional bodies, put them afterwards
|
||||
\item Jump to them using a \alert{label/goto}
|
||||
\end{itemize}
|
||||
|
||||
\pause{}
|
||||
|
||||
\bigskip{}
|
||||
\begin{center}
|
||||
$\leadsto$ only \textbf{2.5 times heavier than DWARF}
|
||||
$\leadsto$ only \textbf{2.5 times bigger than DWARF}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
|
@ -337,7 +356,7 @@ $1 = 84
|
|||
|
||||
\subsection{A word on formalization}
|
||||
|
||||
\begin{frame}{A word on formalization}
|
||||
\begin{frame}[t]{A word on formalization}
|
||||
\begin{itemize}
|
||||
\item First task: \alert{writing semantics} for DWARF, written as
|
||||
mapping to C code.
|
||||
|
@ -346,6 +365,12 @@ $1 = 84
|
|||
\item What remains to prove is mostly \alert{simple or classic
|
||||
optimisations}
|
||||
\end{itemize}
|
||||
|
||||
\pause{}
|
||||
\vspace{-3cm}
|
||||
\begin{center}
|
||||
\includegraphics[width=0.8\linewidth, angle=10]{img/dw_spec.png}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
@ -360,19 +385,11 @@ $1 = 84
|
|||
\item Yet be fair: don't always unwind from totally different places
|
||||
\item Distribute evenly: if possible, also from within libraries
|
||||
\end{enumerate}
|
||||
|
||||
\pause{}\vspace{1em}
|
||||
|
||||
\begin{itemize}
|
||||
\item 2 $\implies$ exit hand-crafted program. CSmith did not work
|
||||
either.
|
||||
\item 5 $\implies$ cannot call the unwinding procedure by hand
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{frame}{perf instrumentation}
|
||||
\textbf{\alert{perf} is a polling profiler.}
|
||||
\textbf{\alert{perf} is the state-of-the-art polling profiler for Linux.}
|
||||
\begin{itemize}
|
||||
\item{} used to get readings of the time spent in each function
|
||||
\item{} works by regularly stopping the program, unwinding its stack,
|
||||
|
@ -448,6 +465,7 @@ $1 = 84
|
|||
& 22.09 & 2.97 \\
|
||||
hackbench
|
||||
& 93.87 & 4.99 \\
|
||||
\midrule
|
||||
Total
|
||||
& 22.81 & \alert{2.44} \\
|
||||
\bottomrule
|
||||
|
@ -458,20 +476,25 @@ $1 = 84
|
|||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section*{}
|
||||
\setcounter{section}{0}
|
||||
|
||||
\begin{frame}{What next?}
|
||||
\begin{itemize}
|
||||
\item \alert{Outlining} was super efficient for
|
||||
compactness\ldots{} Worth trying on standard DWARF\@?
|
||||
|
||||
\item Implement a release-ready, packageable, easy-to-use version of
|
||||
perf with \ehelfs{} and submit it for inclusion
|
||||
|
||||
\item{} Measure \alert{C++ exceptions overhead} precisely in common
|
||||
software
|
||||
|
||||
\item{} Implement \alert{\ehelfs{}} support for \alert{C++ runtime}
|
||||
exception handling
|
||||
\item{} \ldots{}and many more possibilities to explore!
|
||||
exception handling, and other systems where unwinding is a
|
||||
performance bottleneck
|
||||
|
||||
\medskip
|
||||
|
||||
\item \alert{Outlining} was effective for
|
||||
compactness\ldots{} Try outlining DWARF bytecode\@?
|
||||
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
|
@ -493,11 +516,14 @@ $1 = 84
|
|||
\end{columns}
|
||||
|
||||
\vspace{1.5em}
|
||||
\begin{center}
|
||||
\Huge\bfseries
|
||||
Thank you!
|
||||
\end{center}
|
||||
|
||||
\begin{center}
|
||||
\large
|
||||
\begin{align*}
|
||||
\textbf{Slides: } &\text{\url{https://tobast.fr/m2/slides.pdf}} \\
|
||||
\textbf{Report: } &\text{\url{https://tobast.fr/m2/report.pdf}}
|
||||
\end{align*}
|
||||
\end{center}
|
||||
|
||||
\end{frame}
|
||||
|
||||
|
|
|
@ -1,13 +1,11 @@
|
|||
#include <stdio.h> DWARF
|
||||
DWARF
|
||||
CFA ra
|
||||
void fib7() { rsp+8 c-8
|
||||
int fibo[8]; rsp+48 c-8
|
||||
void fib7() { 0x615 rsp+8 c-8
|
||||
int fibo[8]; 0x620 rsp+48 c-8
|
||||
fibo[0] = 1;
|
||||
fibo[1] = 1;
|
||||
for(int pos = 2; pos < 8; ++pos)
|
||||
fibo[pos] =
|
||||
fibo[pos - 1]
|
||||
+ fibo[pos - 2];
|
||||
for(...)
|
||||
...
|
||||
printf("%d\n", fibo[7]);
|
||||
rsp+8 c-8
|
||||
0x659 rsp+8 c-8
|
||||
}
|
||||
|
|
|
@ -3,16 +3,13 @@ unwind_context_t _eh_elf(
|
|||
{
|
||||
unwind_context_t out_ctx;
|
||||
switch(pc) {
|
||||
// [...] Previous FDEs redacted
|
||||
...
|
||||
case 0x615 ... 0x618:
|
||||
out_ctx.rsp = ctx.rsp + (8);
|
||||
out_ctx.rsp = ctx.rsp + 8;
|
||||
out_ctx.rip =
|
||||
*((uintptr_t*)(out_ctx.rsp + (-8)));
|
||||
*((uintptr_t*)(out_ctx.rsp - 8));
|
||||
out_ctx.flags = 3u;
|
||||
return out_ctx;
|
||||
// [...] Further lines and FDEs redacted
|
||||
default:
|
||||
out_ctx.flags = 128u;
|
||||
return out_ctx;
|
||||
}
|
||||
...
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,15 +2,20 @@ unwind_context_t _eh_elf(
|
|||
unwind_context_t ctx, uintptr_t pc)
|
||||
{
|
||||
unwind_context_t out_ctx;
|
||||
if(pc < 0x619) { /* [...] */ } else {
|
||||
if(pc < 0x619) { ... }
|
||||
else {
|
||||
if(pc < 0x659) { // IP=0x619 ... 0x658
|
||||
goto _factor_4;
|
||||
} // [...]
|
||||
goto _factor_1;
|
||||
}
|
||||
...
|
||||
}
|
||||
|
||||
_factor_4:
|
||||
_factor_1:
|
||||
out_ctx.rsp = ctx.rsp + (48);
|
||||
out_ctx.rip = *((uintptr_t*)(out_ctx.rsp + (-8)));
|
||||
out_ctx.flags = 3u;
|
||||
|
||||
...
|
||||
|
||||
return out_ctx;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue