diff --git a/slides/img/dw_spec.png b/slides/img/dw_spec.png new file mode 100644 index 0000000..ad23073 Binary files /dev/null and b/slides/img/dw_spec.png differ diff --git a/slides/img/stack/call_stack.png b/slides/img/stack/call_stack.png new file mode 100644 index 0000000..b4e575e Binary files /dev/null and b/slides/img/stack/call_stack.png differ diff --git a/slides/img/stack/call_stack.xcf b/slides/img/stack/call_stack.xcf new file mode 100644 index 0000000..2bba27e Binary files /dev/null and b/slides/img/stack/call_stack.xcf differ diff --git a/slides/slides.tex b/slides/slides.tex index 03896df..9f1bbac 100644 --- a/slides/slides.tex +++ b/slides/slides.tex @@ -11,6 +11,7 @@ \usepackage{booktabs} \usepackage{makecell} \usepackage{ifthen} +\usepackage{colortbl} \usepackage{../shared/my_listings} %\usepackage{../shared/my_hyperref} @@ -18,17 +19,35 @@ \usepackage{../shared/common} \usepackage{../shared/todo} +\usepackage{inconsolata} +\lstset{basicstyle=\footnotesize\ttfamily} + \renewcommand\theadalign{c} \renewcommand\theadfont{\scriptsize\bfseries} \setbeamertemplate{navigation symbols}{} +\setbeamertemplate{headline}{} \newcommand{\thenalert}[1]{\only<1>{#1}\only<2>{\alert{#1}}} \newcommand{\slidecountline}{ \ifthenelse{\theframenumber = 0} {} {\insertframenumber/\inserttotalframenumber}} +\newcommand{\sectionline}{ + \ifthenelse{\thesection = 0} + {} + {\Roman{section}~-- \insertsection}} +\AtBeginSection[]{ + \begin{frame} + \vfill + \centering + \begin{beamercolorbox}[sep=8pt,center,shadow=true,rounded=true]{title} + \usebeamerfont{title}\insertsectionhead\par% + \end{beamercolorbox} + \vfill + \end{frame} +} \lstdefinelanguage{gdb}{ morekeywords={gdb}, @@ -38,8 +57,8 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \author[\slidecountline]{Théophile \textsc{Bastian} \\ \small{Under supervision of Francesco Zappa Nardelli}} -\title[DWARF unwinding data compilation] - {Speeding up stack unwinding by compiling DWARF debugging data} 
+\title[\sectionline] + {Speeding up stack unwinding by compiling DWARF debug data} \date{March\ --\ August 2018} %\subject{} %\logo{} @@ -51,16 +70,18 @@ \addtocounter{framenumber}{-1} \titlepage{} - \vspace{-1em} + \vspace{-2em} \begin{center} - Slides: \url{https://tobast.fr/m2/slides.pdf} \\ - Report: \url{https://tobast.fr/m2/report.pdf} + \begin{align*} + \text{Slides: } &\text{\url{https://tobast.fr/m2/slides.pdf}} \\ + \text{Report: } &\text{\url{https://tobast.fr/m2/report.pdf}} + \end{align*} \end{center} \end{frame} -\begin{frame} +\begin{frame}{~} \addtocounter{framenumber}{-1} - \tableofcontents + \tableofcontents[hideallsubsections] \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -70,26 +91,36 @@ \subsection{Introduction} \begin{frame}[fragile]{We often use stack unwinding!} - \begin{lstlisting}[language=gdb, numbers=none, escapechar=|] -Program received signal SIGSEGV, Segmentation fault. -0x0000555555554625 in fct_b (m=0x5c) at segfault.c:5 -5 printf("%l\n", *m); -|\pause| (gdb) backtrace -#0 0x0000555555554625 in fct_b (m=0x5c) at segfault.c:5 -#1 0x0000555555554663 in fct_a (n=42) at segfault.c:10 -#2 0x0000555555554674 in main () at segfault.c:14 -|\pause| (gdb) frame 1 -#1 0x0000555555554663 in fct_a (n=42) at segfault.c:10 -10 fct_b((int*)(some_fct_a_var + 8)); -|\pause| (gdb) print some_fct_a_var -$1 = 84 - \end{lstlisting} + \begin{columns}[c] + \begin{column}{0.70\textwidth} + \begin{lstlisting}[language=gdb, numbers=none, escapechar=|] +Program received signal SIGSEGV. 
+0x54625 in fct_b at segfault.c:5 +5 printf("%l\n", *b); - \pause{} - \begin{center} - \textbf{\Large How does it work?!} - \end{center} - \vspace{1em} +|\pause| (gdb) backtrace +#0 0x54625 in fct_b at segfault.c:5 +#1 0x54663 in fct_a at segfault.c:10 +#2 0x54674 in main at segfault.c:14 + +|\pause| (gdb) frame 1 +#1 0x54663 in fct_a at segfault.c:10 +10 fct_b((int*) a); + +|\pause| (gdb) print a +$1 = 84 + \end{lstlisting} + \vspace{-1em} + \pause{} + \begin{center} + \textbf{\Large How does it work?!} + \end{center} + \end{column} + \begin{column}{0.35\textwidth} + \pause{} + \includegraphics[width=0.95\linewidth]{img/stack/call_stack} + \end{column} + \end{columns} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -97,85 +128,64 @@ $1 = 84 \begin{frame}{Call stack and registers} \begin{columns}[c] - \begin{column}{0.65\textwidth} - \begin{itemize} - \item Programs use a \alert{call stack} - \item Organized in \alert{stack frames} - \begin{itemize} - \item Local variables - \item Function parameters - \item Keep track of nesting, registers and ``return - point'' - \end{itemize} - \end{itemize} + \begin{column}{0.55\textwidth} + \begin{center} + \large\bf + How do we get the grandparent RA\@? - Common registers: - \begin{itemize} - \item \reg{rip}: program counter (PC) + \medskip - \item \reg{rsp}: stack pointer + Isn't it as trivial as \texttt{pop()}? 
- \item \reg{rbp}: base pointer - \begin{itemize} - \item Saves \reg{rsp} - \item Easy access - \item Wastes a register - \item Not often used (x86\_64) - \end{itemize} - \end{itemize} + \vspace{2em} + + \only<2>{We only have \reg{rsp} and \reg{rip}.} + + \end{center} \end{column} - \begin{column}{0.35\textwidth} - \includegraphics[width=0.95\linewidth]{../shared/imgs/call_stack} + \begin{column}{0.45\textwidth} + \includegraphics[width=0.95\linewidth]{img/stack/call_stack} \end{column} \end{columns} \end{frame} -\begin{frame}{Isn't it as trivial as \texttt{pop()}?} - \begin{itemize} - \item This is only a \alert{blob of binary data} without mandatory - structure - \item We ignore \alert{which registers were saved} - \item We ignore \alert{whether \reg{rbp} was used} - \item We ignore \alert{where the return address is stored} - \item We ignore \alert{where the previous frame begins} - \end{itemize} - - \medskip - - But\ldots{} if we know how to \alert{unwind one}, we can \alert{recurse}! 
-\end{frame} - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{DWARF tables} -\begin{frame}[fragile, shrink]{DWARF unwinding data} - \begin{lstlisting}[numbers=none, language=] -00009b30 48 009b34 FDE cie=0000 pc=0084950..0084b37 - LOC CFA rbx rbp r12 r13 r14 r15 ra -0084950 rsp+8 u u u u u u c-8 -0084952 rsp+16 u u u u u c-16 c-8 -0084954 rsp+24 u u u u c-24 c-16 c-8 -0084956 rsp+32 u u u c-32 c-24 c-16 c-8 -0084958 rsp+40 u u c-40 c-32 c-24 c-16 c-8 -0084959 rsp+48 u c-48 c-40 c-32 c-24 c-16 c-8 -008495a rsp+56 c-56 c-48 c-40 c-32 c-24 c-16 c-8 -0084962 rsp+64 c-56 c-48 c-40 c-32 c-24 c-16 c-8 -0084a19 rsp+56 c-56 c-48 c-40 c-32 c-24 c-16 c-8 -0084a1d rsp+48 c-56 c-48 c-40 c-32 c-24 c-16 c-8 -0084a1e rsp+40 c-56 c-48 c-40 c-32 c-24 c-16 c-8 -0084a20 rsp+32 c-56 c-48 c-40 c-32 c-24 c-16 c-8 -0084a22 rsp+24 c-56 c-48 c-40 c-32 c-24 c-16 c-8 -0084a24 rsp+16 c-56 c-48 c-40 c-32 c-24 c-16 c-8 -0084a26 rsp+8 c-56 c-48 c-40 c-32 c-24 c-16 c-8 -0084a30 rsp+64 c-56 c-48 c-40 c-32 c-24 c-16 c-8 - \end{lstlisting} +\newcolumntype{a}{>{\columncolor{RedOrange}}l} + +\begin{frame}{DWARF unwinding data} + \vspace{2em} + \tt \footnotesize + \begin{tabular}{ + >{\columncolor{YellowGreen}}l + >{\columncolor{Thistle}}l + l l l l l l + >{\columncolor{Apricot}}l} +~LOC & CFA & rbx & rbp & r12 & r13 & r14 & r15 & ra \\ +0084950 & rsp+8 & u & u & u & u & u & u & c-8 \\ +0084952 & rsp+16 & u & u & u & u & u & c-16 & c-8 \\ +0084954 & rsp+24 & u & u & u & u & c-24 & c-16 & c-8 \\ +0084956 & rsp+32 & u & u & u & c-32 & c-24 & c-16 & c-8 \\ +0084958 & rsp+40 & u & u & c-40 & c-32 & c-24 & c-16 & c-8 \\ +0084959 & rsp+48 & u & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\ +\rowcolor{Aquamarine} 008495a & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\ +0084962 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\ +0084a19 & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\ +0084a1d & rsp+48 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 
& c-8 \\ +0084a1e & rsp+40 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\ +0084a20 & rsp+32 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\ +0084a22 & rsp+24 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\ +0084a24 & rsp+16 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\ +0084a26 & rsp+8 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\ +0084a30 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\ + \end{tabular} \pause{} - \vspace{-4cm} + \vspace{-3cm} \hfill\includegraphics[height=3cm, angle=45, origin=c]{img/dwarf_logo} \hspace{-1cm} - \end{frame} \begin{frame}[fragile]{The real DWARF} @@ -194,22 +204,31 @@ $1 = 84 DW_CFA_def_cfa_offset: 40 DW_CFA_offset: r12 (r12) at cfa-40 DW_CFA_advance_loc: 1 to 0000000000084959 - DW_CFA_def_cfa_offset: 48 - DW_CFA_offset: r6 (rbp) at cfa-48 - DW_CFA_advance_loc: 1 to 000000000008495a [...] \end{lstlisting} + + \begin{itemize} + \item[\textbf{$\longrightarrow$}] \textbf{\alert{constructed} on-demand + by a \alert{Turing-complete, slow bytecode}!} + \end{itemize} \end{frame} \begin{frame}{Why does slow matter?} - \textbf{Do we really care about speed for unwinding?} \begin{itemize} \item{} After all, we're talking about \alert{debugging procedures} ran by a \alert{human being} (slower than the machine). \ldots{}or are we? 
+ \end{itemize} + \pause{} + \begin{center} + \textbf{\Large{}No!} + \end{center} + + \begin{itemize} + \pause{}\item{} Pretty much any \alert{program analysis tool} \pause{}\item{} \alert{Profiling} with polling profilers \pause{}\item{} \alert{Exception handling} in C++ @@ -224,7 +243,25 @@ $1 = 84 \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Compiling DWARF} +\section{Compiling stack unwinding data ahead-of-time} + +\subsection*{} + +\begin{frame}{Compilation overview} + \begin{itemize} + \item Compiled to \alert{C code} + \item C code then \alert{compiled to native binary} (gcc) + \begin{itemize} + \item[$\leadsto$] gcc optimisations for free + \end{itemize} + \item Compiled as \alert{separate \texttt{.so} files}, called \ehelfs{} + \bigskip{} + \item Morally a \alert{monolithic switch} on IPs + \item Each case contains assembly that computes a \alert{row of the + table} + \end{itemize} +\end{frame} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Example} @@ -241,40 +278,24 @@ $1 = 84 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Compilation Strategy} -\begin{frame}{Compilation overview} - \begin{itemize} - \item Compiled to \alert{C code} - \item C code then \alert{compiled to native binary} (gcc) - \begin{itemize} - \item[$\leadsto$] gcc optimisations for free - \end{itemize} - \item Compiled as \alert{separate \texttt{.so} files}, called \ehelfs{} - \bigskip{} - \item Morally a \alert{monolithic switch} on IPs - \item Each case fills the context structure - \end{itemize} -\end{frame} - \begin{frame}{Compilation choices} \textbf{In order to keep the compiler \alert{simple} and \alert{easily testable}, the whole DWARF5 instruction set is not supported.} \begin{itemize} - \item Tailored for \alert{x86\_64} (while DWARF is - architecture-agnostic) - \item Only supports \alert{unwinding registers}: \reg{rip}, \reg{rsp}, - 
\reg{rbp}, \reg{rbx} + \item Focus on \alert{x86\_64} + \item Focus on unwinding return address \\ + \vspace{0.3ex} + $\leadsto$ \textit{Allows building a backtrace} \begin{itemize} - \item[$\leadsto$] suitable for perf, not for gdb - \end{itemize} - \item Supports the \alert{wide majority} ($> 99.9\%$) of instructions - used (see later) - \begin{itemize} - \item Only supports few common expressions: already $~ 90\,\%$ - of expressions used - \end{itemize} - \item Among \alert{4000} randomly sampled filed, only \alert{24} - containing unsupported instructions + \item \alert{suitable for perf, not for gdb} + \item Only supports \alert{unwinding registers}: \reg{rip}, \reg{rsp}, + \reg{rbp}, \reg{rbx} + \item Supports the \alert{wide majority} ($> 99.9\%$) of instructions + used + \item Among \alert{4000} randomly sampled files, only \alert{24} + containing unsupported instructions + \end{itemize} \end{itemize} \end{frame} @@ -282,13 +303,15 @@ $1 = 84 \begin{itemize} \item \alert{libunwind}: \textit{de facto} standard library for unwinding - \item Uses DWARF in background + \item Relies on DWARF + + \bigskip{} \item \texttt{libunwind-eh\_elf}: alternative implementation using \ehelfs{} - \item{} Result: \alert{alternative implementation} of libunwind, nearly - plug-and-play for existing projects! + \item[$\leadsto$] \alert{alternative implementation} of libunwind, + almost plug-and-play for existing projects! \begin{itemize} \item[$\leadsto$] It is \alert{easy} to use \ehelfs{}: just link against the right library! @@ -310,24 +333,20 @@ $1 = 84 \item Remark: a lot of lines appear often. \begin{itemize} - \item[$\leadsto$] \emph{outline} them! 
+ \item[$\leadsto$] \textbf{\emph{outline} them!} \end{itemize} - \end{itemize} \pause{} - \textbf{Outlining:} - \begin{itemize} + \item On libc, $20\,827$ rows $\rightarrow$ $302$ outlined ($1.5\,\%$) \item Turn the big switch into a binary search \alert{if/else tree} - \item \alert{Extract} the conditional bodies, put them afterwards - \item Jump to them using a \alert{label/goto} \end{itemize} \pause{} \bigskip{} \begin{center} - $\leadsto$ only \textbf{2.5 times heavier than DWARF} + $\leadsto$ only \textbf{2.5 times bigger than DWARF} \end{center} \end{frame} @@ -337,7 +356,7 @@ $1 = 84 \subsection{A word on formalization} -\begin{frame}{A word on formalization} +\begin{frame}[t]{A word on formalization} \begin{itemize} \item First task: \alert{writing semantics} for DWARF, written as mapping to C code. @@ -346,6 +365,12 @@ $1 = 84 \item What remains to prove is mostly \alert{simple or classic optimisations} \end{itemize} + + \pause{} + \vspace{-3cm} + \begin{center} + \includegraphics[width=0.8\linewidth, angle=10]{img/dw_spec.png} + \end{center} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -360,19 +385,11 @@ $1 = 84 \item Yet be fair: don't always unwind from totally different places \item Distribute evenly: if possible, also from within libraries \end{enumerate} - - \pause{}\vspace{1em} - - \begin{itemize} - \item 2 $\implies$ exit hand-crafted program. CSmith did not work - either. 
- \item 5 $\implies$ cannot call the unwinding procedure by hand - \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}{perf instrumentation} - \textbf{\alert{perf} is a polling profiler.} + \textbf{\alert{perf} is the state-of-the-art polling profiler for Linux.} \begin{itemize} \item{} used to get readings of the time spent in each function \item{} works by regularly stopping the program, unwinding its stack, @@ -448,6 +465,7 @@ $1 = 84 & 22.09 & 2.97 \\ hackbench & 93.87 & 4.99 \\ + \midrule Total & 22.81 & \alert{2.44} \\ \bottomrule @@ -458,20 +476,25 @@ $1 = 84 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section*{} +\setcounter{section}{0} \begin{frame}{What next?} \begin{itemize} - \item \alert{Outlining} was super efficient for - compactness\ldots{} Worth trying on standard DWARF\@? - \item Implement a release-ready, packageable, easy to use version of perf with \ehelfs{} and submit it for inclusion \item{} Measure \alert{C++ exceptions overhead} precisely in common software + \item{} Implement \alert{\ehelfs{}} support for \alert{C++ runtime} - exception handling - \item{} \ldots{}and many more possibilities to explore! + exception handling, and other systems where unwinding is a + performance bottleneck + + \medskip + + \item \alert{Outlining} was effective for + compactness\ldots{} Try outlining DWARF bytecode\@? + \end{itemize} \end{frame} @@ -493,11 +516,14 @@ $1 = 84 \end{columns} \vspace{1.5em} - \begin{center} - \Huge\bfseries - Thank you! 
- \end{center} + \begin{center} + \large + \begin{align*} + \textbf{Slides: } &\text{\url{https://tobast.fr/m2/slides.pdf}} \\ + \textbf{Report: } &\text{\url{https://tobast.fr/m2/report.pdf}} + \end{align*} + \end{center} \end{frame} diff --git a/slides/src/fib7/fib7.cfde b/slides/src/fib7/fib7.cfde index e199a3f..b00df18 100644 --- a/slides/src/fib7/fib7.cfde +++ b/slides/src/fib7/fib7.cfde @@ -1,13 +1,11 @@ -#include DWARF + DWARF CFA ra -void fib7() { rsp+8 c-8 - int fibo[8]; rsp+48 c-8 +void fib7() { 0x615 rsp+8 c-8 + int fibo[8]; 0x620 rsp+48 c-8 fibo[0] = 1; fibo[1] = 1; - for(int pos = 2; pos < 8; ++pos) - fibo[pos] = - fibo[pos - 1] - + fibo[pos - 2]; + for(...) + ... printf("%d\n", fibo[7]); - rsp+8 c-8 + 0x659 rsp+8 c-8 } diff --git a/slides/src/fib7/fib7.eh_elf_basic.c b/slides/src/fib7/fib7.eh_elf_basic.c index 4c7dcc2..b8fa79b 100644 --- a/slides/src/fib7/fib7.eh_elf_basic.c +++ b/slides/src/fib7/fib7.eh_elf_basic.c @@ -3,16 +3,13 @@ unwind_context_t _eh_elf( { unwind_context_t out_ctx; switch(pc) { - // [...] Previous FDEs redacted + ... case 0x615 ... 0x618: - out_ctx.rsp = ctx.rsp + (8); + out_ctx.rsp = ctx.rsp + 8; out_ctx.rip = - *((uintptr_t*)(out_ctx.rsp + (-8))); + *((uintptr_t*)(out_ctx.rsp - 8)); out_ctx.flags = 3u; return out_ctx; - // [...] Further lines and FDEs redacted - default: - out_ctx.flags = 128u; - return out_ctx; - } + ... + } } diff --git a/slides/src/fib7/fib7.eh_elf_outline.c b/slides/src/fib7/fib7.eh_elf_outline.c index 9d24bc0..576f333 100644 --- a/slides/src/fib7/fib7.eh_elf_outline.c +++ b/slides/src/fib7/fib7.eh_elf_outline.c @@ -2,15 +2,20 @@ unwind_context_t _eh_elf( unwind_context_t ctx, uintptr_t pc) { unwind_context_t out_ctx; - if(pc < 0x619) { /* [...] */ } else { + if(pc < 0x619) { ... } + else { if(pc < 0x659) { // IP=0x619 ... 0x658 - goto _factor_4; - } // [...] + goto _factor_1; + } + ... 
} - _factor_4: + _factor_1: out_ctx.rsp = ctx.rsp + (48); out_ctx.rip = *((uintptr_t*)(out_ctx.rsp + (-8))); out_ctx.flags = 3u; + + ... + return out_ctx; }