% talk-2019-10-OOPSLA19/slides.tex
%
% 824 lines
% 25 KiB
% TeX
% Raw Blame History
%
% This file contains ambiguous Unicode characters
%
% This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

% vim: spell spelllang=en
\documentclass[11pt,xcolor={usenames,dvipsnames}]{beamer}
\usetheme{Warsaw}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\usepackage[T1]{fontenc}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{booktabs}
\usepackage{makecell}
\usepackage{ifthen}
\usepackage{colortbl}
\usepackage{tabularx}
\usepackage{pifont}
\usepackage{texlib/my_listings}
\usepackage{texlib/specific}
\usepackage{texlib/common}
\usepackage{texlib/todo}
\usepackage{inconsolata}
\lstset{basicstyle=\footnotesize\ttfamily}
\renewcommand\theadalign{c}
\renewcommand\theadfont{\scriptsize\bfseries}
\setbeamertemplate{navigation symbols}{}
\setbeamertemplate{headline}{}
% \thenalert{text}: plain on overlay 1, alerted on overlay 2, and absent on
% every later overlay of the frame.
\newcommand{\thenalert}[1]{%
  \only<1>{#1}%
  \only<2>{\alert{#1}}%
}
% Short-author text (see \author[\slidecountline]): "current/total" frame
% count, left empty while the frame counter is still 0 (the title and outline
% frames decrement it back to 0).
% The trailing % signs keep line ends from leaking space tokens into the
% macro body; the counter is compared through \value, as ifthen documents.
\newcommand{\slidecountline}{%
  \ifthenelse{\value{framenumber} = 0}%
    {}%
    {\insertframenumber/\inserttotalframenumber}%
}
% Short-title text (see \title[\sectionline]): "<Roman section number> --
% <section name>", left empty while the section counter is 0, i.e. before the
% first \section and after the counter is reset for the closing frames.
% The trailing % signs keep line ends from leaking space tokens into the
% macro body; the counter is compared through \value, as ifthen documents.
\newcommand{\sectionline}{%
  \ifthenelse{\value{section} = 0}%
    {}%
    {\Roman{section}~-- \insertsection}%
}
% Check mark and cross dingbats (pifont), in green and red respectively.
% \textcolor confines the colour to the glyph itself; a bare \color switch
% would keep tinting whatever follows in the same group.
\newcommand{\cmark}{\textcolor{OliveGreen}{\ding{52}}}
\newcommand{\xmark}{\textcolor{BrickRed}{\ding{56}}}
% At the start of each section, insert a frame that shows only the section
% title, vertically centred in a rounded, shadowed title-coloured box.
% The empty optional argument is the text used for starred sections, so
% \section* inserts nothing.
\AtBeginSection[]{
  \begin{frame}
    \vfill
    \centering
    \begin{beamercolorbox}[sep=8pt,center,shadow=true,rounded=true]{title}
      \usebeamerfont{title}\insertsectionhead\par%
    \end{beamercolorbox}
    \vfill
  \end{frame}
}
% Minimal listings language for the gdb session transcript: its only keyword
% is "gdb", matched case-insensitively.
\lstdefinelanguage{gdb}{
morekeywords={gdb},
sensitive=false,
}
% tabularx column types used by the assembly tables:
%   b -- a plain X column;
%   s -- an X column whose \hsize is scaled to 0.43 of the default X width.
% NOTE(review): the tabularx manual asks that the \hsize factors of all X
% columns in a table sum to the number of X columns; confirm the resulting
% table widths are as intended.
\newcolumntype{b}{X}
\newcolumntype{s}{>{\hsize=.43\hsize}X}
% Inline C snippet: \lstinline preset with C highlighting, preserved spaces
% and a plain typewriter face.  The code to typeset follows the macro, as it
% would follow \lstinline itself.
\newcommand{\lstinl}{%
  \lstinline[language=C, keepspaces=true, basicstyle=\ttfamily]%
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\title[\sectionline] {Growing the DWARF tougher:\\
synthesis, validation and compilation}
\author[\slidecountline]{\textbf{Théophile Bastian} \\ \vspace{0.5em}
{{\footnotesize{}Based on work done with}\\
\textbf{Francesco Zappa Nardelli},
\textbf{Stephen Kell},
\textbf{Simon Ser}}}
\date{}
%\subject{}
%\logo{}
\institute{ENS Paris, INRIA}
\begin{document}
\begin{frame}
\addtocounter{framenumber}{-1}
\titlepage{}
\vspace{-2em}
\begin{center}
\todo{}
\begin{align*}
\text{Slides: } &\text{\url{https://tobast.fr/m2/slides.pdf}} \\
\text{Report: } &\text{\url{https://tobast.fr/m2/report.pdf}}
\end{align*}
\end{center}
\end{frame}
\begin{frame}{~}
\addtocounter{framenumber}{-1}
\tableofcontents[hideallsubsections]
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{DWARF and stack unwinding data}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Introduction}
\begin{frame}[fragile]{We often use stack unwinding!}
\begin{columns}[c]
\begin{column}{0.70\textwidth}
\begin{lstlisting}[language=gdb, numbers=none, escapechar=|]
Program received signal SIGSEGV.
0x54625 in fct_b at segfault.c:5
5 printf("%l\n", *b);
|\pause| (gdb) backtrace
#0 0x54625 in fct_b at segfault.c:5
#1 0x54663 in fct_a at segfault.c:10
#2 0x54674 in main at segfault.c:14
|\pause| (gdb) frame 1
#1 0x54663 in fct_a at segfault.c:10
10 fct_b((int*) a);
|\pause| (gdb) print a
$1 = 84
\end{lstlisting}
\vspace{-1em}
\pause{}
\begin{center}
\textbf{\Large How does it work?!}
\end{center}
\end{column}
\begin{column}{0.35\textwidth}
\pause{}
\includegraphics[width=0.95\linewidth]{img/call_stack}
\end{column}
\end{columns}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Stack frames and unwinding}
\begin{frame}{Call stack and registers}
\begin{columns}[c]
\begin{column}{0.55\textwidth}
\begin{center}
\large\bf
How do we get the grandparent RA\@?
\medskip
Isn't it as trivial as \texttt{pop()}?
\vspace{2em}
\only<2>{We only have \reg{rsp} and \reg{rip}.}
\end{center}
\end{column}
\begin{column}{0.45\textwidth}
\includegraphics[width=0.95\linewidth]{img/call_stack}
\end{column}
\end{columns}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{DWARF tables}
% Column type "a": left-aligned column with a RedOrange background.
\newcolumntype{a}{>{\columncolor{RedOrange}}l}
\begin{frame}{DWARF unwinding data}
\vspace{2em}
\tt \footnotesize
\begin{tabular}{
>{\columncolor{YellowGreen}}l
>{\columncolor{Thistle}}l
l l l l l l
>{\columncolor{Apricot}}l}
~LOC & CFA & rbx & rbp & r12 & r13 & r14 & r15 & ra \\
0084950 & rsp+8 & u & u & u & u & u & u & c-8 \\
0084952 & rsp+16 & u & u & u & u & u & c-16 & c-8 \\
0084954 & rsp+24 & u & u & u & u & c-24 & c-16 & c-8 \\
0084956 & rsp+32 & u & u & u & c-32 & c-24 & c-16 & c-8 \\
0084958 & rsp+40 & u & u & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084959 & rsp+48 & u & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
\rowcolor{Aquamarine} 008495a & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084962 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a19 & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a1d & rsp+48 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a1e & rsp+40 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a20 & rsp+32 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a22 & rsp+24 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a24 & rsp+16 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a26 & rsp+8 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a30 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
\end{tabular}
\pause{}
\vspace{-3cm}
\hfill\includegraphics[height=3cm, angle=45, origin=c]{img/dwarf_logo}
\hspace{-1cm}
\end{frame}
\begin{frame}[t, fragile]{The real DWARF}
\begin{lstlisting}[numbers=none, language=]
00009b30 48 009b34 FDE cie=0000 pc=0084950..0084b37
DW_CFA_advance_loc: 2 to 0000000000084952
DW_CFA_def_cfa_offset: 16
DW_CFA_offset: r15 (r15) at cfa-16
DW_CFA_advance_loc: 2 to 0000000000084954
DW_CFA_def_cfa_offset: 24
DW_CFA_offset: r14 (r14) at cfa-24
DW_CFA_advance_loc: 2 to 0000000000084956
DW_CFA_def_cfa_offset: 32
DW_CFA_offset: r13 (r13) at cfa-32
DW_CFA_advance_loc: 2 to 0000000000084958
DW_CFA_def_cfa_offset: 40
DW_CFA_offset: r12 (r12) at cfa-40
DW_CFA_advance_loc: 1 to 0000000000084959
[...]
\end{lstlisting}
\begin{itemize}
\item[\textbf{$\longrightarrow$}] \textbf{\alert{constructed} on-demand
by a \alert{Turing-complete bytecode}!}
\end{itemize}
\pause{}
\vspace{-6.5cm}
\begin{center}
\bf \fontsize{8cm}{1cm}
\colorbox{white}{\alert{Complex}} \\
\colorbox{white}{\alert{\& slow!}}
\end{center}
\end{frame}
\begin{frame}{Why does slow matter?}
\begin{itemize}
\item{} After all, we're talking about \alert{debugging procedures} run
by a \alert{human being} (slower than the machine).
\ldots{}or are we?
\end{itemize}
\pause{}
\begin{center}
\textbf{\Large{}No!}
\end{center}
\begin{itemize}
\pause{}\item{} Pretty much any \alert{program analysis tool}
\pause{}\item{} \alert{Profiling} with polling profilers
\pause{}\item{} \alert{Exception handling} in C++
\end{itemize}
\vspace{2em}
\begin{center}
\textbf{\Large{}Debug data is not only for debugging}
\end{center}
\end{frame}
\begin{frame}{Difficult to generate}
Major concern with DWARF\@: it is \alert{difficult to generate}
(correctly).
\begin{itemize}
\item{} \alert{Hard to generate}: each compiler pass must keep it in
sync
\item{} Most of it is \alert{seldom used} (\eg{} unwinding data of
dusty code), and thus \alert{seldom tested}
\end{itemize}
\vspace{1em}
Leads to
\begin{itemize}
\item{} unreliable DWARF\@: can cause headaches when debugging
\item{} or not generated at all (\eg{} OCaml until recently) \todo{Check
this}
\end{itemize}
\vspace{1em}
\begin{center}
\Large\bf
$\leadsto$ Complex, buggy, untested
\end{center}
\end{frame}
\begin{frame}{A debugging hell: Linux kernel}
``Sorry, but last time was too f\dots painful. The whole (and
only) point of unwinders is to make debugging easy
when a bug occurs. But \alert{the dwarf unwinder had bugs}
itself, or \alert{our dwarf information had bugs}, and in either
case it actually turned several trivial bugs into a \alert{total
undebuggable hell}.''
\vspace{1em}
\only<1>{\hfill ---~Linus Torvalds, Kernel mailing list, 2012}
\pause{}
``If you can \alert{mathematically prove that the unwinder is
correct} — even in the presence of bogus and actively
incorrect unwinding information — and never ever
follows a bad pointer, \alert{I'll reconsider}.''
\vspace{1em}
\hfill ---~Linus Torvalds, Kernel mailing list, 2012
\pause{}\vspace{1em}
\begin{center}
\Large\bf
\alert{This is where we still are!}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data validation}
\begin{frame}{Main idea}
\begin{itemize}
\item If we follow \alert{one path of execution}, we can track the
return address location
\item If we read unwinding data \alert{at runtime}, we can \alert{check
the RA consistency} at each step
\end{itemize}
\end{frame}
% One row of the four-column assembly table.  The first two cells are always
% shown; the last two (the CFA and ra columns) only appear from overlay 2 on.
\newcommand{\tblrowval}[4]{%
  #1 & #2 &
  \only<2->{#3} &
  \only<2->{#4} \\%
}
% \blknote{text}: untitled beamer block whose contents are set large and
% centred.
\newcommand{\blknote}[1]{%
  \begin{block}{}
    \centering\large
    #1
  \end{block}%
}
% \blklnote{text}: untitled beamer block whose contents are set large, with
% the default (non-centred) alignment.
\newcommand{\blklnote}[1]{%
  \begin{block}{}
    \large
    #1
  \end{block}%
}
% Row highlight used in the assembly tables: a Tan row background.
\newcommand{\tblhl}{\rowcolor{Tan}}
\begin{frame}{Example}
% Rows shared by both versions of the table in this frame (the one shown up
% to overlay 8 and the one shown from overlay 9 on).
% \rowonly is not defined in this file -- presumably it applies its argument
% (here the row highlight) on the given overlays only; confirm in texlib.
\newcommand{\firsttblrows}{
\tblrowval{\hspace{-2ex}<{\bf foo}>:}{}{\textbf{CFA}}{\textbf{ra}}
\rowonly<3>{\tblhl{}} \tblrowval{push}{\%r15}{rsp+8}{c-8}
\rowonly<4>{\tblhl{}} \tblrowval{push}{\%r14}{rsp+16}{c-8}
\rowonly<5>{\tblhl{}} \tblrowval{mov}{\$0x3,\%eax}{rsp+24}{c-8}
\rowonly<6>{\tblhl{}} \tblrowval{push}{\%r13}{rsp+24}{c-8}
\tblrowval{push}{\%r12}{rsp+32}{c-8}
\tblrowval{push}{\%rbp}{rsp+40}{c-8}
\tblrowval{push}{\%rbx}{rsp+48}{c-8}
\tblrowval{sub}{\$0x68,\%rsp}{rsp+56}{c-8}
\rowonly<9>{\tblhl{}} \tblrowval{cmp}{\$0x1,\%edi}{rsp+160}{c-8}%
}
\only<-8>{
\begin{table}
\ttfamily\large
\begin{tabularx}{0.9\linewidth}{
l
b
>{\columncolor{SkyBlue}}s
>{\columncolor{SkyBlue}}s
}
\firsttblrows{}%
\tblrowval{add}{\$0x68,\%rsp}{rsp+160}{c-8}
\tblrowval{pop}{\%rbx}{rsp+56}{c-8}
\tblrowval{pop}{\%rbp}{rsp+48}{c-8}
\end{tabularx}
\end{table}
\blknote{
\centering
\begin{overlayarea}{0.9\textwidth}{4.8ex}
\only<3>{Upon function call, \alert{ra = *(\reg{rsp})} (ABI)}
\only<4>{\texttt{push} decreases \reg{rsp} by 8: %
\alert{ra = *(\reg{rsp} + 8)}}
\only<5>{and again: %
\alert{ra = *(\reg{rsp} + 16)}}
\only<6>{This \texttt{mov} leaves \reg{rsp} untouched: %
\alert{ra = *(\reg{rsp} + 16)}}
\only<7>{The unwinding table can actually be seen as\\
an \alert{abstract interpretation} of the code\ldots}
\only<8>{\ldots and thus, for a given run, be
\alert{re-computed for verification}}
\end{overlayarea}
}
}
\only<9->{
\begin{table}
\ttfamily\large
\begin{tabularx}{0.9\linewidth}{
l
b
>{\columncolor{SkyBlue}}s
>{\columncolor{SkyBlue}}s
}
\firsttblrows{}%
\end{tabularx}
\end{table}
\vspace{-0.8em}
\only<9>{\blklnote{If, within an execution,
\begin{itemize}
\item ra = \texttt{*(0xFFFF1098)}
\item \reg{rsp} = \texttt{0xFFFF1000}
\end{itemize}
We can \alert{evaluate both expressions} and \alert{compare}
}}
}
\end{frame}
\begin{frame}{Dynamic validation}
\textbf{Abstract state}
\begin{itemize}
\item \alert{Stack} of actual \alert{addresses} where
\alert{return addresses} are stored
\end{itemize}
\vspace{2em}\pause{}
\textbf{Abstract instruction semantics}
\begin{itemize}
\item[\alert{\texttt{call}}] push \alert{\reg{rsp}} on the stack
\item[\alert{\texttt{ret}}] pop from the stack
\end{itemize}
\vspace{2em}\pause{}
\textbf{Validation of each instruction}
\begin{itemize}
\item Evaluate the return address provided by DWARF
\item Compare it with the value at the top of the stack
\end{itemize}
\end{frame}
\begin{frame}{In practice: \texttt{eh\_frame\_check}}
Strategy implemented and working: \alert{\texttt{eh\_frame\_check}}
\begin{itemize}
\item \texttt{gdb} allows for Python instrumentation
\vspace{1em}\pause{}
\item Parse ELF and DWARF data (\texttt{pyelftools})
\item Run the binary inside gdb
\item Pause at each (assembly) step
\item Jointly evaluate DWARF data and the abstract stack
\item Report upon error
\end{itemize}
\vspace{1em}
Works, but\ldots{} \alert{Python is slow}!
A few thousand ASM instructions per second (good enough)
\end{frame}
\begin{frame}{A real bug!}
\begin{columns}[c]
\begin{column}{0.65\textwidth}
\lstinputlisting[language=C]{src/llvm_bug.c}
\end{column} \begin{column}{0.35\textwidth}
\textbf{\texttt{CSmith}\\+ \texttt{Creduce}\\+
\texttt{eh\_frame\_check}}
\vspace{2em}$\leadsto$ \alert{\bf LLVM (3.8) bug!}
\end{column}
\end{columns}
\end{frame}
% From here on \tblrowval takes five cells instead of four and shows all of
% them on every overlay (the overlay-dependent hiding of the last two cells
% is dropped).
\renewcommand{\tblrowval}[5]{#1 & #2 & #3 & #4 & #5 \\}
\begin{frame}{A real bug!}
\begin{columns}[c]
\column{0.7\textwidth}
\begin{align*}
\onslide<2->{\textbf{Abstract state} \qquad &
\left[\texttt{0xFFFF1000}\right]} \\
\onslide<3->{\reg{rsp} \qquad & %
~\,\texttt{%
\only<3-4>{0xFFFF1000}%
\only<5-8>{0xFFFF0FF8}%
\only<9->{0xFFFF1000}%
}
}
\end{align*}
\column{0.3\textwidth}
{\vspace{-4mm}\bf \fontsize{2cm}{5.5cm}\selectfont %
\only<4>{\cmark}%
\only<6>{\cmark}%
\only<8>{\cmark}%
\only<10->{\xmark}%
}
\end{columns}
\vspace{1em}
\begin{table}
\ttfamily\large
\begin{tabularx}{0.95\linewidth}{
l
l
b
>{\columncolor{SkyBlue}}s
>{\columncolor{SkyBlue}}s
}
\tblrowval{\hspace{-2ex}<{\bf foo}>:}{}{}{\textbf{CFA}}{\textbf{ra}}
%\rowonly<3>{\tblhl{}} \tblrowval{4004e0}{push}{\%rbx}{rsp+8}{c-8}
\rowonly<2-4>{\tblhl{}} \tblrowval{4004e0}{push}{\%rbx}{rsp+8}{c-8}
\rowonly<5-6>{\tblhl{}} \tblrowval{}{}{}{rsp+16}{c-8}
\tblrowval{}{[\ldots]}{}{}{}
\tblrowval{}{}{}{}{}
\rowonly<7-8>{\tblhl{}} \tblrowval{40061d}{pop}{\%rbx}{rsp+16}{c-8}
\rowonly<9->{\tblhl{}} \tblrowval{40061e}{retq}{}{rsp+16}{c-8}
\end{tabularx}
\end{table}
\begin{center}
\bf\Large %
\onslide<11>{$\leadsto$ LLVM bug \#13161}
\end{center}
\end{frame}
\begin{frame}{What for, in the end?}
\begin{itemize}[<+->]
\item We can \alert{find bugs} in compilers
\item We can \alert{validate DWARF tables}!
\item \ldots{}well, only along \alert{one execution path}\ldots
\item but mostly we are close to a working \alert{algorithm} to
\alert{synthesize unwinding data from assembly}!
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data synthesis from assembly}
\begin{frame}{What have we got so far?}
We now want to \alert{synthesize unwinding data}. \pause{}That means
\alert{forgetting the blue part of the previous schemes}.
\begin{itemize}[<+->]
\item Upon entering a function, we know (ABI)
\[ \cfa = \reg{rsp} + 8
\qquad \ra = \cfa - 8 \]
\item For each instruction, we know \alert{how it changes \cfa}.
\item We assume \alert{\ra{} constant w.r.t.\ \cfa}.
\begin{itemize}
\item[$\leadsto$] only \cfa{} tracking matters (for unwinding)
\end{itemize}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data compilation}
\subsection{Compilation ahead-of-time}
\begin{frame}{Compilation overview}
\begin{itemize}
\item Compiled to \alert{C code}
\item C code then \alert{compiled to native binary} (gcc)
\begin{itemize}
\item[$\leadsto$] gcc optimisations for free
\end{itemize}
\item Compiled as \alert{separate \texttt{.so} files}, called \ehelfs{}
\bigskip{}
\item Morally a \alert{monolithic switch} on IPs
\item Each case contains assembly that computes a \alert{row of the
table}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Compilation example: original C, DWARF}
\lstinputlisting[language=C]{src/fib7/fib7.cfde}
\end{frame}
\begin{frame}[shrink]{Compilation example: generated C}
\lstinputlisting[language=C]{src/fib7/fib7.eh_elf_basic.c}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Compilation choices}
\textbf{In order to keep the compiler \alert{simple} and \alert{easily
testable}, only a subset of the DWARF5 instruction set is supported.}
\begin{itemize}
\item Focus on \alert{x86\_64}
\item Focus on unwinding return address \\
\vspace{0.3ex}
$\leadsto$ \textit{Allows building a backtrace}
\begin{itemize}
\item \alert{suitable for perf, not for gdb}
\item Only supports \alert{unwinding registers}: \reg{rip}, \reg{rsp},
\reg{rbp}, \reg{rbx}
\item Supports the \alert{vast majority} ($> 99.9\%$) of instructions
used
\item Among \alert{4000} randomly sampled files, only \alert{24}
contain unsupported instructions
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{Interface: libunwind}
\begin{itemize}
\item \alert{libunwind}: \textit{de facto} standard library for
unwinding
\item Relies on DWARF
\bigskip{}
\item \texttt{libunwind-eh\_elf}: alternative implementation using
\ehelfs{}
\item[$\leadsto$] \alert{alternative implementation} of libunwind,
almost plug-and-play for existing projects!
\begin{itemize}
\item[$\leadsto$] It is \alert{easy} to use \ehelfs{}: just
link against the right library!
\end{itemize}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Size optimisation: outlining}
\begin{itemize}
\item This \alert{works}, but \alert{takes space}: about \alert{7 times
larger in size} than regular DWARF\@.
\item DWARF optimisation strategy: \alert{alter previous row}. \\
Causes slowness: we cannot do that.
\item Remark: a lot of lines appear often.
\begin{itemize}
\item[$\leadsto$] \textbf{\emph{outline} them!}
\end{itemize}
\pause{}
\item On libc, $20\,827$ rows $\rightarrow$ $302$ outlined ($1.5\,\%$)
\item Turn the big switch into a binary search \alert{if/else tree}
\end{itemize}
\pause{}
\bigskip{}
\begin{center}
$\leadsto$ only \textbf{2.5 times bigger than DWARF}
\end{center}
\end{frame}
\begin{frame}{Example with outlining}
\lstinputlisting[language=C]{src/fib7/fib7.eh_elf_outline.c}
\end{frame}
\begin{frame}[t]{A word on formalization}
\begin{itemize}
\item First task: \alert{writing semantics} for DWARF, written as
a mapping to C code.
\item DWARF5 specification: \alert{plain English}, no proper semantics
\item Compiled code is in substance equivalent to semantics
\item What remains to prove is mostly \alert{simple or classic
optimisations}
\end{itemize}
\pause{}
\vspace{-3cm}
\begin{center}
\includegraphics[width=0.8\linewidth, angle=10]{img/dw_spec.png}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Benchmarking}
\begin{frame}{Benchmarking requirements}
\begin{enumerate}
\item Thousands of samples (single unwind: $10\,\mu\mathrm{s}$)
\item Interesting enough program to unwind: nested functions, complex
FDEs
\item Mitigate caching: don't always unwind from the \emph{same} point
\item Yet be fair: don't always unwind from totally different places
\item Distribute evenly: if possible, also from within libraries
\end{enumerate}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{perf instrumentation}
\textbf{\alert{perf} is the state-of-the-art polling profiler for Linux.}
\begin{itemize}
\item{} used to get readings of the time spent in each function
\item{} works by regularly stopping the program, unwinding its stack,
then aggregating the gathered data
\end{itemize}
\pause{}\bigskip{}
\textbf{Instrumenting perf matches all the requirements!}
\begin{itemize}
\item{} \alert{Plug \ehelfs{} into perf}: use \ehelfs{} instead of
DWARF to unwind the stack
\item{} Implement \alert{unwinding performance counters} inside perf
\bigskip{}
\item{} Use perf on \alert{hackbench}, a kernel stress-test program
\begin{itemize}
\item Small program
\item Lots of calls
\item Relies on libc, libpthread
\end{itemize}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Results}
\begin{frame}{Time performance}
\small
\centering
\begin{tabular}{l r r r r}
\toprule
\thead{Unwinding method} & \thead{Frames \\ unwound}
& \thead{Tot.\ time \\ ($\mu\mathrm{s}$)}
& \thead{Avg. \\ time / frame \\ (ns)}
& \thead{Time \\ ratio} \\
\midrule
\alert{\ehelfs{}}
& 23506 % Frames unwound
& 14837 % Total time
& 631 % Avg time
& 1
\\
\prog{libunwind}, \alert{cached}
& 27058 % Frames unwound
& 441601 % Total time
& 16320 % Avg time
& \alert{25.9}
\\
\prog{libunwind}, \alert{uncached}
& 27058 % Frames unwound
& 671292 % Total time
& 24809 % Avg time
& \alert{39.3}
\\
\bottomrule
\end{tabular}
\end{frame}
\begin{frame}{Space performance}
\begin{center}
\begin{tabular}{r r r}
\toprule
\thead{Object}
& \thead{\% of binary size}
& \thead{Growth factor} \\
\midrule
libc
& 21.88 & 2.41 \\
libpthread
& 43.71 & 2.19 \\
ld
& 22.09 & 2.97 \\
hackbench
& 93.87 & 4.99 \\
\midrule
Total
& 22.81 & \alert{2.44} \\
\bottomrule
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section*{}
\setcounter{section}{0}
\begin{frame}{What next?}
\begin{itemize}
\item Implement a release-ready, packageable, easy to use version of
perf with \ehelfs{} and submit it for inclusion
\item{} Measure \alert{C++ exceptions overhead} precisely in common
software
\item{} Implement \alert{\ehelfs{}} support for \alert{C++ runtime}
exception handling, and other systems where unwinding is a
performance bottleneck
\medskip
\item \alert{Outlining} was effective for
compactness\ldots{} Try outlining DWARF bytecode\@?
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\vspace{5mm}
\includegraphics[width=\linewidth]{img/keep_breathing}
\vspace{-1cm}
\begin{center}
\large
\begin{align*}
\textbf{Slides: } &\text{\url{https://tobast.fr/m2/slides.pdf}} \\
\textbf{Report: } &\text{\url{https://tobast.fr/m2/report.pdf}}
\end{align*}
\end{center}
\end{frame}
\end{document}