% vim: spell spelllang=en
%
% NOTE(review): this file was recovered from a copy in which the physical lines
% had been joined, so every `%` comment swallowed the code following it. Line
% breaks have been re-inserted; blank lines (paragraph breaks) inside frames
% were NOT re-invented -- confirm against the rendered slides.
% \begin{frame}/\end{frame} are kept at column 0 on purpose: beamer requires
% \end{frame} to stand alone on its line for [fragile] frames.
\documentclass[11pt,xcolor={usenames,dvipsnames}]{beamer}
\usetheme{Warsaw}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\usepackage[T1]{fontenc}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{booktabs}
\usepackage{makecell}
\usepackage{ifthen}
\usepackage{colortbl}
\usepackage{tabularx}
\usepackage{pifont}
\usepackage{texlib/my_listings}
\usepackage{texlib/specific}
\usepackage{texlib/common}
\usepackage{texlib/todo}
\usepackage{inconsolata}

\lstset{basicstyle=\footnotesize\ttfamily}

% makecell table headers: centred, small and bold.
\renewcommand\theadalign{c}
\renewcommand\theadfont{\scriptsize\bfseries}

\setbeamertemplate{navigation symbols}{}
\setbeamertemplate{headline}{}

% Show #1 plainly on overlay 1, alerted on overlay 2.
\newcommand{\thenalert}[1]{\only<1>{#1}\only<2>{\alert{#1}}}

% "n/total" frame counter, empty while the frame counter is 0.
% Passed below as the *short author*.
\newcommand{\slidecountline}{
    \ifthenelse{\theframenumber = 0}
        {}
        {\insertframenumber/\inserttotalframenumber}}

% "<Roman section number> -- <section name>", empty while the section counter
% is 0. Passed below as the *short title*.
\newcommand{\sectionline}{
    \ifthenelse{\thesection = 0}
        {}
        {\Roman{section}~-- \insertsection}}

% Coloured check / cross dingbats. The colour is not reset: use inside a group.
\newcommand{\cmark}{\color{OliveGreen}\ding{52}}
\newcommand{\xmark}{\color{BrickRed}\ding{56}}

% Every \section opens with a frame showing the section name in a title box.
\AtBeginSection{
    \begin{frame}
        \vfill
        \centering
        \begin{beamercolorbox}[sep=8pt,center,shadow=true,rounded=true]{title}
            \usebeamerfont{title}\insertsectionhead\par%
        \end{beamercolorbox}
        \vfill
    \end{frame}
}

\lstdefinelanguage{gdb}{
    morekeywords={gdb},
    sensitive=false,
}

% tabularx column types: `b` is a plain X column, `s` an X column at 0.43 of
% the X width.
\newcolumntype{b}{X}
\newcolumntype{s}{>{\hsize=.43\hsize}X}

% Inline C listing in the typewriter font.
\newcommand{\lstinl}
    {\lstinline[language=C, keepspaces=true, basicstyle=\ttfamily]}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\title[\sectionline]
    {Growing the DWARF tougher:\\ synthesis, validation and compilation}
\author[\slidecountline]{\textbf{Théophile Bastian} \\
    \vspace{0.5em}
    {{\footnotesize{}Based on work done with}\\
    \textbf{Francesco Zappa Nardelli}, \textbf{Stephen Kell},
    \textbf{Simon Ser}}}
\date{}
%\subject{}
%\logo{}
\institute{ENS Paris, INRIA}

\begin{document}

\begin{frame}
    % Title and outline frames are not counted in the frame total.
    \addtocounter{framenumber}{-1}
    \titlepage{}
    \vspace{-2em}
    \begin{center}
        \begin{align*}
            \text{Slides: } &\text{\url{https://tobast.fr/files/sweden18.pdf}} \\
        \end{align*}
    \end{center}
\end{frame}

\begin{frame}{~}
    \addtocounter{framenumber}{-1}
    \tableofcontents[hideallsubsections]
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{DWARF and stack unwinding data}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Introduction}

\begin{frame}[fragile]{We often use stack unwinding!}
    \begin{columns}[c]
        \begin{column}{0.70\textwidth}
% Listing body is verbatim: it must stay flush left.
\begin{lstlisting}[language=gdb, numbers=none, escapechar=|]
Program received signal SIGSEGV.
0x54625 in fct_b at segfault.c:5
5       printf("%l\n", *b);
|\pause|
(gdb) backtrace
#0  0x54625 in fct_b at segfault.c:5
#1  0x54663 in fct_a at segfault.c:10
#2  0x54674 in main at segfault.c:14
|\pause|
(gdb) frame 1
#1  0x54663 in fct_a at segfault.c:10
10      fct_b((int*) a);
|\pause|
(gdb) print a
$1 = 84
\end{lstlisting}
            \vspace{-1em}
            \pause{}
            \begin{center}
                \textbf{\Large How does it work?!}
            \end{center}
        \end{column}
        \begin{column}{0.35\textwidth}
            \pause{}
            \includegraphics[width=0.95\linewidth]{img/call_stack}
        \end{column}
    \end{columns}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Stack frames and unwinding}

\begin{frame}{Call stack and registers}
    \begin{columns}[c]
        \begin{column}{0.55\textwidth}
            \begin{center}
                \large\bfseries
                How do we get the grandparent RA\@?
                \medskip
                Isn't it as trivial as \texttt{pop()}?
                \vspace{2em}
                \only<2>{We only have \reg{rsp} and \reg{rip}.}
            \end{center}
        \end{column}
        \begin{column}{0.45\textwidth}
            \includegraphics[width=0.95\linewidth]{img/call_stack}
        \end{column}
    \end{columns}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{DWARF tables}

\newcolumntype{a}{>{\columncolor{RedOrange}}l}

\begin{frame}{DWARF unwinding data}
    \vspace{2em}
    \ttfamily \footnotesize
    \begin{tabular}{
            >{\columncolor{YellowGreen}}l
            >{\columncolor{Thistle}}l
            l l l l l l
            >{\columncolor{Apricot}}l}
        ~LOC    & CFA    & rbx  & rbp  & r12  & r13  & r14  & r15  & ra  \\
        0084950 & rsp+8  & u    & u    & u    & u    & u    & u    & c-8 \\
        0084952 & rsp+16 & u    & u    & u    & u    & u    & c-16 & c-8 \\
        0084954 & rsp+24 & u    & u    & u    & u    & c-24 & c-16 & c-8 \\
        0084956 & rsp+32 & u    & u    & u    & c-32 & c-24 & c-16 & c-8 \\
        0084958 & rsp+40 & u    & u    & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084959 & rsp+48 & u    & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        \rowcolor{Aquamarine}
        008495a & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084962 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084a19 & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084a1d & rsp+48 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084a1e & rsp+40 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084a20 & rsp+32 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084a22 & rsp+24 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084a24 & rsp+16 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084a26 & rsp+8  & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084a30 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
    \end{tabular}
    \pause{}
    % Negative skip: the logo is drawn over the table.
    \vspace{-3cm}
    \hfill\includegraphics[height=3cm, angle=45, origin=c]{img/dwarf_logo}
    \hspace{-1cm}
\end{frame}

\begin{frame}[t, fragile]{The real DWARF}
% Listing body is verbatim: it must stay flush left.
\begin{lstlisting}[numbers=none, language=]
00009b30 48 009b34 FDE cie=0000 pc=0084950..0084b37
  DW_CFA_advance_loc: 2 to 0000000000084952
  DW_CFA_def_cfa_offset: 16
  DW_CFA_offset: r15 (r15) at cfa-16
  DW_CFA_advance_loc: 2 to 0000000000084954
  DW_CFA_def_cfa_offset: 24
  DW_CFA_offset: r14 (r14) at cfa-24
  DW_CFA_advance_loc: 2 to 0000000000084956
  DW_CFA_def_cfa_offset: 32
  DW_CFA_offset: r13 (r13) at cfa-32
  DW_CFA_advance_loc: 2 to 0000000000084958
  DW_CFA_def_cfa_offset: 40
  DW_CFA_offset: r12 (r12) at cfa-40
  DW_CFA_advance_loc: 1 to 0000000000084959
[...]
\end{lstlisting}
    \begin{itemize}
        \item[\textbf{$\longrightarrow$}]
            \textbf{\alert{constructed} on-demand by a
                \alert{Turing-complete bytecode}!}
    \end{itemize}
    \pause{}
    % Negative skip: the punchline is stamped over the listing.
    \vspace{-6.5cm}
    \begin{center}
        \bfseries \fontsize{8cm}{1cm}
        \colorbox{white}{\alert{Complex}} \\
        \colorbox{white}{\alert{\& slow!}}
    \end{center}
\end{frame}

\begin{frame}{Why does slow matter?}
    \begin{itemize}
        \item{} After all, we're talking about \alert{debugging procedures}
            run by a \alert{human being} (slower than the machine).
            \ldots{}or are we?
    \end{itemize}
    \pause{}
    \begin{center}
        \textbf{\Large{}No!}
    \end{center}
    \begin{itemize}
        \pause{}\item{} Pretty much any \alert{program analysis tool}
        \pause{}\item{} \alert{Profiling} with polling profilers
        \pause{}\item{} \alert{Exception handling} in C++
    \end{itemize}
    \vspace{2em}
    \begin{center}
        \textbf{\Large{}Debug data is not only for debugging}
    \end{center}
\end{frame}

\begin{frame}{Difficult to generate}
    Major concern with DWARF\@: it is \alert{difficult to generate}
    (correctly).
    \begin{itemize}
        \item{} \alert{Hard to generate}: each compiler pass must keep it in
            sync
        \item{} Most of it is \alert{seldom used} (\eg{} unwinding data of
            dusty code), and thus \alert{seldom tested}
    \end{itemize}
    \vspace{1em}
    Leads to
    \begin{itemize}
        \item{} unreliable DWARF\@: can cause headaches when debugging
        \item{} or not generated at all (\eg{} OCaml until recently)
            \todo{Check this}
    \end{itemize}
    \vspace{1em}
    \begin{center}
        \Large\bfseries $\leadsto$ Complex, buggy, untested
    \end{center}
\end{frame}

% Quotes reused by the "debugging hell" frame and by the conclusion.
\newcommand{\LinusMailOne}{
    ``Sorry, but last time was too f\dots painful. The whole (and only) point
    of unwinders is to make debugging easy when a bug occurs. But \alert{the
    dwarf unwinder had bugs} itself, or \alert{our dwarf information had
    bugs}, and in either case it actually turned several trivial bugs into a
    \alert{total undebuggable hell}.''
}
\newcommand{\LinusMailTwo}{
    ``If you can \alert{mathematically prove that the unwinder is correct} —
    even in the presence of bogus and actively incorrect unwinding information
    — and never ever follows a bad pointer, \alert{I’ll reconsider}.''
}
\newcommand{\LinusSource}{
    \hfill ---~Linus Torvalds, Kernel mailing list, 2012
}

\begin{frame}{A debugging hell: Linux kernel}
    \LinusMailOne{}
    \vspace{1em}
    \LinusSource{}
    \pause{}\vspace{1em}
    \begin{center}
        \Large\bfseries \alert{This is where we still are!}
    \end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data validation}

\begin{frame}{Main idea}
    \begin{itemize}
        \item If we follow \alert{one path of execution}, we can track the
            return address location
        \item If we read unwinding data \alert{at runtime}, we can
            \alert{check the RA consistency} at each step
    \end{itemize}
\end{frame}

% One table row: #1 mnemonic, #2 operands, then the CFA (#3) and ra (#4)
% cells, which only appear from overlay 2 on.
\newcommand{\tblrowval}[4]{#1 & #2 & \only<2->{#3} & \only<2->{#4} \\}
% Untitled block holding a large note, centred (\blknote) or not (\blklnote).
\newcommand{\blknote}[1]
    {\begin{block}{} \centering\large #1 \end{block}}
\newcommand{\blklnote}[1]
    {\begin{block}{} \large #1 \end{block}}
% Row highlight colour.
\newcommand{\tblhl}{\rowcolor{Tan}}

\begin{frame}{Example}
    % Rows shared by the two variants of the table below.
    \newcommand{\firsttblrows}{
        \tblrowval{\hspace{-2ex}<{\normalfont\bfseries foo}>:}{}{\textbf{CFA}}{\textbf{ra}}
        \rowonly<3>{\tblhl{}}
        \tblrowval{push}{\%r15}{rsp+8}{c-8}
        \rowonly<4>{\tblhl{}}
        \tblrowval{push}{\%r14}{rsp+16}{c-8}
        \rowonly<5>{\tblhl{}}
        \tblrowval{mov}{\$0x3,\%eax}{rsp+24}{c-8}
        \rowonly<6>{\tblhl{}}
        \tblrowval{push}{\%r13}{rsp+24}{c-8}
        \tblrowval{push}{\%r12}{rsp+32}{c-8}
        \tblrowval{push}{\%rbp}{rsp+40}{c-8}
        \tblrowval{push}{\%rbx}{rsp+48}{c-8}
        \tblrowval{sub}{\$0x68,\%rsp}{rsp+56}{c-8}
        \rowonly<9>{\tblhl{}}
        \tblrowval{cmp}{\$0x1,\%edi}{rsp+160}{c-8}%
    }
    % Overlays 1-8: full table plus a running commentary.
    \only<-8>{
        \begin{table}
            \ttfamily\large
            \begin{tabularx}{0.9\linewidth}{
                    l b >{\columncolor{SkyBlue}}s >{\columncolor{SkyBlue}}s
                }
                \firsttblrows{}%
                \tblrowval{add}{\$0x68,\%rsp}{rsp+160}{c-8}
                \tblrowval{pop}{\%rbx}{rsp+56}{c-8}
                \tblrowval{pop}{\%rbp}{rsp+48}{c-8}
            \end{tabularx}
        \end{table}
        \blknote{
            \centering
            \begin{overlayarea}{0.9\textwidth}{4.8ex}
                \only<3>{Upon function call,
                    \alert{ra = *(\reg{rsp})} (ABI)}
                \only<4>{\texttt{push} decreases \reg{rsp} by 8: %
                    \alert{ra = *(\reg{rsp} + 8)}}
                \only<5>{and again: %
                    \alert{ra = *(\reg{rsp} + 16)}}
                \only<6>{This \texttt{mov} leaves \reg{rsp} untouched: %
                    \alert{ra = *(\reg{rsp} + 16)}}
                \only<7>{The unwinding table can actually be seen as\\
                    an \alert{abstract interpretation} of the code\ldots}
                \only<8>{\ldots and thus, for a given run, be
                    \alert{re-computed for verification}}
            \end{overlayarea}
        }
    }
    % Overlay 9 on: shortened table plus a concrete evaluation.
    % rsp+160-8 = 0xFFFF1000 + 0x98, hence the two values below.
    \only<9->{
        \begin{table}
            \ttfamily\large
            \begin{tabularx}{0.9\linewidth}{
                    l b >{\columncolor{SkyBlue}}s >{\columncolor{SkyBlue}}s
                }
                \firsttblrows{}%
            \end{tabularx}
        \end{table}
        \vspace{-0.8em}
        \only<9>{\blklnote{If, within an execution,
            \begin{itemize}
                \item ra = \texttt{*(0xFFFF1098)}
                \item \reg{rsp} = \texttt{0xFFFF1000}
            \end{itemize}
            We can \alert{evaluate both expressions} and \alert{compare}
        }}
    }
\end{frame}

\begin{frame}{Dynamic validation}
    \textbf{Abstract state}
    \begin{itemize}
        \item \alert{Stack} of actual \alert{addresses} where \alert{return
            addresses} are stored
    \end{itemize}
    \vspace{2em}\pause{}
    \textbf{Abstract instruction semantics}
    \begin{itemize}
        \item[\alert{\texttt{call}}] push \alert{\reg{rsp}} on the stack
        \item[\alert{\texttt{ret}}] pop from the stack
    \end{itemize}
    \vspace{2em}\pause{}
    \textbf{Validation of each instruction}
    \begin{itemize}
        \item Evaluate the return address provided by DWARF
        \item Compare it with the value at the top of the stack
    \end{itemize}
\end{frame}

\begin{frame}{In practice: \texttt{eh\_frame\_check}}
    Strategy implemented and working: \alert{\texttt{eh\_frame\_check}}
    \begin{itemize}
        \item \texttt{gdb} allows for Python instrumentation
            \vspace{1em}\pause{}
        \item Parse ELF and DWARF data (\texttt{pyelftools})
        \item Run the binary inside gdb
        \item Pause at each (assembly) step
        \item Jointly evaluate DWARF data and the abstract stack
        \item Report upon error
    \end{itemize}
    \vspace{1em}
    Works, but… \alert{Python is slow}!
    A few thousand ASM instructions/second (good enough)
\end{frame}

\begin{frame}{A real bug!}
    \begin{columns}[c]
        \begin{column}{0.65\textwidth}
            \lstinputlisting[language=C]{src/llvm_bug.c}
        \end{column}
        \begin{column}{0.35\textwidth}
            \textbf{\texttt{CSmith}\\+ \texttt{Creduce}\\+
                \texttt{eh\_frame\_check}}
            \vspace{2em}$\leadsto$ \alert{\bfseries LLVM (3.8) bug!}
        \end{column}
    \end{columns}
\end{frame}

% From here on rows carry a leading address column and no overlay on CFA/ra.
\renewcommand{\tblrowval}[5]{#1 & #2 & #3 & #4 & #5 \\}

\begin{frame}{A real bug!}
    \begin{columns}[c]
        \column{0.7\textwidth}
            \begin{align*}
                \onslide<2->{\textbf{Abstract state} \qquad &
                    \left[\texttt{0xFFFF1000}\right]} \\
                \onslide<3->{\reg{rsp} \qquad & %
                    ~\,\texttt{%
                        \only<3-4>{0xFFFF1000}%
                        \only<5-8>{0xFFFF0FF8}%
                        \only<9->{0xFFFF1000}%
                    }
                }
            \end{align*}
        \column{0.3\textwidth}
            % Verdict mark for the currently highlighted row.
            {\vspace{-4mm}\bfseries \fontsize{2cm}{5.5cm}\selectfont %
                \only<4>{\cmark}%
                \only<6>{\cmark}%
                \only<8>{\cmark}%
                \only<10->{\xmark}%
            }
    \end{columns}
    \vspace{1em}
    \begin{table}
        \ttfamily\large
        \begin{tabularx}{0.95\linewidth}{
                l l b >{\columncolor{SkyBlue}}s >{\columncolor{SkyBlue}}s
            }
            \tblrowval{\hspace{-2ex}<{\normalfont\bfseries foo}>:}{}{}{\textbf{CFA}}{\textbf{ra}}
            % NOTE(review): in the collapsed source the extent of the comment
            % below was ambiguous; assumed it disables the whole old row, so
            % that the 4004e0 row appears only once -- confirm.
            %\rowonly<3>{\tblhl{}} \tblrowval{4004e0}{push}{\%rbx}{rsp+8}{c-8}
            \rowonly<2-4>{\tblhl{}}
            \tblrowval{4004e0}{push}{\%rbx}{rsp+8}{c-8}
            \rowonly<5-6>{\tblhl{}}
            \tblrowval{}{}{}{rsp+16}{c-8}
            \tblrowval{}{[\ldots]}{}{}{}
            \tblrowval{}{}{}{}{}
            \rowonly<7-8>{\tblhl{}}
            \tblrowval{40061d}{pop}{\%rbx}{rsp+16}{c-8}
            \rowonly<9->{\tblhl{}}
            \tblrowval{40061e}{retq}{}{rsp+16}{c-8}
        \end{tabularx}
    \end{table}
    \begin{center}
        \bfseries\Large %
        \onslide<11>{$\leadsto$ LLVM bug \#13161}
    \end{center}
\end{frame}

\begin{frame}{What for, in the end?}
    \begin{itemize}[<+->]
        \item We can \alert{find bugs} in compilers
        \item We can \alert{validate DWARF tables}!
        \item \ldots{}well, only along \alert{one execution path}\ldots
        \item but mostly we are close to a working \alert{algorithm} to
            \alert{synthesize unwinding data from assembly}!
    \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data synthesis from assembly}

\begin{frame}{What have we got so far?}
    We now want to \alert{synthesize unwinding data}.
    \pause{}That means \alert{forgetting the blue part of the previous
    schemes}.
    \begin{itemize}[<+->]
        \item Upon entering a function, we know (ABI)
            % Signs agree with the tables of the previous sections: at
            % function entry the first row reads CFA = rsp+8, ra = c-8.
            \[
                \cfa = \reg{rsp} + 8 \qquad \ra = \cfa - 8
            \]
        \item For each instruction, we know \alert{how it changes \cfa}.
        \item We assume \alert{\ra{} constant w.r.t.\ \cfa}.
            \begin{itemize}
                \item[$\leadsto$] only \cfa{} tracking matters (for unwinding)
            \end{itemize}
        \item We had a working strategy for a \alert{linear execution}
        \item We still have to handle
            \begin{itemize}
                \item \alert{\cfa{} expression}
                \item \alert{control flow graph}
            \end{itemize}
    \end{itemize}
\end{frame}

\begin{frame}{\cfa{} expression}
    Two possibilities:
    \begin{itemize}
        \item Either \reg{rbp} is used as base pointer
            \pause{}
        \item Or we must track \cfa{} w.r.t.\ \reg{rsp}
            \begin{itemize}
                \item And update it after each instruction if needed
            \end{itemize}
    \end{itemize}
\end{frame}

\begin{frame}{Control flow graph}
    \begin{columns}[c]
        \column{0.4\textwidth}
            \lstinputlisting[language=C]{src/cfg/cfg.c}
            \pause{}
        \column{0.30\textwidth}
            \begin{figure}
                \centering
                \includegraphics[width=\textwidth]{src/cfg/cfg.png}
            \end{figure}
    \end{columns}
    \pause{}
    \begin{itemize}
        \item \alert{Upon split} (\eg{} \texttt{X})\alert{:} nothing special,
            propagate end state of X to children nodes A and B
        \item \alert{Upon join} (\eg{} \texttt{while\_end})\alert{:} check
            consistency of both input states
            \begin{itemize}
                \item If tricky, \texttt{gcc} will have used \reg{rbp}, even
                    with \texttt{-fomit-frame-pointer}.
            \end{itemize}
    \end{itemize}
\end{frame}

% Plain four-column rows for the table of the next frame.
\renewcommand{\tblrowval}[4]{#1 & #2 & #3 & #4 \\}

\begin{frame}{Trust the compiler to avoid tricky unwinding}
    \begin{columns}[c]
        \column{0.55\textwidth}
            \lstinputlisting[language=C,firstline=3,lastline=7]
                {src/use_rbp/use_rbp.c}
            \pause{}
            \begin{itemize}
                \item At each loop cycle, \alert{\texttt{y} is larger} and
                    \alert{allocated on the stack}
                \item Thus, \reg{rsp} is \alert{farther from \cfa} at each
                    cycle: no constant rule $\cfa = \reg{rsp} + k$.
                \item A complex DWARF expression is possible, but \alert{the
                    compiler won't}.
            \end{itemize}
            \pause{}
        \column{0.45\textwidth}
            \lstbash{\$ gcc -O0 -g -c src.c -fomit-frame-pointer}
            \vspace{1em}
            \begin{center}
                \ttfamily
                \begin{tabular}{l l l l}
                    \tblrowval{LOC} {CFA} {rbp} {ra}
                    \tblrowval{000} {rsp+8} {u} {c-8}
                    \tblrowval{001} {rsp+16} {c-16} {c-8}
                    \tblrowval{004} {\alert{rbp}+16} {c-16} {c-8}
                    \tblrowval{010} {\alert{rbp}+16} {c-16} {c-8}
                    \tblrowval{0ce} {rsp+8} {c-16} {c-8}
                \end{tabular}
            \end{center}
    \end{columns}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data compilation}

\subsection{Compilation ahead-of-time}

\begin{frame}{Compilation overview}
    \begin{itemize}
        \item Compiled to \alert{C code}
        \item C code then \alert{compiled to native binary} (gcc)
            \begin{itemize}
                \item[$\leadsto$] gcc optimisations for free
            \end{itemize}
        \item Compiled as \alert{separate \texttt{.so} files}, called
            \ehelfs{}
            \bigskip{}
        \item Morally a \alert{monolithic switch} on IPs
        \item Each case contains assembly that computes a \alert{row of the
            table}
    \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% NOTE(review): recovered from a copy in which the physical lines had been
% joined, so every `%` comment swallowed the code following it. Line breaks
% have been re-inserted; blank lines (paragraph breaks) inside frames were NOT
% re-invented -- confirm against the rendered slides.
\begin{frame}{Compilation example: original C, DWARF}
    \lstinputlisting[language=C]{src/fib7/fib7.cfde}
\end{frame}

\begin{frame}[shrink]{Compilation example: generated C}
    \lstinputlisting[language=C]{src/fib7/fib7.eh_elf_basic.c}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}{Compilation choices}
    \textbf{In order to keep the compiler \alert{simple} and \alert{easily
    testable}, the whole DWARF5 instruction set is not supported.}
    \begin{itemize}
        \item Focus on \alert{x86\_64}
        \item Focus on unwinding return address \\
            \vspace{0.3ex}
            $\leadsto$ \textit{Allows building a backtrace}
            \begin{itemize}
                \item \alert{suitable for perf, not for gdb}
                \item Only supports \alert{unwinding registers}: \reg{rip},
                    \reg{rsp}, \reg{rbp}, \reg{rbx}
                \item Supports the \alert{vast majority} ($> 99.9\%$) of
                    instructions used
                \item Among \alert{4000} randomly sampled files, only
                    \alert{24} contain unsupported instructions
            \end{itemize}
    \end{itemize}
\end{frame}

\begin{frame}{Interface: libunwind}
    \begin{itemize}
        \item \alert{libunwind}: \textit{de facto} standard library for
            unwinding
        \item Relies on DWARF
            \bigskip{}
        \item \texttt{libunwind-eh\_elf}: alternative implementation using
            \ehelfs{}
        \item[$\leadsto$] \alert{alternative implementation} of libunwind,
            almost plug-and-play for existing projects!
            \begin{itemize}
                \item[$\leadsto$] It is \alert{easy} to use \ehelfs{}: just
                    link against the right library!
            \end{itemize}
    \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}{Size optimisation: outlining}
    \begin{itemize}
        \item This \alert{works}, but \alert{takes space}: about \alert{7
            times larger in size} than regular DWARF\@.
        \item DWARF optimisation strategy: \alert{alter previous row}. \\
            Causes slowness: we cannot do that.
        \item Remark: a lot of lines appear often.
            \begin{itemize}
                \item[$\leadsto$] \textbf{\emph{outline} them!}
            \end{itemize}
            \pause{}
        \item On libc, $20\,827$ rows $\rightarrow$ $302$ outlined
            ($1.5\,\%$)
        \item Turn the big switch into a binary search \alert{if/else tree}
    \end{itemize}
    \pause{}
    \bigskip{}
    \begin{center}
        $\leadsto$ only \textbf{2.5 times bigger than DWARF}
    \end{center}
\end{frame}

\begin{frame}{Example with outlining}
    \lstinputlisting[language=C]{src/fib7/fib7.eh_elf_outline.c}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Benchmarking}

\begin{frame}{Benchmarking requirements}
    \begin{enumerate}
        \item Thousands of samples (single unwind: $10\,\mu\mathrm{s}$)
        \item Interesting enough program to unwind: nested functions, complex
            FDEs
        \item Mitigate caching: don't always unwind from the \emph{same} point
        \item Yet be fair: don't always unwind from totally different places
        \item Distribute evenly: if possible, also from within libraries
    \end{enumerate}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}{perf instrumentation}
    \textbf{\alert{perf} is a state-of-the-art polling profiler for Linux.}
    \begin{itemize}
        \item{} used to get readings of the time spent in each function
        \item{} works by regularly stopping the program, unwinding its stack,
            then aggregating the gathered data
    \end{itemize}
    \pause{}\bigskip{}
    \textbf{Instrumenting perf matches all the requirements!}
    \begin{itemize}
        \item{} \alert{Plug \ehelfs{} into perf}: use \ehelfs{} instead of
            DWARF to unwind the stack
        \item{} Implement \alert{unwinding performance counters} inside perf
            \bigskip{}
        \item{} Use perf on \alert{hackbench}, a kernel stress-test program
            \begin{itemize}
                \item Small program
                \item Lots of calls
                \item Relies on libc, libpthread
            \end{itemize}
    \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Results}

\begin{frame}{Time performance}
    \small
    \centering
    % Five columns are used, so five are declared: a spare column would make
    % the booktabs rules overhang the content.
    \begin{tabular}{l r r r r}
        \toprule
        \thead{Unwinding method} & \thead{Frames \\ unwound}
            & \thead{Tot.\ time \\ ($\mu\mathrm{s}$)}
            & \thead{Avg. \\ time / frame \\ ($\mathrm{ns}$)}
            & \thead{Time \\ ratio} \\
        \midrule
        \alert{\ehelfs{}}
            & 23506 % Frames unwound
            & 14837 % Total time
            & 631 % Avg time
            & 1
            \\
        \prog{libunwind}, \alert{cached}
            & 27058 % Frames unwound
            & 441601 % Total time
            & 16320 % Avg time
            & \alert{25.9}
            \\
        \prog{libunwind}, \alert{uncached}
            & 27058 % Frames unwound
            & 671292 % Total time
            & 24809 % Avg time
            & \alert{39.3}
            \\
        \bottomrule
    \end{tabular}
\end{frame}

\begin{frame}{Space performance}
    \begin{center}
        % Three columns are used, so three are declared (see above).
        \begin{tabular}{r r r}
            \toprule
            \thead{Object} & \thead{\% of binary size}
                & \thead{Growth factor} \\
            \midrule
            libc & 21.88 & 2.41 \\
            libpthread & 43.71 & 2.19 \\
            ld & 22.09 & 2.97 \\
            hackbench & 93.87 & 4.99 \\
            \midrule
            Total & 22.81 & \alert{2.44} \\
            \bottomrule
        \end{tabular}
    \end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section*{Conclusion}
% Reset the section counter to 0.
\setcounter{section}{0}

\begin{frame}{}
    \LinusMailOne{}
    \only<1>{\LinusSource{}}
    % The braces are required: unbraced, \only would take the single token
    % \vspace as its argument and overlay 1 would typeset a stray "1em".
    \only<2->{\vspace{1em}}
    \pause{}
    \LinusMailTwo{}
    \LinusSource{}
    \pause{}
    \vspace{1em}
    \begin{center}
        \bfseries Give us a few months to make Linus reconsider \texttt{;)}
    \end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
    \vspace{5mm}
    \includegraphics[width=\linewidth]{img/keep_breathing}
    \vspace{-1cm}
    \begin{center}
        \large
        \begin{align*}
            \textbf{Slides: } &\text{\url{https://tobast.fr/files/sweden18.pdf}} \\
        \end{align*}
    \end{center}
\end{frame}

\end{document}