% vim: spell spelllang=en
%
% Beamer slides: "Reliable and Fast DWARF-based Stack Unwinding".
%
% Layout of this file:
%   1. preamble: packages, beamer setup, presentation-wide helper macros;
%   2. title frame;
%   3. one \section per part of the talk, each made of a few frames.
%
% Several macros used below (\reg, \cfa, \ra, \prog, \ehelfs, \rowonly,
% \todo) are not defined in this file; they are presumably provided by the
% texlib/* packages loaded in the preamble -- TODO confirm.

\documentclass[11pt,xcolor={usenames,dvipsnames}]{beamer}
\usetheme{Warsaw}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\usepackage[T1]{fontenc}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{booktabs}
\usepackage{makecell}
\usepackage{ifthen}
\usepackage{colortbl}
\usepackage{tabularx}
\usepackage{pifont}
\usepackage{multirow}

% Project-local packages.
\usepackage{texlib/my_listings}
\usepackage{texlib/specific}
\usepackage{texlib/common}
\usepackage{texlib/todo}

\usepackage{inconsolata}

% Default style for every listing of the presentation.
\lstset{basicstyle=\footnotesize\ttfamily}

% makecell table heads: centered, small and bold.
\renewcommand\theadalign{c}
\renewcommand\theadfont{\scriptsize\bfseries}

% No navigation symbols, no headline.
\setbeamertemplate{navigation symbols}{}
\setbeamertemplate{headline}{}

% \thenalert{text}: plain text on overlay 1, alerted text on overlay 2.
\newcommand{\thenalert}[1]{\only<1>{#1}\only<2>{\alert{#1}}}

% "current/total" frame counter, empty while the frame number is 0.
% Used as the short form of \author.
\newcommand{\slidecountline}{
    \ifthenelse{\theframenumber = 0}
        {}
        {\insertframenumber/\inserttotalframenumber}}

% "<Roman section number> -- <section title>", empty while the section
% counter is 0. Used as the short form of \title.
\newcommand{\sectionline}{
    \ifthenelse{\thesection = 0}
        {}
        {\Roman{section}~-- \insertsection}}

% Colored pifont dingbats (number 52 in green, number 56 in red).
\newcommand{\cmark}{\color{OliveGreen}\ding{52}}
\newcommand{\xmark}{\color{BrickRed}\ding{56}}

% A whole frame showing only the current section head in a title-styled box.
\newcommand{\sectiontitleframe}{
    \begin{frame}
        \vfill
        \centering
        \begin{beamercolorbox}[sep=8pt,center,shadow=true,rounded=true]{title}
            \usebeamerfont{title}\insertsectionhead\par%
        \end{beamercolorbox}
        \vfill
    \end{frame}}

% Minimal `gdb' listings language: only the word "gdb" is a keyword.
\lstdefinelanguage{gdb}{
    morekeywords={gdb},
    sensitive=false,
}

% tabularx column types: `b' is a plain X column, `s' is an X column whose
% width is scaled by 0.43.
\newcolumntype{b}{X}
\newcolumntype{s}{>{\hsize=.43\hsize}X}

% Inline C code.
\newcommand{\lstinl}
    {\lstinline[language=C, keepspaces=true, basicstyle=\ttfamily]}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\title[\sectionline]
    {Reliable and Fast DWARF-based Stack Unwinding}

\author[\slidecountline]{\textbf{Théophile Bastian}\\
    \textbf{Stephen Kell} \\
    \textbf{Francesco Zappa Nardelli}}
\date{}
%\subject{}
%\logo{}
\institute{ENS Paris, University of Kent, Inria}

\begin{document}

% Title frame; the frame counter is decremented so this frame is not counted.
\begin{frame}
    \addtocounter{framenumber}{-1}
    \titlepage{}
    \vspace{-2em}
    \begin{center}
        {\large \url{https://huit.re/frdwarf}}\\
        {\todo{font size} Slides, paper, code}
        \todo{FUNDING: ONR Vertica + Google Research Fellowship (logos)}
    \end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{DWARF and stack unwinding data}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Introduction}

% Teaser: a gdb backtrace revealed step by step, next to a call stack picture.
% The listing body is kept at column 0 because its whitespace is typeset
% verbatim.
\begin{frame}[fragile]{}
    \begin{columns}[c]
        \begin{column}{0.70\textwidth}
\begin{lstlisting}[language=gdb, numbers=none, escapechar=|]
$ ./a.out
Segmentation fault.
|\pause|(gdb) backtrace
#0 0x54625 in fct_b
#1 0x54663 in fct_a
#2 0x54674 in main
\end{lstlisting}
            \pause{}
            \begin{center}
                \textbf{\Large How does it work?!}
            \end{center}
        \end{column}
        \begin{column}{0.35\textwidth}
            \pause{}
            \includegraphics[width=0.95\linewidth]{img/call_stack}
        \end{column}
    \end{columns}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Stack frames and unwinding}

% The question on the second overlay motivates the unwinding tables.
\begin{frame}{Call stack and registers}
    \begin{columns}[c]
        \begin{column}{0.55\textwidth}
            \begin{center}
                \large\bfseries
                How do we get the RA\@?\\Easy, \reg{rbp}!
                \vspace{2em}
                \onslide<2>{What if we only have \reg{rsp}?}
            \end{center}
        \end{column}
        \begin{column}{0.45\textwidth}
            \includegraphics[width=0.95\linewidth]{img/call_stack}
        \end{column}
    \end{columns}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{DWARF tables}

\newcolumntype{a}{>{\columncolor{RedOrange}}l}

% An unwinding table: one row per code location, one column per register.
% The LOC, CFA and ra columns and one row are color-highlighted; the DWARF
% logo is overlaid on the table after a \pause.
\begin{frame}{DWARF unwinding data}
    \vspace{2em}
    \ttfamily \footnotesize
    \begin{tabular}{
            >{\columncolor{YellowGreen}}l
            >{\columncolor{Thistle}}l
            l l l l l l
            >{\columncolor{Apricot}}l}
        ~LOC    & CFA    & rbx  & rbp  & r12  & r13  & r14  & r15  & ra  \\
        0084950 & rsp+8  & u    & u    & u    & u    & u    & u    & c-8 \\
        0084952 & rsp+16 & u    & u    & u    & u    & u    & c-16 & c-8 \\
        0084954 & rsp+24 & u    & u    & u    & u    & c-24 & c-16 & c-8 \\
        0084956 & rsp+32 & u    & u    & u    & c-32 & c-24 & c-16 & c-8 \\
        0084958 & rsp+40 & u    & u    & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084959 & rsp+48 & u    & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        \rowcolor{Aquamarine}
        008495a & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084962 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084a19 & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084a1d & rsp+48 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084a1e & rsp+40 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084a20 & rsp+32 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084a22 & rsp+24 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084a24 & rsp+16 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084a26 & rsp+8  & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        0084a30 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
    \end{tabular}
    \pause{}
    \vspace{-3cm}
    \hfill\includegraphics[height=3cm, angle=45, origin=c]{img/dwarf_logo}
    \hspace{-1cm}
\end{frame}

% The bytecode the table above is built from, then (after a \pause) a
% "Complex & slow!" banner pulled up over the listing by a negative \vspace.
\begin{frame}[t, fragile]{The real DWARF}
\begin{lstlisting}[numbers=none, language=]
00009b30 48 009b34 FDE cie=0000 pc=0084950..0084b37
  DW_CFA_advance_loc: 2 to 0000000000084952
  DW_CFA_def_cfa_offset: 16
  DW_CFA_offset: r15 (r15) at cfa-16
  DW_CFA_advance_loc: 2 to 0000000000084954
  DW_CFA_def_cfa_offset: 24
  DW_CFA_offset: r14 (r14) at cfa-24
  DW_CFA_advance_loc: 2 to 0000000000084956
  DW_CFA_def_cfa_offset: 32
  DW_CFA_offset: r13 (r13) at cfa-32
  DW_CFA_advance_loc: 2 to 0000000000084958
  DW_CFA_def_cfa_offset: 40
  DW_CFA_offset: r12 (r12) at cfa-40
  DW_CFA_advance_loc: 1 to 0000000000084959
[...]
\end{lstlisting}
    \begin{itemize}
        \item[\textbf{$\longrightarrow$}] \textbf{\alert{constructed}
            on-demand by a \alert{Turing-complete bytecode}!}
    \end{itemize}
    \pause{}
    \vspace{-6.5cm}
    \begin{center}
        % NOTE(review): \fontsize is not followed by \selectfont here, so
        % whether the requested size is applied depends on later font
        % switches -- confirm the rendered size before touching this.
        \bfseries \fontsize{8cm}{1cm}
        \colorbox{white}{\alert{Complex}} \\
        \colorbox{white}{\alert{\& slow!}}
    \end{center}
\end{frame}

% Motivation: unwinding speed matters beyond interactive debugging.
\begin{frame}{Why does slow matter?}
    \begin{itemize}
        \item{} After all, we're talking about \alert{debugging procedures}
            run by a \alert{human being} (slower than the machine).
            \ldots{}or are we?
    \end{itemize}
    \pause{}
    \begin{center}
        \textbf{\Large{}No!}
    \end{center}
    \begin{itemize}
        \pause{}\item{} Pretty much any \alert{program analysis tool}
        \pause{}\item{} \alert{Profiling} with polling profilers
        \pause{}\item{} \alert{Exception handling} in C++
    \end{itemize}
    \vspace{2em}
    \begin{center}
        \textbf{\Large{}Debug data is not only for debugging}
    \end{center}
    \vspace{1em}
    $\leadsto$ we might want \alert{an alternative time/space trade-off}
\end{frame}

% Two quotations and their attribution line, used by the next frame.
\newcommand{\LinusMailOne}{
    ``Sorry, but last time was too f\dots painful. The whole (and only) point
    of unwinders is to make debugging easy when a bug occurs. But \alert{the
    dwarf unwinder had bugs} itself, or \alert{our dwarf information had
    bugs}, and in either case it actually turned several trivial bugs into a
    \alert{total undebuggable hell}.''
}
\newcommand{\LinusMailTwo}{
    ``If you can \alert{mathematically prove that the unwinder is correct} —
    even in the presence of bogus and actively incorrect unwinding
    information — and never ever follows a bad pointer, \alert{I’ll
    reconsider}.''
}
\newcommand{\LinusSource}{
    \hfill ---~Linus Torvalds, 2012
}

% Overlays: 1-2 show the first quote with its source, 2 adds the punch line,
% 3 replaces both with the second quote and its source.
\begin{frame}{A debugging hell: Linux kernel}
    \LinusMailOne{}
    \only<1-2>{
        \vspace{1em}
        \LinusSource{}
    }
    \vspace{1em}
    \only<2>{
        \begin{center}
            \Large\bfseries \alert{This is where we still are!}
        \end{center}
    }
    \only<3>{
        \LinusMailTwo{}
        \vspace{1em}
        \LinusSource{}
    }
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data as an abstract execution of the assembly}
\sectiontitleframe{}

% \tblrowval{mnemonic}{operands}{CFA}{ra}: one table row; the last two cells
% only appear from overlay 2 on.
\newcommand{\tblrowval}[4]{#1 & #2 & \only<2->{#3} & \only<2->{#4} \\}
% Untitled block holding a large note, centered (\blknote) or not (\blklnote).
\newcommand{\blknote}[1]
    {\begin{block}{}
        \centering\large #1
    \end{block}}
\newcommand{\blklnote}[1]
    {\begin{block}{}
        \large #1
    \end{block}}
% Row highlight color.
\newcommand{\tblhl}{\rowcolor{Tan}}

% Walk through a function prologue: overlays 3 to 6 each highlight one row
% and show the matching note below the table; overlays 7 and 8 conclude.
\begin{frame}{Working on an example}
    \newcommand{\firsttblrows}{
        \tblrowval{\hspace{-2ex}<\textbf{foo}>:}{}{\textbf{CFA}}{\textbf{ra}}
        \rowonly<3>{\tblhl{}}
        \tblrowval{push}{\%r15}{rsp+8}{c-8}
        \rowonly<4>{\tblhl{}}
        \tblrowval{push}{\%r14}{rsp+16}{c-8}
        \rowonly<5>{\tblhl{}}
        \tblrowval{mov}{\$0x3,\%eax}{rsp+24}{c-8}
        \rowonly<6>{\tblhl{}}
        \tblrowval{push}{\%r13}{rsp+24}{c-8}
        \tblrowval{push}{\%r12}{rsp+32}{c-8}
        \tblrowval{push}{\%rbp}{rsp+40}{c-8}
        \tblrowval{push}{\%rbx}{rsp+48}{c-8}
        \tblrowval{sub}{\$0x68,\%rsp}{rsp+56}{c-8}
    }
    \only<-8>{
        \begin{table}
            \ttfamily\large
            \begin{tabularx}{0.9\linewidth}{
                    l
                    b
                    >{\columncolor{SkyBlue}}s
                    >{\columncolor{SkyBlue}}s
                }
                \firsttblrows{}%
                \tblrowval{add}{\$0x68,\%rsp}{rsp+160}{c-8}
                \tblrowval{pop}{\%rbx}{rsp+56}{c-8}
                \tblrowval{pop}{\%rbp}{rsp+48}{c-8}
            \end{tabularx}
        \end{table}
        \blknote{
            \centering
            \begin{overlayarea}{0.9\textwidth}{4.8ex}
                \only<3>{Upon function call, \alert{ra = *(\reg{rsp})}}
                \only<4>{\texttt{push} decreases \reg{rsp} by 8: %
                    \alert{ra = *(\reg{rsp} + 8)}}
                \only<5>{and again: %
                    \alert{ra = *(\reg{rsp} + 16)}}
                \only<6>{This \texttt{mov} leaves \reg{rsp} untouched: %
                    \alert{ra = *(\reg{rsp} + 16)}}
                \only<7>{The unwinding table captures an
                    \alert{abstract execution} of the code\ldots}
                \only<8>{\ldots and thus can be
                    \alert{synthesized from the binary}.}
            \end{overlayarea}
        }
    }
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data synthesis from binaries}

%\begin{frame}{Why would synthesis be useful?}
%    \begin{itemize}
%        \item As said earlier, \alert{DWARF is complex}
%        \item Some compilers \alert{do not generate it}: hard to \alert{debug}
%            \& \alert{profile}.
%        \item Think of \alert{JIT-compiled assembly} (eg. JVM)
%        \item \ldots{}or even \alert{hand-written inlined assembly}!
%            \begin{itemize}
%                \item Painful enough to write for not bothering with DWARF
%                \item May not even be known by the programmer, breaks gdb
%                \item May be wrong (remember Linus!)
%            \end{itemize}
%    \end{itemize}
%\end{frame}

\begin{frame}{How do we actually synthesize?}
    \begin{itemize}
        \item Upon entering a function, we know (ABI)
            % Same convention as the first row of the tables shown earlier:
            % CFA = rsp+8 and ra at CFA-8, i.e. ra = *(rsp) on entry.
            \[
                \cfa = \reg{rsp} + 8 \qquad \ra = \cfa - 8
            \]
        \item The semantics of each instruction specifies \alert{how it
            changes \cfa}.
            \begin{itemize}
                \item Heuristic to decide whether we index with \reg{rbp} or
                    \reg{rsp}
            \end{itemize}
        \item By performing a symbolic execution, we can \alert{synthesize the
            unwinding table} line by line.
        \item Control flow: forward data-flow analysis
        \item The fixpoints are immediate, cf.\ the article
    \end{itemize}
\end{frame}

\begin{frame}{}
    \vfill
    \centering
    \begin{beamercolorbox}[sep=8pt,center,shadow=true,rounded=true]{title}
        \Large\bfseries Demo time!
    \end{beamercolorbox}
    \vfill
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data compilation}
\sectiontitleframe{}

\subsection{Compilation ahead-of-time}

\begin{frame}{Compilation overview}
    \begin{itemize}
        \item Compiled to \alert{C code}
        \item C code then \alert{compiled to native binary} (gcc)
            \begin{itemize}
                \item[$\leadsto$] gcc optimisations for free
            \end{itemize}
        \item Compiled as \alert{separate \texttt{.so} files}, called
            \ehelfs{}
        \bigskip{}
        \item Morally a \alert{monolithic switch} on IPs
        \item Each case contains assembly that computes a \alert{row of the
            table}
    \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% The C source is read from an external file and the frame is shrunk to fit.
\begin{frame}[shrink]{Compilation example: generated C}
    \lstinputlisting[language=C]{src/fib7/fib7.eh_elf_basic.c}
    \pause{}
    \vspace{1em}
    \begin{center}
        The real code is optimised, but boils down to this.
    \end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}{Mostly plug-and-play: libunwind interface}
    \begin{itemize}
        \item \alert{libunwind}: \textit{de facto} standard library for
            unwinding
        \bigskip{}
        \item \texttt{libunwind-eh\_elf}: alternative implementation using
            \ehelfs{}
        \item[$\leadsto$] almost \alert{``relink-and-play''} for existing
            projects!
    \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Results}

\begin{frame}{Performance}
    \begin{itemize}
        \item \alert{Speedup}: $\times$15 (\prog{gzip}) to $\times$25
            (\prog{hackbench}) vs.\ libunwind
            \begin{itemize}
                \item libunwind: state of the art, aggressive caching.
            \end{itemize}
        \item \alert{Space overhead}: $\times$2.6 to $\times$3 vs.\ DWARF
        \vspace{2em}
        \item[$\leadsto$] Alternative time/space trade-off, favorable
            e.g.\ for profiling.
    \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section*{Conclusion}
% Reset the section counter to 0, the value for which \sectionline prints
% nothing.
\setcounter{section}{0}

\begin{frame}{A fragment of our article}
    The original article \textbf{Reliable and Fast DWARF-based Stack
    Unwinding} contains
    \vspace{1em}
    \begin{itemize}
        \item{} DWARF unwinding tables validation;
        \item{} DWARF unwinding tables synthesis;
        \item{} DWARF-based unwinding speedup.
    \end{itemize}
    \vspace{1em}
    \begin{center}
        Come and chat if interested! \texttt{:)}
    \end{center}
\end{frame}

\end{document}