% vim: spell spelllang=en

\documentclass[11pt,xcolor={usenames,dvipsnames}]{beamer}
\usetheme{Warsaw}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\usepackage[T1]{fontenc}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{booktabs}
\usepackage{makecell}
\usepackage{ifthen}
\usepackage{colortbl}
\usepackage{tabularx}
\usepackage{pifont}
\usepackage{multirow}

\usepackage{texlib/my_listings}
\usepackage{texlib/specific}
\usepackage{texlib/common}
\usepackage{texlib/todo}

\usepackage{inconsolata}
% Default look of every listing: small typewriter type.
\lstset{basicstyle=\footnotesize\ttfamily}

% Alignment setting and font used by \thead table-header cells.
\renewcommand\theadalign{c}
\renewcommand\theadfont{\scriptsize\bfseries}

% Empty out the navigation-symbols and headline templates.
\setbeamertemplate{navigation symbols}{}
\setbeamertemplate{headline}{}

% Show #1 plainly on slide 1 and alerted on slide 2; nothing on later slides.
\newcommand{\thenalert}[1]{\only<1-2>{\alert<2>{#1}}}
% Slide counter "current/total", passed as the short author (see \author).
% Prints nothing while the frame counter is 0; the title and outline frames
% decrement the counter so that they stay uncounted.
% Line ends are guarded with % so the macro expands without a stray space.
\newcommand{\slidecountline}{%
    \ifthenelse{\theframenumber = 0}%
        {}%
        {\insertframenumber/\inserttotalframenumber}%
}
% Current section as "<Roman number> -- <name>", passed as the short title
% (see \title). Prints nothing while the section counter is 0.
% Line ends are guarded with % so the macro expands without a stray space.
\newcommand{\sectionline}{%
    \ifthenelse{\thesection = 0}%
        {}%
        {\Roman{section}~-- \insertsection}%
}

% Green check mark. \textcolor confines the colour to the glyph, so whatever
% follows the mark keeps its own colour.
\newcommand{\cmark}{\textcolor{OliveGreen}{\ding{52}}}
% Red cross mark. \textcolor confines the colour to the glyph, so whatever
% follows the mark keeps its own colour.
\newcommand{\xmark}{\textcolor{BrickRed}{\ding{56}}}

% Open every section with a divider frame: the section heading in a
% title-coloured box, with equal stretch above and below.
\AtBeginSection{
    \begin{frame}
        \centering
        \vfill
        \begin{beamercolorbox}[center, sep=8pt, rounded=true, shadow=true]{title}
            \usebeamerfont{title}\insertsectionhead\par%
        \end{beamercolorbox}
        \vfill
    \end{frame}
}

% Minimal listings language for gdb transcripts: `gdb' is its only keyword
% and keyword matching is case-insensitive.
\lstdefinelanguage{gdb}{
    morekeywords={gdb},
    sensitive=false,
}


% Short names for tabularx columns: `b' is a plain X column, `s' is an X
% column whose \hsize is scaled by 0.43.
% NOTE(review): tabularx expects the \hsize factors of a table's X columns to
% add up to the number of X columns -- confirm the widths come out as intended.
\newcolumntype{b}{X}
\newcolumntype{s}{>{\hsize=.43\hsize}X}

% Shorthand for an inline C snippet in normal-size typewriter type; the
% snippet follows the macro, delimited as for \lstinline.
\newcommand{\lstinl}{%
    \lstinline[language=C, keepspaces=true, basicstyle=\ttfamily]%
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Title-page metadata. The short-title slot carries \sectionline and the
% short-author slot carries \slidecountline instead of abbreviated names.
\title[\sectionline] {Reliable and Fast DWARF-based Stack Unwinding}
\author[\slidecountline]{\textbf{Théophile Bastian},\\
    \textbf{Stephen Kell}, \\
\textbf{Francesco Zappa Nardelli}}
% Empty date.
\date{}
%\subject{}
%\logo{}
\institute{ENS Paris, University of Kent, Inria}

\begin{document}

% Title frame, followed by a line pointing to the slides.
\begin{frame}
    % Undo the counter increment so this frame is not counted.
    \addtocounter{framenumber}{-1}
    \titlepage{}

    \vspace{-2em}
    \begin{center}
        % No \\ after the last row: it would add an empty line to the display.
        \begin{align*}
            \text{Slides: } &\text{\todo{add URL for this PDF}}
        \end{align*}
    \end{center}
\end{frame}

% Outline frame; its title is a lone non-breaking space.
\begin{frame}{~}
    % Undo the counter increment so this frame is not counted.
    \addtocounter{framenumber}{-1}
    % Table of contents with all subsections hidden.
    \tableofcontents[hideallsubsections]
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{DWARF and stack unwinding data}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Introduction}

% Motivating example: a gdb session uncovered step by step (crash report,
% backtrace, frame selection, variable print), followed by a question and the
% call-stack figure.
\begin{frame}[fragile]{We often use stack unwinding!}
    \begin{columns}[c]
        \begin{column}{0.70\textwidth}
            % Verbatim transcript; text between | | escapes to LaTeX and is
            % used here to place the \pause steps.
            \begin{lstlisting}[language=gdb, numbers=none, escapechar=|]
Program received signal SIGSEGV.
0x54625 in fct_b at segfault.c:5
5           printf("%d\n", *b);

|\pause| (gdb) backtrace
#0   0x54625 in fct_b at segfault.c:5
#1   0x54663 in fct_a at segfault.c:10
#2   0x54674 in main at segfault.c:14

|\pause| (gdb) frame 1
#1   0x54663 in fct_a at segfault.c:10
10       fct_b((int*) a);

|\pause| (gdb) print a
$1 = 84
            \end{lstlisting}
            \vspace{-1em}
            \pause{}
            \begin{center}
                \textbf{\Large How does it work?!}
            \end{center}
        \end{column}
        \begin{column}{0.35\textwidth}
            \pause{}
            \includegraphics[width=0.95\linewidth]{img/call_stack}
        \end{column}
    \end{columns}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Stack frames and unwinding}

% Two columns: the questions on the left, the call-stack figure on the right.
\begin{frame}{Call stack and registers}
    \begin{columns}[c]
        \begin{column}{0.55\textwidth}
            \begin{center}
                % \bfseries replaces the obsolete two-letter switch \bf.
                \large\bfseries
                How do we get the grandparent RA\@?

                \medskip

                Isn't it as trivial as \texttt{pop()}?

                \vspace{2em}

                % Shown on slide 2 only; its space is reserved on slide 1.
                \onslide<2>{We only have \reg{rsp} and \reg{rip}.}

            \end{center}
        \end{column}
        \begin{column}{0.45\textwidth}
            \includegraphics[width=0.95\linewidth]{img/call_stack}
        \end{column}
    \end{columns}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{DWARF tables}

% Column type `a': left-aligned column with a RedOrange background.
\newcolumntype{a}{>{\columncolor{RedOrange}}l}

% Unwinding table: one row per code location (LOC), giving the CFA expression,
% an entry for each of rbx, rbp, r12-r15, and the return address (ra).
% The DWARF logo is laid over the table on the second slide.
\begin{frame}{DWARF unwinding data}
    \vspace{2em}
    % \ttfamily replaces the obsolete two-letter switch \tt.
    \ttfamily\footnotesize
    % Coloured LOC, CFA and ra columns; one row is highlighted with \rowcolor.
    \begin{tabular}{
            >{\columncolor{YellowGreen}}l
            >{\columncolor{Thistle}}l
            l l l l l l
            >{\columncolor{Apricot}}l}
~LOC & CFA & rbx & rbp & r12 & r13 & r14 & r15 & ra \\
0084950 & rsp+8 & u & u & u & u & u & u & c-8 \\
0084952 & rsp+16 & u & u & u & u & u & c-16 & c-8 \\
0084954 & rsp+24 & u & u & u & u & c-24 & c-16 & c-8 \\
0084956 & rsp+32 & u & u & u & c-32 & c-24 & c-16 & c-8 \\
0084958 & rsp+40 & u & u & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084959 & rsp+48 & u & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
\rowcolor{Aquamarine} 008495a & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084962 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a19 & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a1d & rsp+48 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a1e & rsp+40 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a20 & rsp+32 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a22 & rsp+24 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a24 & rsp+16 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a26 & rsp+8 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a30 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
    \end{tabular}

    \pause{}

    % Negative space moves the rotated logo back up over the table.
    \vspace{-3cm}
    \hfill\includegraphics[height=3cm, angle=45, origin=c]{img/dwarf_logo}
    \hspace{-1cm}
\end{frame}

% Dump of DW_CFA_* instructions for the address range starting at 0084950,
% followed on the next slide by a white-boxed verdict laid over the listing.
\begin{frame}[t, fragile]{The real DWARF}
    % Verbatim dump: no line numbers, no language highlighting.
    \begin{lstlisting}[numbers=none, language=]
00009b30 48 009b34 FDE cie=0000 pc=0084950..0084b37
  DW_CFA_advance_loc: 2 to 0000000000084952
  DW_CFA_def_cfa_offset: 16
  DW_CFA_offset: r15 (r15) at cfa-16
  DW_CFA_advance_loc: 2 to 0000000000084954
  DW_CFA_def_cfa_offset: 24
  DW_CFA_offset: r14 (r14) at cfa-24
  DW_CFA_advance_loc: 2 to 0000000000084956
  DW_CFA_def_cfa_offset: 32
  DW_CFA_offset: r13 (r13) at cfa-32
  DW_CFA_advance_loc: 2 to 0000000000084958
  DW_CFA_def_cfa_offset: 40
  DW_CFA_offset: r12 (r12) at cfa-40
  DW_CFA_advance_loc: 1 to 0000000000084959
  [...]
    \end{lstlisting}

    \begin{itemize}
        \item[\textbf{$\longrightarrow$}] \textbf{\alert{constructed} on-demand
            by a \alert{Turing-complete bytecode}!}
    \end{itemize}

    \pause{}

    % Negative space moves the boxes below back up over the material above.
    \vspace{-6.5cm}
    \begin{center}
    % NOTE(review): \fontsize is not followed by \selectfont here -- confirm
    % that the intended size is actually applied. \bf is an obsolete switch.
    \bf \fontsize{8cm}{1cm}
        \colorbox{white}{\alert{Complex}} \\
        \colorbox{white}{\alert{\& slow!}}
    \end{center}
\end{frame}

% Argues that unwinding speed matters: the objection first, then "No!" and
% three use cases uncovered one at a time, then the take-away.
\begin{frame}{Why does slow matter?}
    \begin{itemize}

        \item{} After all, we're talking about \alert{debugging procedures} run
            by a \alert{human being} (slower than the machine).

            \ldots{}or are we?
    \end{itemize}

    \pause{}
    \begin{center}
        \textbf{\Large{}No!}
    \end{center}

    \begin{itemize}
        % Each \pause uncovers the next item.
        \pause{}\item{} Pretty much any \alert{program analysis tool}
        \pause{}\item{} \alert{Profiling} with polling profilers

        \pause{}\item{} \alert{Exception handling} in C++

    \end{itemize}

    \vspace{2em}

    \begin{center}
        \textbf{\Large{}Debug data is not only for debugging}
    \end{center}

    \vspace{1em}
    $\leadsto$ we might want \alert{an alternative time/space trade-off}
\end{frame}

% First quotation of the "debugging hell" frame, key phrases alerted.
\newcommand{\LinusMailOne}{
    ``Sorry, but last time was too f\dots painful.
    The whole (and only) point of unwinders is to make debugging easy when a
    bug occurs.
    But \alert{the dwarf unwinder had bugs} itself, or
    \alert{our dwarf information had bugs}, and in either case it actually
    turned several trivial bugs into a \alert{total undebuggable hell}.''
}
% Second quotation of the "debugging hell" frame, key phrases alerted.
\newcommand{\LinusMailTwo}{
    ``If you can \alert{mathematically prove that the unwinder is correct} —
    even in the presence of bogus and actively incorrect unwinding
    information — and never ever follows a bad pointer,
    \alert{I’ll reconsider}.''
}
% Attribution line for the quotations, pushed to the right by \hfill.
\newcommand{\LinusSource}{
    \hfill---~Linus Torvalds, Kernel mailing list, 2012
}
% Three slides: the first quotation with its source, then a remark added on
% slide 2, then the second quotation with its source on slide 3.
\begin{frame}{A debugging hell: Linux kernel}
    \LinusMailOne{}

    % Source of the first quotation, slides 1-2.
    \only<1-2>{
        \vspace{1em}
        \LinusSource{}
    }

    \vspace{1em}
    \only<2>{
        \begin{center}
            % \bfseries replaces the obsolete two-letter switch \bf.
            \Large\bfseries
            \alert{This is where we still are!}
        \end{center}
    }

    % Second quotation and its source, slide 3.
    \only<3>{
        \LinusMailTwo{}

        \vspace{1em}
        \LinusSource{}
    }
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data as an abstract state}

% One table row of four cells; cells #3 and #4 are filled from slide 2 on.
\newcommand{\tblrowval}[4]{%
    #1 & #2 & \only<2->{#3} & \only<2->{#4} \\%
}
% Untitled block holding a centred note in large type.
\newcommand{\blknote}[1]{%
    \begin{block}{}
            \centering\large
            #1
    \end{block}%
}
% Untitled block holding a note in large type, without centring.
\newcommand{\blklnote}[1]{%
    \begin{block}{}
            \large
            #1
    \end{block}%
}
% Row highlight: gives the row it is placed in a Tan background.
\newcommand{\tblhl}{\rowcolor{Tan}}

% Walk-through of an unwinding table next to the instructions of a function
% <foo>. The CFA and ra cells appear from slide 2 on (see \tblrowval); slides
% 3-8 each show a different note in the block below the table.
\begin{frame}{Working on an example}
    % Header line and first rows of the table.
    % \rowonly is not defined in this file; presumably it applies its argument
    % (here the row highlight) on the given slide only -- TODO confirm.
    \newcommand{\firsttblrows}{
\tblrowval{\hspace{-2ex}<{\bf foo}>:}{}{\textbf{CFA}}{\textbf{ra}}
\rowonly<3>{\tblhl{}}  \tblrowval{push}{\%r15}{rsp+8}{c-8}
\rowonly<4>{\tblhl{}} \tblrowval{push}{\%r14}{rsp+16}{c-8}
\rowonly<5>{\tblhl{}} \tblrowval{mov}{\$0x3,\%eax}{rsp+24}{c-8}
\rowonly<6>{\tblhl{}} \tblrowval{push}{\%r13}{rsp+24}{c-8}
\tblrowval{push}{\%r12}{rsp+32}{c-8}
\tblrowval{push}{\%rbp}{rsp+40}{c-8}
\tblrowval{push}{\%rbx}{rsp+48}{c-8}
\tblrowval{sub}{\$0x68,\%rsp}{rsp+56}{c-8}
}

    % Table and note are typeset on slides 1-8.
    \only<-8>{
        \begin{table}
            \ttfamily\large
            % Mnemonic, operands, then two SkyBlue columns for CFA and ra.
            \begin{tabularx}{0.9\linewidth}{
                    l
                    b
                    >{\columncolor{SkyBlue}}s
                    >{\columncolor{SkyBlue}}s
                }
\firsttblrows{}%
\tblrowval{add}{\$0x68,\%rsp}{rsp+160}{c-8}
\tblrowval{pop}{\%rbx}{rsp+56}{c-8}
\tblrowval{pop}{\%rbp}{rsp+48}{c-8}
            \end{tabularx}
        \end{table}
        \blknote{
            \centering
        % Fixed-size area holding one explanation per slide.
        \begin{overlayarea}{0.9\textwidth}{4.8ex}
            \only<3>{Upon function call, \alert{ra = *(\reg{rsp})} (ABI)}
            \only<4>{\texttt{push} decreases \reg{rsp} by 8: %
                \alert{ra = *(\reg{rsp} + 8)}}
            \only<5>{and again: %
                \alert{ra = *(\reg{rsp} + 16)}}
            \only<6>{This \texttt{mov} leaves \reg{rsp} untouched: %
                \alert{ra = *(\reg{rsp} + 16)}}
            \only<7>{The unwinding table can actually be seen as\\
                    an \alert{abstract interpretation} of the code\ldots}
            \only<8>{\ldots and thus, for a given run, be
                \alert{re-computed from scratch}}
        \end{overlayarea}
    }
    }
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data synthesis from binaries}

% Motivation for synthesising unwinding data, as a bullet list.
\begin{frame}{Why would synthesis be useful?}
    \begin{itemize}
        \item As said earlier, \alert{DWARF is complex}
        \item Some compilers \alert{do not generate it}: hard to \alert{debug}
            \& \alert{profile}.
        % "e.g.\ " keeps a normal inter-word space after the abbreviation.
        \item Think of \alert{JIT-compiled assembly} (e.g.\ JVM)
        \item \ldots{}or even \alert{hand-written inlined assembly}!
            \begin{itemize}
                \item Painful enough to write without also bothering with
                    DWARF
                \item May not even be known by the programmer, breaks gdb
                \item May be wrong (remember Linus!)
            \end{itemize}
    \end{itemize}
\end{frame}

% Recap of what is known before synthesis and what remains to be handled.
\begin{frame}{What have we got so far?}
    We now want to \alert{synthesize unwinding data}. That means
    \alert{forgetting the blue part of the previous schemes}.

    \begin{itemize}
        % Entry state, with the same signs as the unwinding tables of this
        % deck: their first row reads CFA = rsp+8 and ra = c-8.
        \item Upon entering a function, we know (ABI)
            \[ \cfa = \reg{rsp} + 8
            \qquad \ra = \cfa - 8 \]
        \item For each instruction, we know \alert{how it changes \cfa}.
        \item We assume \alert{\ra{} constant wrt. \cfa}.
            \begin{itemize}
                \item[$\leadsto$] only \cfa{} tracking matters (for unwinding)
            \end{itemize}
        \item We had a working strategy for a \alert{linear execution}
        \item We still have to handle
            \begin{itemize}
                \item \alert{\cfa{} expression}
                \item \alert{control flow graph}
            \end{itemize}
    \end{itemize}
\end{frame}

% The two ways of expressing the CFA: relative to rsp, or relative to rbp.
\begin{frame}
    \frametitle{\cfa{} expression}
    Two possibilities:
    \begin{itemize}
        \item Either we track \cfa{} wrt. \reg{rsp}
            \begin{itemize}
                \item and update it after each instruction if needed
            \end{itemize}
        \item Or \reg{rbp} is used as base pointer: easy
    \end{itemize}
\end{frame}

% C source and its control flow graph side by side, then how states are
% propagated at splits and joins of the graph.
\begin{frame}{Control flow graph}
    \begin{columns}[c]
        \column{0.4\textwidth}
        \lstinputlisting[language=C]{src/cfg/cfg.c}

        \column{0.30\textwidth}
        \begin{figure}
            \centering
            \includegraphics[width=\textwidth]{src/cfg/cfg.png}
        \end{figure}
    \end{columns}

    \begin{itemize}
        % "e.g.\ " keeps a normal inter-word space after the abbreviation.
        \item \alert{Upon split} (e.g.\ \texttt{X})\alert{:} nothing special,
            propagate end state of X to child nodes A and B
        \item \alert{Upon join} (e.g.\ \texttt{while\_end})\alert{:} check
            consistency of both input states
            \begin{itemize}
                \item If tricky, \texttt{gcc} will have used \reg{rbp}, even
                    with \texttt{-fomit-frame-pointer}.
            \end{itemize}
    \end{itemize}

\end{frame}

% Untitled divider frame announcing the demo, laid out like the section
% divider frames.
\begin{frame}{}
    \vfill
    \centering
    \begin{beamercolorbox}[sep=8pt,center,shadow=true,rounded=true]{title}
        % \bfseries replaces the obsolete two-letter switch \bf.
        \Large\bfseries
        Demo time!
    \end{beamercolorbox}
    \vfill
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data compilation}

\subsection{Compilation ahead-of-time}

% Overview of the compilation pipeline as a bullet list.
\begin{frame}
    \frametitle{Compilation overview}
    \begin{itemize}
        \item Compiled to \alert{C code}
        \item C code then \alert{compiled to native binary} (gcc)
            \begin{itemize}
                \item[$\leadsto$] gcc optimisations for free
            \end{itemize}
        \item Compiled as \alert{separate \texttt{.so} files},
            called \ehelfs{}
            \bigskip{}
        \item Morally a \alert{monolithic switch} on IPs
        \item Each case contains assembly that computes a
            \alert{row of the table}
    \end{itemize}
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% C source included from a file, then a remark uncovered after a pause.
\begin{frame}[shrink]
    \frametitle{Compilation example: generated C}
    \lstinputlisting[language=C]{src/fib7/fib7.eh_elf_basic.c}

    \pause
    \vspace{1em}
    \begin{center}
        The real code is optimised, but boils down to this.
    \end{center}
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% How the eh_elf-based implementation relates to libunwind.
\begin{frame}
    \frametitle{Mostly plug-and-play: libunwind interface}
    \begin{itemize}
        \item \alert{libunwind}: \textit{de facto} standard library
            for unwinding
        \item Relies on DWARF

            \bigskip{}

        \item \texttt{libunwind-eh\_elf}: alternative implementation
            using \ehelfs{}

        \item[$\leadsto$] almost \alert{``relink-and-play''} for existing projects!
    \end{itemize}
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Results}

% Timing table: for two programs, frames unwound, total time, average time
% per frame and time ratio of three unwinding methods.
\begin{frame}{Time performance}
    \begin{columns}
        % A single column, wider than the text block, holds the table.
        \begin{column}{1.1\textwidth}
            \begin{table}[h]
                \centering
                % Six columns, matching the six cells of every row: program
                % label, method, and four right-aligned numeric columns.
                \begin{tabular}{l l r r r r}
                    \toprule
                    % Unit symbols are upright; only \mu needs math mode.
                    & \thead{Unwinding method} & \thead{Frames \\ unwound}
                        & \thead{Tot. time \\ ($\mu$s)}
                        & \thead{Avg. \\ time / frame \\ (ns)}
                        & \thead{Time ratio} \\
                    \midrule
                    \midrule

                    % NOTE(review): \multirow spans 2 rows although each
                    % program has 3 -- confirm the label position is intended.
                    \multirow{2}{*}{\rotatebox{90}{\textbf{\prog{Gzip}}~~}}
                    &\alert{\ehelfs{}}
                        & 331523 % Frames unwound
                        & 25930 % Total time
                        & 78 % Avg time
                        & 1
                    \\
                    & \prog{libunwind}, \alert{cached}
                        & 331523 % Frames unwound
                        & 403292 % Total time
                        & 1217 % Avg time
                        & \alert{15.6}
                    \\
                    &\prog{libunwind}, \alert{uncached}
                        & 331523 % Frames unwound
                        & 2197296 % Total time
                        & 6635 % Avg time
                        & \alert{84.7}
                    \\
                    \midrule
                    \multirow{2}{*}{\rotatebox{90}{\textbf{\prog{hackbench}}}}
                    & \alert{\ehelfs{}}
                        & 152297 % Frames unwound
                        & 12941 % Total time
                        & 84 % Avg time
                        & 1
                        \\
                    & \prog{libunwind}, \alert{cached}
                        & 152297 % Frames unwound
                        & 316907 % Total time
                        & 2076 % Avg time
                        & \alert{24.6}
                        \\
                    & \prog{libunwind}, \alert{uncached}
                        & 152297 % Frames unwound
                        & 982697 % Total time
                        & 6439 % Avg time
                        & \alert{76.3}\vspace{0.8em}
                        \\
                        \bottomrule
                \end{tabular}
            \end{table}
        \end{column}
    \end{columns}
\end{frame}

% Size table: for each program, alone and in a "+ libs" row, the original
% .eh_frame size, the generated eh_elf .text size, the percentage of the
% original program size and the growth factor.
\begin{frame}{Space overhead}
    \begin{table}[h]
        \centering
        \begin{tabular}{l r r r r}
            \toprule
            % NOTE(review): the underscore is escaped in the first \lstinline
            % but not in the second -- confirm both headers render as intended.
            \thead{Shared object}
                & \thead{Original \\ \lstinline{.eh\_frame}}
                & \thead{Generated \\ \lstinline{eh_elf}  \lstinline{.text}}
                & \thead{\% of original \\ program size}
                & \thead{Growth \\ factor} \\
            \midrule
                % NOTE(review): program groups are separated with \hline while
                % the outer rules come from booktabs.
                find & 21.3 KiB & 68.3 KiB & 46.63 & 3.21 \\
                \hfill + libs & 196.6 KiB & 577.2 KiB & 19.75 & 2.94 \\
                \hline
                python3.7
                    & 160.0 B & 1.4 KiB & 355.98 & 8.33 \\
                \hfill + libs
                    & 449.0 KiB &        1.1 MiB &        23.77 & 2.61 \\
                \hline
                gzip & 5.1 KiB & 10.9 KiB & 16.48 & 2.13 \\
                \hfill + libs & 143.5 KiB & 413.1 KiB & 24.96 & 2.88 \\
                \hline
                hackbench
                    & 568.0 B & 3.2 KiB & 107.99 & 5.74 \\
                \hfill + libs
                    & 150.4 KiB & 439.4 KiB & 26.60 & 2.92 \\
                \hline
                sqlite & 121.7 KiB & 382.8 KiB & 34.68 & 3.14 \\
                \hfill + libs & 376.2 KiB & 1.1 MiB & 25.32 & 3.00 \\
                \bottomrule
        \end{tabular}
    \end{table}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Unnumbered closing section.
\section*{Conclusion}
% Set the section counter to 0, the value for which \sectionline prints
% nothing.
\setcounter{section}{0}

% Closing frame: the three topics of the full article and an invitation.
\begin{frame}
    \frametitle{A fragment of our article}
    The original article
    \textbf{Reliable and Fast DWARF-based Stack Unwinding} contains

    \vspace{1em}

    \begin{itemize}
        \item DWARF unwinding tables validation;
        \item DWARF unwinding tables synthesis;
        \item DWARF-based unwinding speedup.
    \end{itemize}

    \vspace{1em}

    \begin{center}
        Come and chat if interested! \texttt{:)}
    \end{center}
\end{frame}

\end{document}