talk-2019-10-OOPSLA19/slides.tex

630 lines
20 KiB
TeX
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

% vim: spell spelllang=en
\documentclass[11pt,xcolor={usenames,dvipsnames}]{beamer}
\usetheme{Warsaw}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\usepackage[T1]{fontenc}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{booktabs}
\usepackage{makecell}
\usepackage{ifthen}
\usepackage{colortbl}
\usepackage{tabularx}
\usepackage{pifont}
\usepackage{multirow}
\usepackage{texlib/my_listings}
\usepackage{texlib/specific}
\usepackage{texlib/common}
\usepackage{texlib/todo}
\usepackage{inconsolata}
\lstset{basicstyle=\footnotesize\ttfamily}
\renewcommand\theadalign{c}
\renewcommand\theadfont{\scriptsize\bfseries}
\setbeamertemplate{navigation symbols}{}
\setbeamertemplate{headline}{}
\newcommand{\thenalert}[1]{\only<1>{#1}\only<2>{\alert{#1}}}
\newcommand{\slidecountline}{
\ifthenelse{\theframenumber = 0}
{}
{\insertframenumber/\inserttotalframenumber}}
\newcommand{\sectionline}{
\ifthenelse{\thesection = 0}
{}
{\Roman{section}~-- \insertsection}}
\newcommand{\cmark}{\color{OliveGreen}\ding{52}}
\newcommand{\xmark}{\color{BrickRed}\ding{56}}
\AtBeginSection{
\begin{frame}
\vfill
\centering
\begin{beamercolorbox}[sep=8pt,center,shadow=true,rounded=true]{title}
\usebeamerfont{title}\insertsectionhead\par%
\end{beamercolorbox}
\vfill
\end{frame}
}
\lstdefinelanguage{gdb}{
morekeywords={gdb},
sensitive=false,
}
\newcolumntype{b}{X}
\newcolumntype{s}{>{\hsize=.43\hsize}X}
\newcommand{\lstinl}
{\lstinline[language=C, keepspaces=true, basicstyle=\ttfamily]}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\title[\sectionline] {Reliable and Fast DWARF-based Stack Unwinding}
\author[\slidecountline]{\textbf{Théophile Bastian},\\
\textbf{Stephen Kell}, \\
\textbf{Francesco Zappa Nardelli}}
\date{}
%\subject{}
%\logo{}
\institute{ENS Paris, University of Kent, Inria}
\begin{document}
\begin{frame}
\addtocounter{framenumber}{-1}
\titlepage{}
\vspace{-2em}
\begin{center}
\begin{align*}
\text{Slides: } &\text{\todo{add URL for this PDF}} \\
\end{align*}
\end{center}
\end{frame}
\begin{frame}{~}
\addtocounter{framenumber}{-1}
\tableofcontents[hideallsubsections]
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{DWARF and stack unwinding data}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Introduction}
\begin{frame}[fragile]{We often use stack unwinding!}
\begin{columns}[c]
\begin{column}{0.70\textwidth}
\begin{lstlisting}[language=gdb, numbers=none, escapechar=|]
Program received signal SIGSEGV.
0x54625 in fct_b at segfault.c:5
5 printf("%l\n", *b);
|\pause| (gdb) backtrace
#0 0x54625 in fct_b at segfault.c:5
#1 0x54663 in fct_a at segfault.c:10
#2 0x54674 in main at segfault.c:14
|\pause| (gdb) frame 1
#1 0x54663 in fct_a at segfault.c:10
10 fct_b((int*) a);
|\pause| (gdb) print a
$1 = 84
\end{lstlisting}
\vspace{-1em}
\pause{}
\begin{center}
\textbf{\Large How does it work?!}
\end{center}
\end{column}
\begin{column}{0.35\textwidth}
\pause{}
\includegraphics[width=0.95\linewidth]{img/call_stack}
\end{column}
\end{columns}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Stack frames and unwinding}
\begin{frame}{Call stack and registers}
\begin{columns}[c]
\begin{column}{0.55\textwidth}
\begin{center}
\large\bf
How do we get the grandparent RA\@?
\medskip
Isn't it as trivial as \texttt{pop()}?
\vspace{2em}
\onslide<2>{We only have \reg{rsp} and \reg{rip}.}
\end{center}
\end{column}
\begin{column}{0.45\textwidth}
\includegraphics[width=0.95\linewidth]{img/call_stack}
\end{column}
\end{columns}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{DWARF tables}
\newcolumntype{a}{>{\columncolor{RedOrange}}l}
\begin{frame}{DWARF unwinding data}
\vspace{2em}
\tt \footnotesize
\begin{tabular}{
>{\columncolor{YellowGreen}}l
>{\columncolor{Thistle}}l
l l l l l l
>{\columncolor{Apricot}}l}
~LOC & CFA & rbx & rbp & r12 & r13 & r14 & r15 & ra \\
0084950 & rsp+8 & u & u & u & u & u & u & c-8 \\
0084952 & rsp+16 & u & u & u & u & u & c-16 & c-8 \\
0084954 & rsp+24 & u & u & u & u & c-24 & c-16 & c-8 \\
0084956 & rsp+32 & u & u & u & c-32 & c-24 & c-16 & c-8 \\
0084958 & rsp+40 & u & u & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084959 & rsp+48 & u & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
\rowcolor{Aquamarine} 008495a & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084962 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a19 & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a1d & rsp+48 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a1e & rsp+40 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a20 & rsp+32 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a22 & rsp+24 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a24 & rsp+16 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a26 & rsp+8 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
0084a30 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
\end{tabular}
\pause{}
\vspace{-3cm}
\hfill\includegraphics[height=3cm, angle=45, origin=c]{img/dwarf_logo}
\hspace{-1cm}
\end{frame}
\begin{frame}[t, fragile]{The real DWARF}
\begin{lstlisting}[numbers=none, language=]
00009b30 48 009b34 FDE cie=0000 pc=0084950..0084b37
DW_CFA_advance_loc: 2 to 0000000000084952
DW_CFA_def_cfa_offset: 16
DW_CFA_offset: r15 (r15) at cfa-16
DW_CFA_advance_loc: 2 to 0000000000084954
DW_CFA_def_cfa_offset: 24
DW_CFA_offset: r14 (r14) at cfa-24
DW_CFA_advance_loc: 2 to 0000000000084956
DW_CFA_def_cfa_offset: 32
DW_CFA_offset: r13 (r13) at cfa-32
DW_CFA_advance_loc: 2 to 0000000000084958
DW_CFA_def_cfa_offset: 40
DW_CFA_offset: r12 (r12) at cfa-40
DW_CFA_advance_loc: 1 to 0000000000084959
[...]
\end{lstlisting}
\begin{itemize}
\item[\textbf{$\longrightarrow$}] \textbf{\alert{constructed} on-demand
by a \alert{Turing-complete bytecode}!}
\end{itemize}
\pause{}
\vspace{-6.5cm}
\begin{center}
\bf \fontsize{8cm}{1cm}
\colorbox{white}{\alert{Complex}} \\
\colorbox{white}{\alert{\& slow!}}
\end{center}
\end{frame}
\begin{frame}{Why does slow matter?}
\begin{itemize}
\item{} After all, we're talking about \alert{debugging procedures} ran
by a \alert{human being} (slower than the machine).
\ldots{}or are we?
\end{itemize}
\pause{}
\begin{center}
\textbf{\Large{}No!}
\end{center}
\begin{itemize}
\pause{}\item{} Pretty much any \alert{program analysis tool}
\pause{}\item{} \alert{Profiling} with polling profilers
\pause{}\item{} \alert{Exception handling} in C++
\end{itemize}
\vspace{2em}
\begin{center}
\textbf{\Large{}Debug data is not only for debugging}
\end{center}
\vspace{1em}
$\leadsto$ we might want \alert{an alternative time/space trade-off}
\end{frame}
\newcommand{\LinusMailOne}{
``Sorry, but last time was too f\dots painful. The whole (and
only) point of unwinders is to make debugging easy
when a bug occurs. But \alert{the dwarf unwinder had bugs}
itself, or \alert{our dwarf information had bugs}, and in either
case it actually turned several trivial bugs into a \alert{total
undebuggable hell}.''
}
\newcommand{\LinusMailTwo}{
``If you can \alert{mathematically prove that the unwinder is
correct} — even in the presence of bogus and actively
incorrect unwinding information — and never ever
follows a bad pointer, \alert{Ill reconsider}.''
}
\newcommand{\LinusSource}{
\hfill ---~Linus Torvalds, Kernel mailing list, 2012
}
\begin{frame}{A debugging hell: Linux kernel}
\LinusMailOne{}
\only<1-2>{
\vspace{1em}
\LinusSource{}
}
\vspace{1em}
\only<2>{
\begin{center}
\Large\bf
\alert{This is where we still are!}
\end{center}
}
\only<3>{
\LinusMailTwo{}
\vspace{1em}
\LinusSource{}
}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data as an abstract state}
\newcommand{\tblrowval}[4]{#1 & #2 & \only<2->{#3} & \only<2->{#4} \\}
\newcommand{\blknote}[1]
{\begin{block}{}
\centering\large
#1
\end{block}}
\newcommand{\blklnote}[1]
{\begin{block}{}
\large
#1
\end{block}}
\newcommand{\tblhl}{\rowcolor{Tan}}
\begin{frame}{Working on an example}
\newcommand{\firsttblrows}{
\tblrowval{\hspace{-2ex}<{\bf foo}>:}{}{\textbf{CFA}}{\textbf{ra}}
\rowonly<3>{\tblhl{}} \tblrowval{push}{\%r15}{rsp+8}{c-8}
\rowonly<4>{\tblhl{}} \tblrowval{push}{\%r14}{rsp+16}{c-8}
\rowonly<5>{\tblhl{}} \tblrowval{mov}{\$0x3,\%eax}{rsp+24}{c-8}
\rowonly<6>{\tblhl{}} \tblrowval{push}{\%r13}{rsp+24}{c-8}
\tblrowval{push}{\%r12}{rsp+32}{c-8}
\tblrowval{push}{\%rbp}{rsp+40}{c-8}
\tblrowval{push}{\%rbx}{rsp+48}{c-8}
\tblrowval{sub}{\$0x68,\%rsp}{rsp+56}{c-8}
}
\only<-8>{
\begin{table}
\ttfamily\large
\begin{tabularx}{0.9\linewidth}{
l
b
>{\columncolor{SkyBlue}}s
>{\columncolor{SkyBlue}}s
}
\firsttblrows{}%
\tblrowval{add}{\$0x68,\%rsp}{rsp+160}{c-8}
\tblrowval{pop}{\%rbx}{rsp+56}{c-8}
\tblrowval{pop}{\%rbp}{rsp+48}{c-8}
\end{tabularx}
\end{table}
\blknote{
\centering
\begin{overlayarea}{0.9\textwidth}{4.8ex}
\only<3>{Upon function call, \alert{ra = *(\reg{rsp})} (ABI)}
\only<4>{\texttt{push} decreases \reg{rsp} by 8: %
\alert{ra = *(\reg{rsp} + 8)}}
\only<5>{and again: %
\alert{ra = *(\reg{rsp} + 16)}}
\only<6>{This \texttt{mov} leaves \reg{rsp} untouched: %
\alert{ra = *(\reg{rsp} + 16)}}
\only<7>{The unwinding table can actually be seen as\\
an \alert{abstract interpretation} of the code\ldots}
\only<8>{\ldots and thus, for a given run, be
\alert{re-computed from scratch}}
\end{overlayarea}
}
}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data synthesis from binaries}
\begin{frame}{Why would synthesis be useful?}
\begin{itemize}
\item As said earlier, \alert{DWARF is complex}
\item Some compilers \alert{do not generate it}: hard to \alert{debug}
\& \alert{profile}.
\item Think of \alert{JIT-compiled assembly} (eg. JVM)
\item \ldots{}or even \alert{hand-written inlined assembly}!
\begin{itemize}
\item Painful enough to write for not bothering with DWARF
\item May not even be known by the programmer, breaks gdb
\item May be wrong (remember Linus!)
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{What have we got so far?}
We now want to \alert{synthesize unwinding data}. That means
\alert{forgetting the blue part of the previous schemes}.
\begin{itemize}
\item Upon entering a function, we know (ABI)
\[ \cfa = \reg{rsp} - 8
\qquad \ra = \cfa + 8 \]
\item For each instruction, we know \alert{how it changes \cfa}.
\item We assume \alert{\ra{} constant wrt. \cfa}.
\begin{itemize}
\item[$\leadsto$] only \cfa{} tracking matters (for unwinding)
\end{itemize}
\item We had a working strategy for a \alert{linear execution}
\item We still have to handle
\begin{itemize}
\item \alert{\cfa{} expression}
\item \alert{control flow graph}
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{\cfa{} expression}
Two possibilities:
\begin{itemize}
\item Either we track \cfa{} wrt. \reg{rsp}
\begin{itemize}
\item and update it after each instruction if needed
\end{itemize}
\item Or \reg{rbp} is used as base pointer: easy
\end{itemize}
\end{frame}
\begin{frame}{Control flow graph}
\begin{columns}[c]
\column{0.4\textwidth}
\lstinputlisting[language=C]{src/cfg/cfg.c}
\column{0.30\textwidth}
\begin{figure}
\centering
\includegraphics[width=\textwidth]{src/cfg/cfg.png}
\end{figure}
\end{columns}
\begin{itemize}
\item \alert{Upon split} (eg. \texttt{X})\alert{:} nothing special,
propagate end state of X to child nodes A and B
\item \alert{Upon join} (eg. \texttt{while\_end})\alert{:} check
consistency of both input states
\begin{itemize}
\item If tricky, \texttt{gcc} will have used \reg{rbp}, even
with \texttt{-fomit-frame-pointer}.
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{}
\vfill
\centering
\begin{beamercolorbox}[sep=8pt,center,shadow=true,rounded=true]{title}
\Large\bf
Demo time!
\end{beamercolorbox}
\vfill
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data compilation}
\subsection{Compilation ahead-of-time}
\begin{frame}{Compilation overview}
\begin{itemize}
\item Compiled to \alert{C code}
\item C code then \alert{compiled to native binary} (gcc)
\begin{itemize}
\item[$\leadsto$] gcc optimisations for free
\end{itemize}
\item Compiled as \alert{separate \texttt{.so} files}, called \ehelfs{}
\bigskip{}
\item Morally a \alert{monolithic switch} on IPs
\item Each case contains assembly that computes a \alert{row of the
table}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[shrink]{Compilation example: generated C}
\lstinputlisting[language=C]{src/fib7/fib7.eh_elf_basic.c}
\pause{}
\vspace{1em}
\begin{center}
The real code is optimised, but boils down to this.
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Mostly plug-and-play: libunwind interface}
\begin{itemize}
\item \alert{libunwind}: \textit{de facto} standard library for
unwinding
\item Relies on DWARF
\bigskip{}
\item \texttt{libunwind-eh\_elf}: alternative implementation using
\ehelfs{}
\item[$\leadsto$] almost \alert{``relink-and-play''} for existing projects!
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Results}
\begin{frame}{Time performance}
\begin{columns}
\begin{column}{1.1\textwidth}
\begin{table}[h]
\centering
\begin{tabular}{l l r r r r r}
\toprule
& \thead{Unwinding method} & \thead{Frames \\ unwound}
& \thead{Tot. time \\ ($\mu s$)}
& \thead{Avg. \\ time / frame \\ ($ns$)}
& \thead{Time ratio} \\
\midrule
\midrule
\multirow{2}{*}{\rotatebox{90}{\textbf{\prog{Gzip}}~~}}
&\alert{\ehelfs{}}
& 331523 % Frames unwound
& 25930 % Total time
& 78 % Avg time
& 1
\\
& \prog{libunwind}, \alert{cached}
& 331523 % Frames unwound
& 403292 % Total time
& 1217 % Avg time
& \alert{15.6}
\\
&\prog{libunwind}, \alert{uncached}
& 331523 % Frames unwound
& 2197296 % Total time
& 6635 % Avg time
& \alert{84.7}
\\
\midrule
\multirow{2}{*}{\rotatebox{90}{\textbf{\prog{hackbench}}}}
& \alert{\ehelfs{}}
& 152297 % Frames unwound
& 12941 % Total time
& 84 % Avg time
& 1
\\
& \prog{libunwind}, \alert{cached}
& 152297 % Frames unwound
& 316907 % Total time
& 2076 % Avg time
& \alert{24.6}
\\
& \prog{libunwind}, \alert{uncached}
& 152297 % Frames unwound
& 982697 % Total time
& 6439 % Avg time
& \alert{76.3}\vspace{0.8em}
\\
\bottomrule
\end{tabular}
\end{table}
\end{column}
\end{columns}
\end{frame}
\begin{frame}{Space overhead}
\begin{table}[h]
\centering
\begin{tabular}{l r r r r}
\toprule
\thead{Shared object}
& \thead{Original \\ \lstinline{.eh\_frame}}
& \thead{Generated \\ \lstinline{eh_elf} \lstinline{.text}}
& \thead{\% of original \\ program size}
& \thead{Growth \\ factor} \\
\midrule
find & 21.3 KiB & 68.3 KiB & 46.63 & 3.21 \\
\hfill + libs & 196.6 KiB & 577.2 KiB & 19.75 & 2.94 \\
\hline
python3.7
& 160.0 B & 1.4 KiB & 355.98 & 8.33 \\
\hfill + libs
& 449.0 KiB & 1.1 MiB & 23.77 & 2.61 \\
\hline
gzip & 5.1 KiB & 10.9 KiB & 16.48 & 2.13 \\
\hfill + libs & 143.5 KiB & 413.1 KiB & 24.96 & 2.88 \\
\hline
hackbench
& 568.0 B & 3.2 KiB & 107.99 & 5.74 \\
\hfill + libs
& 150.4 KiB & 439.4 KiB & 26.60 & 2.92 \\
\hline
sqlite & 121.7 KiB & 382.8 KiB & 34.68 & 3.14 \\
\hfill + libs & 376.2 KiB & 1.1 MiB & 25.32 & 3.00 \\
\bottomrule
\end{tabular}
\end{table}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section*{Conclusion}
\setcounter{section}{0}
\begin{frame}{A fragment of our article}
The original article \textbf{Reliable and Fast DWARF-based Stack Unwinding}
contains
\vspace{1em}
\begin{itemize}
\item{} DWARF unwinding tables validation;
\item{} DWARF unwinding tables synthesis;
\item{} DWARF-based unwinding speedup.
\end{itemize}
\vspace{1em}
\begin{center}
Come and chat if interested! \texttt{:)}
\end{center}
\end{frame}
\end{document}