Modifications made with Francesco + remove useless section title slides

This commit is contained in:
Théophile Bastian 2019-10-14 16:06:59 +02:00
parent f350dfa4d6
commit 93dcc441a3

View file

@ -43,7 +43,7 @@
% Result markers: \cmark typesets dingbat 52 in OliveGreen, \xmark typesets
% dingbat 56 in BrickRed (presumably the pifont check/cross glyphs -- confirm
% against the preamble).
% NOTE(review): \color is a declaration and is not grouped here, so the colour
% stays active after the glyph until the enclosing group (e.g. table cell) ends.
\newcommand{\cmark}{\color{OliveGreen}\ding{52}}
\newcommand{\xmark}{\color{BrickRed}\ding{56}}
\AtBeginSection{
\newcommand{\sectiontitleframe}{
\begin{frame}
\vfill
\centering
@ -51,8 +51,7 @@
\usebeamerfont{title}\insertsectionhead\par%
\end{beamercolorbox}
\vfill
\end{frame}
}
\end{frame}}
\lstdefinelanguage{gdb}{
morekeywords={gdb},
@ -68,8 +67,8 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\title[\sectionline] {Reliable and Fast DWARF-based Stack Unwinding}
\author[\slidecountline]{\textbf{Théophile Bastian},\\
\textbf{Stephen Kell}, \\
\author[\slidecountline]{\textbf{Théophile Bastian}\\
\textbf{Stephen Kell} \\
\textbf{Francesco Zappa Nardelli}}
\date{}
%\subject{}
@ -84,15 +83,11 @@
\vspace{-2em}
\begin{center}
\begin{align*}
\text{Slides: } &\text{\todo{add URL for this PDF}} \\
\end{align*}
\end{center}
\end{frame}
{\large \url{https://huit.re/frdwarf}}\\
{\todo{font size} Slides, paper, code}
\begin{frame}{~}
\addtocounter{framenumber}{-1}
\tableofcontents[hideallsubsections]
\todo{FUNDING: ONR Vertica + Google Research Fellowship (logos)}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -101,27 +96,18 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Introduction}
\begin{frame}[fragile]{We often use stack unwinding!}
\begin{frame}[fragile]{}
\begin{columns}[c]
\begin{column}{0.70\textwidth}
\begin{lstlisting}[language=gdb, numbers=none, escapechar=|]
Program received signal SIGSEGV.
0x54625 in fct_b at segfault.c:5
5 printf("%l\n", *b);
$ ./a.out
Segmentation fault.
|\pause|(gdb) backtrace
#0 0x54625 in fct_b at segfault.c:5
#1 0x54663 in fct_a at segfault.c:10
#2 0x54674 in main at segfault.c:14
|\pause| (gdb) frame 1
#1 0x54663 in fct_a at segfault.c:10
10 fct_b((int*) a);
|\pause| (gdb) print a
$1 = 84
#0 0x54625 in fct_b
#1 0x54663 in fct_a
#2 0x54674 in main
\end{lstlisting}
\vspace{-1em}
\pause{}
\begin{center}
\textbf{\Large How does it work?!}
@ -142,15 +128,11 @@ $1 = 84
\begin{column}{0.55\textwidth}
\begin{center}
\large\bf
How do we get the grandparent RA\@?
\medskip
Isn't it as trivial as \texttt{pop()}?
How do we get the RA\@?\\Easy, \reg{rbp}!
\vspace{2em}
\onslide<2>{We only have \reg{rsp} and \reg{rip}.}
\onslide<2>{What if we only have \reg{rsp}?}
\end{center}
\end{column}
@ -280,7 +262,7 @@ $1 = 84
follows a bad pointer, \alert{I'll reconsider}.''
}
\newcommand{\LinusSource}{
\hfill ---~Linus Torvalds, Kernel mailing list, 2012
\hfill ---~Linus Torvalds, 2012
}
\begin{frame}{A debugging hell: Linux kernel}
\LinusMailOne{}
@ -307,7 +289,8 @@ $1 = 84
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data as an abstract state}
\section{Unwinding data as an abstract execution of the assembly}
\sectiontitleframe{}
% \tblrowval{a}{b}{c}{d}: emits one four-cell table row terminated by \\.
% The third and fourth cells are wrapped in \only<2->, so they are typeset
% only from the second overlay onwards.
\newcommand{\tblrowval}[4]{#1 & #2 & \only<2->{#3} & \only<2->{#4} \\}
\newcommand{\blknote}[1]
@ -353,17 +336,16 @@ $1 = 84
\blknote{
\centering
\begin{overlayarea}{0.9\textwidth}{4.8ex}
\only<3>{Upon function call, \alert{ra = *(\reg{rsp})} (ABI)}
\only<3>{Upon function call, \alert{ra = *(\reg{rsp})}}
\only<4>{\texttt{push} decreases \reg{rsp} by 8: %
\alert{ra = *(\reg{rsp} + 8)}}
\only<5>{and again: %
\alert{ra = *(\reg{rsp} + 16)}}
\only<6>{This \texttt{mov} leaves \reg{rsp} untouched: %
\alert{ra = *(\reg{rsp} + 16)}}
\only<7>{The unwinding table can actually be seen as\\
an \alert{abstract interpretation} of the code\ldots}
\only<8>{\ldots and thus, for a given run, be
\alert{re-computed from scratch}}
\only<7>{The unwinding table captures an \alert{abstract execution}
of the code\ldots}
\only<8>{\ldots and thus can be \alert{synthesized from the binary}.}
\end{overlayarea}
}
}
@ -372,77 +354,36 @@ $1 = 84
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data synthesis from binaries}
\begin{frame}{Why would synthesis be useful?}
\begin{itemize}
\item As said earlier, \alert{DWARF is complex}
\item Some compilers \alert{do not generate it}: hard to \alert{debug}
\& \alert{profile}.
\item Think of \alert{JIT-compiled assembly} (e.g.\ JVM)
\item \ldots{}or even \alert{hand-written inlined assembly}!
\begin{itemize}
\item Painful enough to write without also bothering with DWARF
\item May not even be known by the programmer, breaks gdb
\item May be wrong (remember Linus!)
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{What have we got so far?}
We now want to \alert{synthesize unwinding data}. That means
\alert{forgetting the blue part of the previous schemes}.
%\begin{frame}{Why would synthesis be useful?}
% \begin{itemize}
% \item As said earlier, \alert{DWARF is complex}
% \item Some compilers \alert{do not generate it}: hard to \alert{debug}
% \& \alert{profile}.
% \item Think of \alert{JIT-compiled assembly} (eg. JVM)
% \item \ldots{}or even \alert{hand-written inlined assembly}!
% \begin{itemize}
% \item Painful enough to write for not bothering with DWARF
% \item May not even be known by the programmer, breaks gdb
% \item May be wrong (remember Linus!)
% \end{itemize}
% \end{itemize}
%\end{frame}
\begin{frame}{How do we actually synthesize?}
\begin{itemize}
\item Upon entering a function, we know (ABI)
\[ \cfa = \reg{rsp} - 8
\qquad \ra = \cfa + 8 \]
\item For each instruction, we know \alert{how it changes \cfa}.
\item We assume \alert{\ra{} constant wrt. \cfa}.
\item The semantics of each instruction specifies \alert{how it changes \cfa}.
\begin{itemize}
\item[$\leadsto$] only \cfa{} tracking matters (for unwinding)
\item Heuristic to decide whether we index with \reg{rbp} or
\reg{rsp}
\end{itemize}
\item We had a working strategy for a \alert{linear execution}
\item We still have to handle
\begin{itemize}
\item \alert{\cfa{} expression}
\item \alert{control flow graph}
\item By performing a symbolic execution, we can \alert{synthesize the
unwinding table} line by line.
\item Control flow: forward data-flow analysis
\item The fixpoints are immediate, cf.\ the article
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{\cfa{} expression}
Two possibilities:
\begin{itemize}
\item Either we track \cfa{} wrt. \reg{rsp}
\begin{itemize}
\item and update it after each instruction if needed
\end{itemize}
\item Or \reg{rbp} is used as base pointer: easy
\end{itemize}
\end{frame}
\begin{frame}{Control flow graph}
\begin{columns}[c]
\column{0.4\textwidth}
\lstinputlisting[language=C]{src/cfg/cfg.c}
\column{0.30\textwidth}
\begin{figure}
\centering
\includegraphics[width=\textwidth]{src/cfg/cfg.png}
\end{figure}
\end{columns}
\begin{itemize}
\item \alert{Upon split} (e.g.\ \texttt{X})\alert{:} nothing special,
propagate end state of X to child nodes A and B
\item \alert{Upon join} (eg. \texttt{while\_end})\alert{:} check
consistency of both input states
\begin{itemize}
\item If tricky, \texttt{gcc} will have used \reg{rbp}, even
with \texttt{-fomit-frame-pointer}.
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{}
@ -458,6 +399,8 @@ $1 = 84
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data compilation}
\sectiontitleframe{}
\subsection{Compilation ahead-of-time}
\begin{frame}{Compilation overview}
@ -494,7 +437,6 @@ $1 = 84
\begin{itemize}
\item \alert{libunwind}: \textit{de facto} standard library for
unwinding
\item Relies on DWARF
\bigskip{}
@ -509,98 +451,22 @@ $1 = 84
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Results}
\begin{frame}{Time performance}
\begin{columns}
\begin{column}{1.1\textwidth}
\begin{table}[h]
\centering
\begin{tabular}{l l r r r r r}
\toprule
& \thead{Unwinding method} & \thead{Frames \\ unwound}
& \thead{Tot. time \\ ($\mu s$)}
& \thead{Avg. \\ time / frame \\ ($ns$)}
& \thead{Time ratio} \\
\midrule
\midrule
\begin{frame}{Performance}
\begin{itemize}
\item \alert{Speedup}: $\times$15 (\prog{gzip}) to $\times$25 (\prog{hackbench})
vs.\ libunwind
\multirow{2}{*}{\rotatebox{90}{\textbf{\prog{Gzip}}~~}}
&\alert{\ehelfs{}}
& 331523 % Frames unwound
& 25930 % Total time
& 78 % Avg time
& 1
\\
& \prog{libunwind}, \alert{cached}
& 331523 % Frames unwound
& 403292 % Total time
& 1217 % Avg time
& \alert{15.6}
\\
&\prog{libunwind}, \alert{uncached}
& 331523 % Frames unwound
& 2197296 % Total time
& 6635 % Avg time
& \alert{84.7}
\\
\midrule
\multirow{2}{*}{\rotatebox{90}{\textbf{\prog{hackbench}}}}
& \alert{\ehelfs{}}
& 152297 % Frames unwound
& 12941 % Total time
& 84 % Avg time
& 1
\\
& \prog{libunwind}, \alert{cached}
& 152297 % Frames unwound
& 316907 % Total time
& 2076 % Avg time
& \alert{24.6}
\\
& \prog{libunwind}, \alert{uncached}
& 152297 % Frames unwound
& 982697 % Total time
& 6439 % Avg time
& \alert{76.3}\vspace{0.8em}
\\
\bottomrule
\end{tabular}
\end{table}
\end{column}
\end{columns}
\end{frame}
\begin{itemize}
\item libunwind: state of the art, aggressive caching.
\end{itemize}
\begin{frame}{Space overhead}
\begin{table}[h]
\centering
\begin{tabular}{l r r r r}
\toprule
\thead{Shared object}
& \thead{Original \\ \lstinline{.eh\_frame}}
& \thead{Generated \\ \lstinline{eh_elf} \lstinline{.text}}
& \thead{\% of original \\ program size}
& \thead{Growth \\ factor} \\
\midrule
find & 21.3 KiB & 68.3 KiB & 46.63 & 3.21 \\
\hfill + libs & 196.6 KiB & 577.2 KiB & 19.75 & 2.94 \\
\hline
python3.7
& 160.0 B & 1.4 KiB & 355.98 & 8.33 \\
\hfill + libs
& 449.0 KiB & 1.1 MiB & 23.77 & 2.61 \\
\hline
gzip & 5.1 KiB & 10.9 KiB & 16.48 & 2.13 \\
\hfill + libs & 143.5 KiB & 413.1 KiB & 24.96 & 2.88 \\
\hline
hackbench
& 568.0 B & 3.2 KiB & 107.99 & 5.74 \\
\hfill + libs
& 150.4 KiB & 439.4 KiB & 26.60 & 2.92 \\
\hline
sqlite & 121.7 KiB & 382.8 KiB & 34.68 & 3.14 \\
\hfill + libs & 376.2 KiB & 1.1 MiB & 25.32 & 3.00 \\
\bottomrule
\end{tabular}
\end{table}
\item \alert{Space overhead}: $\times$2.6 to $\times$3 vs.\ DWARF
\vspace{2em}
\item[$\leadsto$] Alternative time/space trade-off, favorable e.g.\ for
profiling.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%