Modifications made with Francesco + remove useless section title slides
This commit is contained in:
parent
f350dfa4d6
commit
93dcc441a3
1 changed files with 62 additions and 196 deletions
258
slides.tex
258
slides.tex
|
@ -43,7 +43,7 @@
|
|||
\newcommand{\cmark}{\color{OliveGreen}\ding{52}}
|
||||
\newcommand{\xmark}{\color{BrickRed}\ding{56}}
|
||||
|
||||
\AtBeginSection{
|
||||
\newcommand{\sectiontitleframe}{
|
||||
\begin{frame}
|
||||
\vfill
|
||||
\centering
|
||||
|
@ -51,8 +51,7 @@
|
|||
\usebeamerfont{title}\insertsectionhead\par%
|
||||
\end{beamercolorbox}
|
||||
\vfill
|
||||
\end{frame}
|
||||
}
|
||||
\end{frame}}
|
||||
|
||||
\lstdefinelanguage{gdb}{
|
||||
morekeywords={gdb},
|
||||
|
@ -68,8 +67,8 @@
|
|||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\title[\sectionline] {Reliable and Fast DWARF-based Stack Unwinding}
|
||||
\author[\slidecountline]{\textbf{Théophile Bastian},\\
|
||||
\textbf{Stephen Kell}, \\
|
||||
\author[\slidecountline]{\textbf{Théophile Bastian}\\
|
||||
\textbf{Stephen Kell} \\
|
||||
\textbf{Francesco Zappa Nardelli}}
|
||||
\date{}
|
||||
%\subject{}
|
||||
|
@ -84,15 +83,11 @@
|
|||
|
||||
\vspace{-2em}
|
||||
\begin{center}
|
||||
\begin{align*}
|
||||
\text{Slides: } &\text{\todo{add URL for this PDF}} \\
|
||||
\end{align*}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
{\large \url{https://huit.re/frdwarf}}\\
|
||||
{\todo{font size} Slides, paper, code}
|
||||
|
||||
\begin{frame}{~}
|
||||
\addtocounter{framenumber}{-1}
|
||||
\tableofcontents[hideallsubsections]
|
||||
\todo{FUNDING: ONR Vertica + Google Research Fellowship (logos)}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
@ -101,27 +96,18 @@
|
|||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Introduction}
|
||||
|
||||
\begin{frame}[fragile]{We often use stack unwinding!}
|
||||
\begin{frame}[fragile]{}
|
||||
\begin{columns}[c]
|
||||
\begin{column}{0.70\textwidth}
|
||||
\begin{lstlisting}[language=gdb, numbers=none, escapechar=|]
|
||||
Program received signal SIGSEGV.
|
||||
0x54625 in fct_b at segfault.c:5
|
||||
5 printf("%l\n", *b);
|
||||
$ ./a.out
|
||||
Segmentation fault.
|
||||
|
||||
|\pause| (gdb) backtrace
|
||||
#0 0x54625 in fct_b at segfault.c:5
|
||||
#1 0x54663 in fct_a at segfault.c:10
|
||||
#2 0x54674 in main at segfault.c:14
|
||||
|
||||
|\pause| (gdb) frame 1
|
||||
#1 0x54663 in fct_a at segfault.c:10
|
||||
10 fct_b((int*) a);
|
||||
|
||||
|\pause| (gdb) print a
|
||||
$1 = 84
|
||||
|\pause|(gdb) backtrace
|
||||
#0 0x54625 in fct_b
|
||||
#1 0x54663 in fct_a
|
||||
#2 0x54674 in main
|
||||
\end{lstlisting}
|
||||
\vspace{-1em}
|
||||
\pause{}
|
||||
\begin{center}
|
||||
\textbf{\Large How does it work?!}
|
||||
|
@ -142,15 +128,11 @@ $1 = 84
|
|||
\begin{column}{0.55\textwidth}
|
||||
\begin{center}
|
||||
\large\bf
|
||||
How do we get the grandparent RA\@?
|
||||
|
||||
\medskip
|
||||
|
||||
Isn't it as trivial as \texttt{pop()}?
|
||||
How do we get the RA\@?\\Easy, \reg{rbp}!
|
||||
|
||||
\vspace{2em}
|
||||
|
||||
\onslide<2>{We only have \reg{rsp} and \reg{rip}.}
|
||||
\onslide<2>{What if we only have \reg{rsp}?}
|
||||
|
||||
\end{center}
|
||||
\end{column}
|
||||
|
@ -280,7 +262,7 @@ $1 = 84
|
|||
follows a bad pointer, \alert{I’ll reconsider}.''
|
||||
}
|
||||
\newcommand{\LinusSource}{
|
||||
\hfill ---~Linus Torvalds, Kernel mailing list, 2012
|
||||
\hfill ---~Linus Torvalds, 2012
|
||||
}
|
||||
\begin{frame}{A debugging hell: Linux kernel}
|
||||
\LinusMailOne{}
|
||||
|
@ -307,7 +289,8 @@ $1 = 84
|
|||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Unwinding data as an abstract state}
|
||||
\section{Unwinding data as an abstract execution of the assembly}
|
||||
\sectiontitleframe{}
|
||||
|
||||
\newcommand{\tblrowval}[4]{#1 & #2 & \only<2->{#3} & \only<2->{#4} \\}
|
||||
\newcommand{\blknote}[1]
|
||||
|
@ -353,17 +336,16 @@ $1 = 84
|
|||
\blknote{
|
||||
\centering
|
||||
\begin{overlayarea}{0.9\textwidth}{4.8ex}
|
||||
\only<3>{Upon function call, \alert{ra = *(\reg{rsp})} (ABI)}
|
||||
\only<3>{Upon function call, \alert{ra = *(\reg{rsp})}}
|
||||
\only<4>{\texttt{push} decreases \reg{rsp} by 8: %
|
||||
\alert{ra = *(\reg{rsp} + 8)}}
|
||||
\only<5>{and again: %
|
||||
\alert{ra = *(\reg{rsp} + 16)}}
|
||||
\only<6>{This \texttt{mov} leaves \reg{rsp} untouched: %
|
||||
\alert{ra = *(\reg{rsp} + 16)}}
|
||||
\only<7>{The unwinding table can actually be seen as\\
|
||||
an \alert{abstract interpretation} of the code\ldots}
|
||||
\only<8>{\ldots and thus, for a given run, be
|
||||
\alert{re-computed from scratch}}
|
||||
\only<7>{The unwinding table captures an \alert{abstract execution}
|
||||
of the code\ldots}
|
||||
\only<8>{\ldots and thus can be \alert{synthesized from the binary}.}
|
||||
\end{overlayarea}
|
||||
}
|
||||
}
|
||||
|
@ -372,79 +354,38 @@ $1 = 84
|
|||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Unwinding data synthesis from binaries}
|
||||
|
||||
\begin{frame}{Why would synthesis be useful?}
|
||||
\begin{itemize}
|
||||
\item As said earlier, \alert{DWARF is complex}
|
||||
\item Some compilers \alert{do not generate it}: hard to \alert{debug}
|
||||
\& \alert{profile}.
|
||||
\item Think of \alert{JIT-compiled assembly} (e.g.\ JVM)
|
||||
\item \ldots{}or even \alert{hand-written inlined assembly}!
|
||||
\begin{itemize}
|
||||
\item Painful enough to write without also bothering with DWARF
|
||||
\item May not even be known by the programmer, breaks gdb
|
||||
\item May be wrong (remember Linus!)
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{What have we got so far?}
|
||||
We now want to \alert{synthesize unwinding data}. That means
|
||||
\alert{forgetting the blue part of the previous schemes}.
|
||||
%\begin{frame}{Why would synthesis be useful?}
|
||||
% \begin{itemize}
|
||||
% \item As said earlier, \alert{DWARF is complex}
|
||||
% \item Some compilers \alert{do not generate it}: hard to \alert{debug}
|
||||
% \& \alert{profile}.
|
||||
% \item Think of \alert{JIT-compiled assembly} (eg. JVM)
|
||||
% \item \ldots{}or even \alert{hand-written inlined assembly}!
|
||||
% \begin{itemize}
|
||||
% \item Painful enough to write for not bothering with DWARF
|
||||
% \item May not even be known by the programmer, breaks gdb
|
||||
% \item May be wrong (remember Linus!)
|
||||
% \end{itemize}
|
||||
% \end{itemize}
|
||||
%\end{frame}
|
||||
|
||||
\begin{frame}{How do we actually synthesize?}
|
||||
\begin{itemize}
|
||||
\item Upon entering a function, we know (ABI)
|
||||
\[ \cfa = \reg{rsp} - 8
|
||||
\qquad \ra = \cfa + 8 \]
|
||||
\item For each instruction, we know \alert{how it changes \cfa}.
|
||||
\item We assume \alert{\ra{} constant w.r.t.\ \cfa}.
|
||||
\item The semantics of each instruction specifies \alert{how it changes \cfa}.
|
||||
\begin{itemize}
|
||||
\item[$\leadsto$] only \cfa{} tracking matters (for unwinding)
|
||||
\end{itemize}
|
||||
\item We had a working strategy for a \alert{linear execution}
|
||||
\item We still have to handle
|
||||
\begin{itemize}
|
||||
\item \alert{\cfa{} expression}
|
||||
\item \alert{control flow graph}
|
||||
\item Heuristic to decide whether we index with \reg{rbp} or
|
||||
\reg{rsp}
|
||||
\end{itemize}
|
||||
\item By performing a symbolic execution, we can \alert{synthesize the
|
||||
unwinding table} line by line.
|
||||
\item Control flow: forward data-flow analysis
|
||||
\item The fixpoints are immediate, cf.\ the article
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{\cfa{} expression}
|
||||
Two possibilities:
|
||||
\begin{itemize}
|
||||
\item Either we track \cfa{} w.r.t.\ \reg{rsp}
|
||||
\begin{itemize}
|
||||
\item and update it after each instruction if needed
|
||||
\end{itemize}
|
||||
\item Or \reg{rbp} is used as base pointer: easy
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Control flow graph}
|
||||
\begin{columns}[c]
|
||||
\column{0.4\textwidth}
|
||||
\lstinputlisting[language=C]{src/cfg/cfg.c}
|
||||
|
||||
\column{0.30\textwidth}
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics[width=\textwidth]{src/cfg/cfg.png}
|
||||
\end{figure}
|
||||
\end{columns}
|
||||
|
||||
\begin{itemize}
|
||||
\item \alert{Upon split} (e.g.\ \texttt{X})\alert{:} nothing special,
|
||||
propagate end state of X to child nodes A and B
|
||||
\item \alert{Upon join} (e.g.\ \texttt{while\_end})\alert{:} check
|
||||
consistency of both input states
|
||||
\begin{itemize}
|
||||
\item If tricky, \texttt{gcc} will have used \reg{rbp}, even
|
||||
with \texttt{-fomit-frame-pointer}.
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{}
|
||||
\vfill
|
||||
\centering
|
||||
|
@ -458,6 +399,8 @@ $1 = 84
|
|||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Unwinding data compilation}
|
||||
|
||||
\sectiontitleframe{}
|
||||
|
||||
\subsection{Compilation ahead-of-time}
|
||||
|
||||
\begin{frame}{Compilation overview}
|
||||
|
@ -494,7 +437,6 @@ $1 = 84
|
|||
\begin{itemize}
|
||||
\item \alert{libunwind}: \textit{de facto} standard library for
|
||||
unwinding
|
||||
\item Relies on DWARF
|
||||
|
||||
\bigskip{}
|
||||
|
||||
|
@ -509,98 +451,22 @@ $1 = 84
|
|||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Results}
|
||||
|
||||
\begin{frame}{Time performance}
|
||||
\begin{columns}
|
||||
\begin{column}{1.1\textwidth}
|
||||
\begin{table}[h]
|
||||
\centering
|
||||
\begin{tabular}{l l r r r r r}
|
||||
\toprule
|
||||
& \thead{Unwinding method} & \thead{Frames \\ unwound}
|
||||
& \thead{Tot. time \\ ($\mu s$)}
|
||||
& \thead{Avg. \\ time / frame \\ ($ns$)}
|
||||
& \thead{Time ratio} \\
|
||||
\midrule
|
||||
\midrule
|
||||
\begin{frame}{Performance}
|
||||
\begin{itemize}
|
||||
\item \alert{Speedup}: x15 (\prog{gzip}) to x25 (\prog{hackbench}) vs.
|
||||
libunwind
|
||||
|
||||
\multirow{2}{*}{\rotatebox{90}{\textbf{\prog{Gzip}}~~}}
|
||||
&\alert{\ehelfs{}}
|
||||
& 331523 % Frames unwound
|
||||
& 25930 % Total time
|
||||
& 78 % Avg time
|
||||
& 1
|
||||
\\
|
||||
& \prog{libunwind}, \alert{cached}
|
||||
& 331523 % Frames unwound
|
||||
& 403292 % Total time
|
||||
& 1217 % Avg time
|
||||
& \alert{15.6}
|
||||
\\
|
||||
&\prog{libunwind}, \alert{uncached}
|
||||
& 331523 % Frames unwound
|
||||
& 2197296 % Total time
|
||||
& 6635 % Avg time
|
||||
& \alert{84.7}
|
||||
\\
|
||||
\midrule
|
||||
\multirow{2}{*}{\rotatebox{90}{\textbf{\prog{hackbench}}}}
|
||||
& \alert{\ehelfs{}}
|
||||
& 152297 % Frames unwound
|
||||
& 12941 % Total time
|
||||
& 84 % Avg time
|
||||
& 1
|
||||
\\
|
||||
& \prog{libunwind}, \alert{cached}
|
||||
& 152297 % Frames unwound
|
||||
& 316907 % Total time
|
||||
& 2076 % Avg time
|
||||
& \alert{24.6}
|
||||
\\
|
||||
& \prog{libunwind}, \alert{uncached}
|
||||
& 152297 % Frames unwound
|
||||
& 982697 % Total time
|
||||
& 6439 % Avg time
|
||||
& \alert{76.3}\vspace{0.8em}
|
||||
\\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
\begin{itemize}
|
||||
\item libunwind: state of the art, aggressive caching.
|
||||
\end{itemize}
|
||||
|
||||
\begin{frame}{Space overhead}
|
||||
\begin{table}[h]
|
||||
\centering
|
||||
\begin{tabular}{l r r r r}
|
||||
\toprule
|
||||
\thead{Shared object}
|
||||
& \thead{Original \\ \lstinline{.eh\_frame}}
|
||||
& \thead{Generated \\ \lstinline{eh_elf} \lstinline{.text}}
|
||||
& \thead{\% of original \\ program size}
|
||||
& \thead{Growth \\ factor} \\
|
||||
\midrule
|
||||
find & 21.3 KiB & 68.3 KiB & 46.63 & 3.21 \\
|
||||
\hfill + libs & 196.6 KiB & 577.2 KiB & 19.75 & 2.94 \\
|
||||
\hline
|
||||
python3.7
|
||||
& 160.0 B & 1.4 KiB & 355.98 & 8.33 \\
|
||||
\hfill + libs
|
||||
& 449.0 KiB & 1.1 MiB & 23.77 & 2.61 \\
|
||||
\hline
|
||||
gzip & 5.1 KiB & 10.9 KiB & 16.48 & 2.13 \\
|
||||
\hfill + libs & 143.5 KiB & 413.1 KiB & 24.96 & 2.88 \\
|
||||
\hline
|
||||
hackbench
|
||||
& 568.0 B & 3.2 KiB & 107.99 & 5.74 \\
|
||||
\hfill + libs
|
||||
& 150.4 KiB & 439.4 KiB & 26.60 & 2.92 \\
|
||||
\hline
|
||||
sqlite & 121.7 KiB & 382.8 KiB & 34.68 & 3.14 \\
|
||||
\hfill + libs & 376.2 KiB & 1.1 MiB & 25.32 & 3.00 \\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
\item \alert{Space overhead}: x2.6 to x3 vs. DWARF
|
||||
|
||||
\vspace{2em}
|
||||
|
||||
\item[$\leadsto$] Alternative time/space trade-off, favorable e.g.\ for
|
||||
profiling.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
|
Loading…
Reference in a new issue