2018-08-27 16:24:01 +02:00
|
|
|
% vim: spell spelllang=en
|
|
|
|
|
|
|
|
\documentclass[11pt,xcolor={usenames,dvipsnames}]{beamer}
|
|
|
|
\usetheme{Warsaw}
|
|
|
|
\usepackage[utf8]{inputenc}
|
|
|
|
\usepackage[english]{babel}
|
|
|
|
\usepackage[T1]{fontenc}
|
|
|
|
\usepackage{amsmath}
|
|
|
|
\usepackage{amsfonts}
|
|
|
|
\usepackage{amssymb}
|
2018-08-29 22:14:26 +02:00
|
|
|
|
|
|
|
\usepackage{../shared/my_listings}
|
|
|
|
%\usepackage{../shared/my_hyperref}
|
|
|
|
\usepackage{../shared/specific}
|
|
|
|
\usepackage{../shared/common}
|
|
|
|
\usepackage{../shared/todo}
|
|
|
|
|
2018-08-27 16:24:01 +02:00
|
|
|
\setbeamertemplate{navigation symbols}{}
|
|
|
|
|
2018-09-02 02:07:59 +02:00
|
|
|
% \thenalert{<text>}: typesets <text> unchanged on overlay 1 and wrapped in
% \alert on overlay 2. Neither \only specification matches from overlay 3
% onwards, so nothing is typeset there.
% The trailing % signs keep the line breaks of this definition from
% introducing spaces into the expansion.
\newcommand{\thenalert}[1]{%
    \only<1>{#1}%
    \only<2>{\alert{#1}}%
}
|
|
|
|
|
2018-08-27 16:24:01 +02:00
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
\author[Théophile Bastian]{Théophile \textsc{Bastian} \\
|
|
|
|
\small{Under the supervision of Francesco Zappa Nardelli}}
|
|
|
|
\title{Internship defense, MPRI, M2}
|
|
|
|
\subtitle{Speeding up stack unwinding by compiling DWARF debugging data}
|
|
|
|
\date{March\ --\ August 2018}
|
|
|
|
%\subject{}
|
|
|
|
%\logo{}
|
|
|
|
\institute{Team PARKAS, INRIA, Paris}
|
|
|
|
|
|
|
|
\begin{document}
|
|
|
|
|
|
|
|
\begin{frame}
|
|
|
|
\addtocounter{framenumber}{-1}
|
|
|
|
\titlepage{}
|
|
|
|
|
|
|
|
\begin{center}
|
|
|
|
Slides: \url{https://tobast.fr/m2/slides.pdf} \\
|
|
|
|
Report: \url{https://tobast.fr/m2/report.pdf}
|
|
|
|
\end{center}
|
|
|
|
\end{frame}
|
|
|
|
|
|
|
|
\begin{frame}
|
|
|
|
\addtocounter{framenumber}{-1}
|
|
|
|
\tableofcontents
|
|
|
|
\end{frame}
|
|
|
|
|
2018-08-29 22:14:26 +02:00
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
\section{Stack unwinding data}
|
|
|
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
\subsection{Introduction}
|
|
|
|
|
|
|
|
\begin{frame}[fragile]{We often use stack unwinding!}
|
|
|
|
\begin{lstlisting}[language=, numbers=none, escapechar=|]
|
|
|
|
Program received signal SIGSEGV, Segmentation fault.
|
|
|
|
0x0000555555554625 in fct_b (m=0x5c) at segfault.c:5
|
|
|
|
5 printf("%l\n", *m);
|
|
|
|
|\pause|
|
|
|
|
(gdb) backtrace
|
|
|
|
#0 0x0000555555554625 in fct_b (m=0x5c) at segfault.c:5
|
|
|
|
#1 0x0000555555554663 in fct_a (n=42) at segfault.c:10
|
|
|
|
#2 0x0000555555554674 in main () at segfault.c:14
|
|
|
|
|\pause|
|
|
|
|
(gdb) frame 1
|
|
|
|
#1 0x0000555555554663 in fct_a (n=42) at segfault.c:10
|
|
|
|
10 fct_b((int*)(some_fct_a_var + 8));
|
|
|
|
|\pause|
|
|
|
|
(gdb) print some_fct_a_var
|
|
|
|
$1 = 84
|
|
|
|
\end{lstlisting}
|
|
|
|
|
|
|
|
\pause{}
|
|
|
|
\begin{center}
|
|
|
|
\textbf{\Large How does it work?!}
|
|
|
|
\end{center}
|
|
|
|
\vspace{1em}
|
|
|
|
\end{frame}
|
|
|
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
\subsection{Stack frames and unwinding}
|
|
|
|
|
|
|
|
\begin{frame}{Call stack and registers}
|
|
|
|
\begin{columns}[c]
|
|
|
|
\begin{column}{0.65\textwidth}
|
|
|
|
\begin{itemize}
|
|
|
|
\item Programs use a \alert{call stack}
|
|
|
|
\item Organized in \alert{stack frames}
|
|
|
|
\begin{itemize}
|
|
|
|
\item Local variables
|
|
|
|
\item Function parameters
|
|
|
|
\item Keep track of nesting, registers and ``return
|
|
|
|
point''
|
|
|
|
\end{itemize}
|
|
|
|
\end{itemize}
|
|
|
|
|
|
|
|
Common registers:
|
|
|
|
\begin{itemize}
|
|
|
|
\item \reg{rip}: program counter (PC)
|
|
|
|
|
|
|
|
\item \reg{rsp}: stack pointer
|
|
|
|
|
|
|
|
\item \reg{rbp}: base pointer
|
|
|
|
\begin{itemize}
|
|
|
|
\item Saves \reg{rsp}
|
|
|
|
\item Easy access
|
|
|
|
\item Wastes a register
|
|
|
|
\item Not often used (x86\_64)
|
|
|
|
\end{itemize}
|
|
|
|
\end{itemize}
|
|
|
|
\end{column}
|
|
|
|
\begin{column}{0.35\textwidth}
|
|
|
|
\includegraphics[width=0.95\linewidth]{../shared/imgs/call_stack}
|
|
|
|
\end{column}
|
|
|
|
\end{columns}
|
|
|
|
\end{frame}
|
|
|
|
|
|
|
|
\begin{frame}{Isn't it as trivial as \texttt{pop()}?}
|
|
|
|
\begin{itemize}
|
|
|
|
\item This is only a \alert{blob of binary data} without mandatory
|
|
|
|
structure
|
|
|
|
\item We don't know \alert{which registers were saved}
|
|
|
|
\item We don't know \alert{whether \reg{rbp} was used}
|
|
|
|
\item We don't know \alert{where the return address is stored}
|
|
|
|
\item We don't know \alert{where the previous frame begins}
|
|
|
|
\end{itemize}
|
|
|
|
|
|
|
|
\medskip
|
|
|
|
|
|
|
|
But\ldots{} if we know how to \alert{unwind one}, we can \alert{recurse}!
|
|
|
|
\end{frame}
|
|
|
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
\subsection{DWARF tables}
|
|
|
|
|
|
|
|
\begin{frame}[fragile]{DWARF unwinding data}
|
|
|
|
|
|
|
|
\begin{lstlisting}[numbers=none, language=]
|
|
|
|
00009b30 48 009b34 FDE cie=0000 pc=0084950..0084b37
|
|
|
|
LOC CFA rbx rbp r12 r13 r14 r15 ra
|
|
|
|
0084950 rsp+8 u u u u u u c-8
|
|
|
|
0084952 rsp+16 u u u u u c-16 c-8
|
|
|
|
0084954 rsp+24 u u u u c-24 c-16 c-8
|
|
|
|
0084956 rsp+32 u u u c-32 c-24 c-16 c-8
|
|
|
|
0084958 rsp+40 u u c-40 c-32 c-24 c-16 c-8
|
|
|
|
0084959 rsp+48 u c-48 c-40 c-32 c-24 c-16 c-8
|
|
|
|
008495a rsp+56 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
|
|
|
0084962 rsp+64 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
|
|
|
0084a19 rsp+56 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
|
|
|
0084a1d rsp+48 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
|
|
|
0084a1e rsp+40 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
|
|
|
0084a20 rsp+32 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
|
|
|
0084a22 rsp+24 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
|
|
|
0084a24 rsp+16 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
|
|
|
0084a26 rsp+8 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
|
|
|
0084a30 rsp+64 c-56 c-48 c-40 c-32 c-24 c-16 c-8
|
|
|
|
\end{lstlisting}
|
|
|
|
|
|
|
|
\pause{}
|
|
|
|
|
|
|
|
\vspace{-4cm}
|
|
|
|
\hfill\includegraphics[height=3cm, angle=45, origin=c]{img/dwarf_logo}
|
|
|
|
\hspace{-1cm}
|
|
|
|
|
|
|
|
\end{frame}
|
|
|
|
|
2018-09-02 02:07:59 +02:00
|
|
|
\begin{frame}[fragile]{The real DWARF}
|
|
|
|
\begin{lstlisting}[numbers=none, language=]
|
|
|
|
00009b30 48 009b34 FDE cie=0000 pc=0084950..0084b37
|
|
|
|
DW_CFA_advance_loc: 2 to 0000000000084952
|
|
|
|
DW_CFA_def_cfa_offset: 16
|
|
|
|
DW_CFA_offset: r15 (r15) at cfa-16
|
|
|
|
DW_CFA_advance_loc: 2 to 0000000000084954
|
|
|
|
DW_CFA_def_cfa_offset: 24
|
|
|
|
DW_CFA_offset: r14 (r14) at cfa-24
|
|
|
|
DW_CFA_advance_loc: 2 to 0000000000084956
|
|
|
|
DW_CFA_def_cfa_offset: 32
|
|
|
|
DW_CFA_offset: r13 (r13) at cfa-32
|
|
|
|
DW_CFA_advance_loc: 2 to 0000000000084958
|
|
|
|
DW_CFA_def_cfa_offset: 40
|
|
|
|
DW_CFA_offset: r12 (r12) at cfa-40
|
|
|
|
DW_CFA_advance_loc: 1 to 0000000000084959
|
|
|
|
DW_CFA_def_cfa_offset: 48
|
|
|
|
DW_CFA_offset: r6 (rbp) at cfa-48
|
|
|
|
DW_CFA_advance_loc: 1 to 000000000008495a
|
|
|
|
[...]
|
|
|
|
\end{lstlisting}
|
|
|
|
\end{frame}
|
|
|
|
|
|
|
|
\begin{frame}{Why does slowness matter?}
|
|
|
|
\textbf{Do we really care about speed for unwinding?}
|
|
|
|
\begin{itemize}
|
|
|
|
|
|
|
|
\item{} After all, we're talking about \alert{debugging procedures} run
|
|
|
|
by a \alert{human being} (slower than the machine).
|
|
|
|
|
|
|
|
\ldots{}or are we?
|
|
|
|
|
|
|
|
\pause{}\item{} \alert{Profiling} with polling profilers
|
|
|
|
|
|
|
|
\pause{}\item{} \alert{Exception handling} in C++
|
|
|
|
|
|
|
|
\end{itemize}
|
|
|
|
|
|
|
|
\vspace{2em}
|
|
|
|
|
|
|
|
\begin{center}
|
|
|
|
\textbf{Debug data is not only for debugging}
|
|
|
|
\end{center}
|
|
|
|
\end{frame}
|
|
|
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
\section{Compiling DWARF}
|
|
|
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
\subsection{Compilation Strategy}
|
|
|
|
|
|
|
|
\begin{frame}[fragile]{Types}
|
|
|
|
Generated data:
|
|
|
|
\lstinputlisting[language=C]{src/unwind_context.c}
|
|
|
|
\pause{}
|
|
|
|
|
|
|
|
\vspace{1em}
|
|
|
|
Function type:
|
|
|
|
\begin{lstlisting}[language=C]
|
|
|
|
unwind_context_t _eh_elf(
|
|
|
|
unwind_context_t, instruction_pointer_t); \end{lstlisting}
|
|
|
|
\end{frame}
|
|
|
|
|
|
|
|
\begin{frame}{Compilation overview}
|
|
|
|
\begin{itemize}
|
|
|
|
\item Compiled to \alert{C code}
|
|
|
|
\item C code then \alert{compiled to native binary} (gcc)
|
|
|
|
\begin{itemize}
|
|
|
|
\item[$\leadsto$] gcc optimisations for free
|
|
|
|
\end{itemize}
|
|
|
|
\item Compiled as \alert{separate \texttt{.so} files}, called \ehelfs{}
|
|
|
|
\bigskip{}
|
|
|
|
\item Morally a \alert{monolithic switch} on IPs
|
|
|
|
\item Each case fills the context structure
|
|
|
|
\end{itemize}
|
|
|
|
\end{frame}
|
|
|
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
\subsection{Outlining}
|
|
|
|
|
|
|
|
\begin{frame}{Size optimisation: outlining}
|
|
|
|
\begin{itemize}
|
|
|
|
\item This \alert{works}, but \alert{takes space}: about \alert{7 times
|
|
|
|
heavier} than regular DWARF\@.
|
|
|
|
|
|
|
|
\item DWARF optimisation strategy: \alert{alter previous row}. \\
|
|
|
|
Causes slowness: we cannot do that.
|
|
|
|
|
|
|
|
\item Remark: a lot of lines appear often.
|
|
|
|
\begin{itemize}
|
|
|
|
\item[$\leadsto$] \emph{outline} them!
|
|
|
|
\end{itemize}
|
|
|
|
\end{itemize}
|
|
|
|
|
|
|
|
\pause{}
|
|
|
|
|
|
|
|
\textbf{Outlining:}
|
|
|
|
\begin{itemize}
|
|
|
|
\item Turn the big switch into a binary search \alert{if/else tree}
|
|
|
|
\item \alert{Extract} the conditional bodies, put them afterwards
|
|
|
|
\item Jump to them using a \alert{label/goto}
|
|
|
|
\end{itemize}
|
|
|
|
|
|
|
|
\pause{}
|
|
|
|
|
|
|
|
\bigskip{}
|
|
|
|
\begin{center}
|
|
|
|
$\leadsto$ only \textbf{2.5 times heavier than DWARF}
|
|
|
|
\end{center}
|
|
|
|
\end{frame}
|
|
|
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
\section{Benchmarking}
|
|
|
|
|
|
|
|
\begin{frame}{Benchmarking requirements}
|
2018-09-02 13:08:58 +02:00
|
|
|
\begin{enumerate}
|
|
|
|
\item Thousands of samples (single unwind: $10\,\mu\mathrm{s}$)
|
|
|
|
\item Interesting enough program to unwind: nested functions, complex
|
|
|
|
FDEs
|
|
|
|
\item Mitigate caching: don't always unwind from the \emph{same} point
|
|
|
|
\item Yet be fair: don't always unwind from totally different places
|
|
|
|
\item Distribute evenly: if possible, also from within libraries
|
|
|
|
\end{enumerate}
|
|
|
|
|
|
|
|
\pause{}\vspace{1em}
|
|
|
|
|
|
|
|
\begin{itemize}
|
|
|
|
\item 2 $\implies$ rules out hand-crafted programs and CSmith
|
|
|
|
\item 5 $\implies$ cannot call the unwinding procedure by hand
|
|
|
|
\end{itemize}
|
2018-09-02 02:07:59 +02:00
|
|
|
\end{frame}
|
|
|
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
2018-09-02 13:08:58 +02:00
|
|
|
\subsection{Unwinding using perf}
|
|
|
|
|
|
|
|
\begin{frame}{Presentation of perf}
|
|
|
|
\textbf{A profiler is used to\ldots}
|
|
|
|
\begin{itemize}
|
|
|
|
\item get readings of the \alert{time spent in each function}
|
|
|
|
\item detect ``hot paths'': functions you ought to optimize
|
|
|
|
\item \ldots{}and \alert{benchmark \ehelfs{}}!
|
|
|
|
\end{itemize}
|
|
|
|
|
|
|
|
\vspace{1em}\pause{}
|
|
|
|
\textbf{How does it work?}
|
|
|
|
\begin{itemize}
|
|
|
|
\item{} \alert{Polling profiler}: stops at regular intervals to perform
|
|
|
|
analyses
|
|
|
|
\item{} Upon polling, \alert{dumps the stack} to a file
|
|
|
|
\item{} In the analysis phase (after the program terminated),
|
|
|
|
\alert{unwinds all the stacks gathered} to get call paths
|
|
|
|
\end{itemize}
|
|
|
|
\end{frame}
|
|
|
|
|
|
|
|
\begin{frame}{perf instrumentation}
|
|
|
|
\textbf{Instrumenting perf matches all the requirements!}
|
|
|
|
\vspace{1em}\pause{}
|
|
|
|
|
|
|
|
\begin{itemize}
|
|
|
|
\item{} \alert{Plug \ehelfs{} into perf}: use \ehelfs{} instead of
|
|
|
|
DWARF to analyze stack dumps
|
|
|
|
\item{} Implement \alert{unwinding performance counters} inside perf
|
|
|
|
\bigskip{}
|
|
|
|
|
|
|
|
\item{} Use perf on \alert{hackbench}, a kernel stress-test program
|
|
|
|
\begin{itemize}
|
|
|
|
\item Small program
|
|
|
|
\item Lots of calls
|
|
|
|
\item Relies on libc, libpthread
|
|
|
|
\end{itemize}
|
|
|
|
\end{itemize}
|
|
|
|
\end{frame}
|
2018-09-02 02:07:59 +02:00
|
|
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
\section{A glance at future work}
|
|
|
|
|
2018-08-27 16:24:01 +02:00
|
|
|
\end{document}
|