% vim: spell spelllang=en \documentclass[11pt,xcolor={usenames,dvipsnames}]{beamer} \usetheme{Warsaw} \usepackage[utf8]{inputenc} \usepackage[english]{babel} \usepackage[T1]{fontenc} \usepackage{amsmath} \usepackage{amsfonts} \usepackage{amssymb} \usepackage{../shared/my_listings} %\usepackage{../shared/my_hyperref} \usepackage{../shared/specific} \usepackage{../shared/common} \usepackage{../shared/todo} \setbeamertemplate{navigation symbols}{} \newcommand{\thenalert}[1]{\only<1>{#1}\only<2>{\alert{#1}}} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \author[Théophile Bastian]{Théophile \textsc{Bastian} \\ \small{Under supervision of Francesco Zappa Nardelli}} \title{Internship defense, MPRI, M2} \subtitle{Speeding up stack unwinding by compiling DWARF debugging data} \date{March\ --\ August 2018} %\subject{} %\logo{} \institute{Team PARKAS, INRIA, Paris} \begin{document} \begin{frame} \addtocounter{framenumber}{-1} \titlepage{} \begin{center} Slides: \url{https://tobast.fr/m2/slides.pdf} \\ Report: \url{https://tobast.fr/m2/report.pdf} \end{center} \end{frame} \begin{frame} \addtocounter{framenumber}{-1} \tableofcontents \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Stack unwinding data} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Introduction} \begin{frame}[fragile]{We often use stack unwinding!} \begin{lstlisting}[language=, numbers=none, escapechar=|] Program received signal SIGSEGV, Segmentation fault. 
0x0000555555554625 in fct_b (m=0x5c) at segfault.c:5 5 printf("%l\n", *m); |\pause| (gdb) backtrace #0 0x0000555555554625 in fct_b (m=0x5c) at segfault.c:5 #1 0x0000555555554663 in fct_a (n=42) at segfault.c:10 #2 0x0000555555554674 in main () at segfault.c:14 |\pause| (gdb) frame 1 #1 0x0000555555554663 in fct_a (n=42) at segfault.c:10 10 fct_b((int*)(some_fct_a_var + 8)); |\pause| (gdb) print some_fct_a_var $1 = 84 \end{lstlisting} \pause{} \begin{center} \textbf{\Large How does it work?!} \end{center} \vspace{1em} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Stack frames and unwinding} \begin{frame}{Call stack and registers} \begin{columns}[c] \begin{column}{0.65\textwidth} \begin{itemize} \item Programs use a \alert{call stack} \item Organized in \alert{stack frames} \begin{itemize} \item Local variables \item Function parameters \item Keep track of nesting, registers and ``return point'' \end{itemize} \end{itemize} Common registers: \begin{itemize} \item \reg{rip}: program counter (PC) \item \reg{rsp}: stack pointer \item \reg{rbp}: base pointer \begin{itemize} \item Saves \reg{rsp} \item Easy access \item Wastes a register \item Not often used (x86\_64) \end{itemize} \end{itemize} \end{column} \begin{column}{0.35\textwidth} \includegraphics[width=0.95\linewidth]{../shared/imgs/call_stack} \end{column} \end{columns} \end{frame} \begin{frame}{Isn't it as trivial as \texttt{pop()}?} \begin{itemize} \item This is only a \alert{blob of binary data} without mandatory structure \item We don't know \alert{which registers were saved} \item We don't know \alert{whether \reg{rbp} was used} \item We don't know \alert{where the return address is stored} \item We don't know \alert{where the previous frame begins} \end{itemize} \medskip But\ldots{} if we know how to \alert{unwind one}, we can \alert{recurse}! 
\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{DWARF tables} \begin{frame}[fragile]{DWARF unwinding data} \begin{lstlisting}[numbers=none, language=] 00009b30 48 009b34 FDE cie=0000 pc=0084950..0084b37 LOC CFA rbx rbp r12 r13 r14 r15 ra 0084950 rsp+8 u u u u u u c-8 0084952 rsp+16 u u u u u c-16 c-8 0084954 rsp+24 u u u u c-24 c-16 c-8 0084956 rsp+32 u u u c-32 c-24 c-16 c-8 0084958 rsp+40 u u c-40 c-32 c-24 c-16 c-8 0084959 rsp+48 u c-48 c-40 c-32 c-24 c-16 c-8 008495a rsp+56 c-56 c-48 c-40 c-32 c-24 c-16 c-8 0084962 rsp+64 c-56 c-48 c-40 c-32 c-24 c-16 c-8 0084a19 rsp+56 c-56 c-48 c-40 c-32 c-24 c-16 c-8 0084a1d rsp+48 c-56 c-48 c-40 c-32 c-24 c-16 c-8 0084a1e rsp+40 c-56 c-48 c-40 c-32 c-24 c-16 c-8 0084a20 rsp+32 c-56 c-48 c-40 c-32 c-24 c-16 c-8 0084a22 rsp+24 c-56 c-48 c-40 c-32 c-24 c-16 c-8 0084a24 rsp+16 c-56 c-48 c-40 c-32 c-24 c-16 c-8 0084a26 rsp+8 c-56 c-48 c-40 c-32 c-24 c-16 c-8 0084a30 rsp+64 c-56 c-48 c-40 c-32 c-24 c-16 c-8 \end{lstlisting} \pause{} \vspace{-4cm} \hfill\includegraphics[height=3cm, angle=45, origin=c]{img/dwarf_logo} \hspace{-1cm} \end{frame} \begin{frame}[fragile]{The real DWARF} \begin{lstlisting}[numbers=none, language=] 00009b30 48 009b34 FDE cie=0000 pc=0084950..0084b37 DW_CFA_advance_loc: 2 to 0000000000084952 DW_CFA_def_cfa_offset: 16 DW_CFA_offset: r15 (r15) at cfa-16 DW_CFA_advance_loc: 2 to 0000000000084954 DW_CFA_def_cfa_offset: 24 DW_CFA_offset: r14 (r14) at cfa-24 DW_CFA_advance_loc: 2 to 0000000000084956 DW_CFA_def_cfa_offset: 32 DW_CFA_offset: r13 (r13) at cfa-32 DW_CFA_advance_loc: 2 to 0000000000084958 DW_CFA_def_cfa_offset: 40 DW_CFA_offset: r12 (r12) at cfa-40 DW_CFA_advance_loc: 1 to 0000000000084959 DW_CFA_def_cfa_offset: 48 DW_CFA_offset: r6 (rbp) at cfa-48 DW_CFA_advance_loc: 1 to 000000000008495a [...] 
\end{lstlisting} \end{frame} \begin{frame}{Why does slow matter?} \textbf{Do we really care about speed for unwinding?} \begin{itemize} \item{} After all, we're talking about \alert{debugging procedures} run by a \alert{human being} (slower than the machine). \ldots{}or are we? \pause{}\item{} \alert{Profiling} with polling profilers \pause{}\item{} \alert{Exception handling} in C++ \end{itemize} \vspace{2em} \begin{center} \textbf{Debug data is not only for debugging} \end{center} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Compiling DWARF} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Compilation Strategy} \begin{frame}[fragile]{Types} Generated data: \lstinputlisting[language=C]{src/unwind_context.c} \pause{} \vspace{1em} Function type: \begin{lstlisting}[language=C] unwind_context_t _eh_elf( unwind_context_t, instruction_pointer_t); \end{lstlisting} \end{frame} \begin{frame}{Compilation overview} \begin{itemize} \item Compiled to \alert{C code} \item C code then \alert{compiled to native binary} (gcc) \begin{itemize} \item[$\leadsto$] gcc optimisations for free \end{itemize} \item Compiled as \alert{separate \texttt{.so} files}, called \ehelfs{} \bigskip{} \item Morally a \alert{monolithic switch} on IPs \item Each case fills the context structure \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Outlining} \begin{frame}{Size optimisation: outlining} \begin{itemize} \item This \alert{works}, but \alert{takes space}: about \alert{7 times heavier} than regular DWARF\@. \item DWARF optimisation strategy: \alert{alter previous row}. \\ Causes slowness: we cannot do that. \item Remark: a lot of lines appear often. \begin{itemize} \item[$\leadsto$] \emph{outline} them! 
\end{itemize} \end{itemize} \pause{} \textbf{Outlining:} \begin{itemize} \item Turn the big switch into a binary search \alert{if/else tree} \item \alert{Extract} the conditional bodies, put them afterwards \item Jump to them using a \alert{label/goto} \end{itemize} \pause{} \bigskip{} \begin{center} $\leadsto$ only \textbf{2.5 times heavier than DWARF} \end{center} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Benchmarking} \begin{frame}{Benchmarking requirements} \todo{} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{A glance at future work} \end{document}