talk-2019-10-OOPSLA19/slides.tex

552 lines
17 KiB
TeX
Raw Normal View History

% vim: spell spelllang=en
\documentclass[11pt,xcolor={usenames,dvipsnames}]{beamer}
\usetheme{Warsaw}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\usepackage[T1]{fontenc}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{booktabs}
\usepackage{makecell}
\usepackage{ifthen}
\usepackage{colortbl}
\usepackage{tabularx}
2018-11-06 12:28:17 +01:00
\usepackage{pifont}
2019-10-11 11:45:35 +02:00
\usepackage{multirow}
2019-10-14 16:29:41 +02:00
\usepackage[many]{tcolorbox}
\usepackage[absolute,overlay]{textpos}
\usetikzlibrary{arrows.meta}
\usepackage{texlib/my_listings}
\usepackage{texlib/specific}
\usepackage{texlib/common}
\usepackage{texlib/todo}
\usepackage{inconsolata}
% Default look of every listing: small monospaced text.
\lstset{basicstyle=\footnotesize\ttfamily}
% makecell table heads (\thead): centred, in small bold type.
\renewcommand\theadalign{c}
\renewcommand\theadfont{\scriptsize\bfseries}
% Empty out beamer's navigation symbols and the theme's headline.
\setbeamertemplate{navigation symbols}{}
\setbeamertemplate{headline}{}
% \thenalert{text}: text is plain on slide 1, alerted on slide 2, and absent
% on any later slide of the frame.
\newcommand{\thenalert}[1]{\only<1-2>{\alert<2>{#1}}}
% \slidecountline: "current/total" frame counter used in the footline.
% Prints nothing while the frame counter is 0 (the title frame decrements the
% counter, so it ends up at 0 there).
% The test reads the counter with \value{framenumber} rather than the printed
% form \theframenumber, so it keeps working if the printed representation of
% the counter is ever changed.
\newcommand{\slidecountline}{
    \ifthenelse{\value{framenumber} = 0}
        {}
        {\insertframenumber/\inserttotalframenumber}}
2018-11-06 12:28:17 +01:00
% \cmark / \xmark: green check mark and red cross (pifont dingbats 52 and 56).
% \textcolor confines the colour to the symbol; the previous bare \color
% switch stayed active for whatever followed the mark in the same group.
\newcommand{\cmark}{\textcolor{OliveGreen}{\ding{52}}}
\newcommand{\xmark}{\textcolor{BrickRed}{\ding{56}}}
% \sectiontitleframe: a frame of its own that shows the current section
% heading, in the title font, inside a rounded, shadowed, title-coloured box
% centred vertically between two \vfill.
\newcommand{\sectiontitleframe}{%
    \begin{frame}
        \vfill
        \centering
        \begin{beamercolorbox}[sep=8pt,center,shadow=true,rounded=true]{title}
            \usebeamerfont{title}\insertsectionhead\par%
        \end{beamercolorbox}
        \vfill
    \end{frame}%
}
% Minimal listings language for gdb transcripts: the only keyword is "gdb",
% and keyword matching is case-insensitive (sensitive=false).
\lstdefinelanguage{gdb}{
morekeywords={gdb},
sensitive=false,
}
2019-10-14 16:53:16 +02:00
% Footline: skip 0.9\paperwidth from the left edge, then put the slide counter
% (\slidecountline) in a box 0.1\paperwidth wide, coloured as
% "title in head/foot". The commented-out lines are a disabled author box.
\setbeamertemplate{footline}
{
\leavevmode%
\hbox{%
\hskip0.9\paperwidth
%\begin{beamercolorbox}[wd=.4\paperwidth,ht=2.25ex,dp=1ex,center]{author in head/foot}%
% \usebeamerfont{author in head/foot}\insertshortauthor
%\end{beamercolorbox}%
\begin{beamercolorbox}[center, wd=.1\paperwidth,ht=2.25ex,dp=1ex,center]{title in head/foot}%
\slidecountline{}
\end{beamercolorbox}}%
\vskip0pt%
}
% tabularx column types: "b" is a plain X column, "s" is an X column whose
% width share (\hsize) is scaled to 0.43 of a regular one.
\newcolumntype{b}{X}
\newcolumntype{s}{>{\hsize=.43\hsize}X}
% \lstinl: shorthand for \lstinline preset to C, with spaces kept and a
% normal-size monospaced face; the code to typeset follows the macro, as it
% would follow \lstinline.
\newcommand{\lstinl}
{\lstinline[language=C, keepspaces=true, basicstyle=\ttfamily]}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2019-10-14 16:53:16 +02:00
% Title-page metadata. The short-author slot carries the slide counter
% instead of a name. Stray version-control timestamps that sat between these
% lines (one of them inside the \author argument) are removed, as they would
% be typeset as text.
\title[] {Reliable and Fast DWARF-based Stack Unwinding}
\author[\slidecountline]{\textbf{Théophile Bastian}\\
    \textbf{Stephen Kell} \\
    \textbf{Francesco Zappa Nardelli}}
\date{}
%\subject{}
%\logo{}
\institute{ENS Paris, University of Kent, Inria}
\begin{document}
% Title frame: title page followed by two boxes (web page, funding).
% The frame counter is decremented so that this frame does not count and
% \slidecountline prints nothing on it.
\begin{frame}
    \addtocounter{framenumber}{-1}
    \titlepage{}
    \vspace{-2em}
    \begin{columns}
        \begin{column}{0.55\textwidth}
            \begin{tcolorbox}[halign=center, colframe=blue]
                % "\ " after the abbreviation avoids end-of-sentence spacing.
                \textbf{Webpage} (incl.\ slides)
                \smallskip
                \vspace{0.5em}
                {\url{https://huit.re/frdwarf}}\\
                \vspace{0.5em}
            \end{tcolorbox}
        \end{column}
        \begin{column}{0.55\textwidth}
            \begin{tcolorbox}[colframe=blue]
                \begin{center}\textbf{Funding}\end{center}
                \vspace{-1em}
                \smallskip
                ONR Vertica \\
                Google Research Fellowship
            \end{tcolorbox}
        \end{column}
    \end{columns}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{DWARF and stack unwinding data}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Introduction}
% Motivating example: a crashing program and its gdb backtrace, revealed in
% steps with \pause, next to a picture of the call stack.
% The listing body is verbatim, so it stays flush left; a stray timestamp that
% sat inside it (and would have been printed in the backtrace) is removed.
\begin{frame}[fragile]{}
    \begin{columns}[c]
        \begin{column}{0.65\textwidth}
\begin{lstlisting}[basicstyle=\ttfamily,language=gdb, numbers=none, escapechar=|]
$ ./a.out
Segmentation fault.
|\pause|(gdb) backtrace
#0 |0x54625| in fct_b
#1 |\color{blue}0x54663| in fct_a
#2 |\color{red}0x54674| in main
\end{lstlisting}
            \pause{}
            \begin{center}
                \textbf{\Large How does it work?!}
            \end{center}
        \end{column}
        \begin{column}{0.35\textwidth}
            \pause{}
            \includegraphics[width=0.95\linewidth]{img/call_stack}
        \end{column}
    \end{columns}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Stack frames and unwinding}
2019-10-14 16:55:43 +02:00
% "How do we get the return address?": the question and its rbp answer on
% the left, the call-stack picture on the right; the rsp-only follow-up
% question appears on slide 2.
\begin{frame}
    \begin{columns}[c]
        \begin{column}{0.65\textwidth}
            \begin{center}
                \large\bfseries
                How do we get the RA\@?\\Easy, \reg{rbp}!
                \vspace{2em}
                \onslide<2>{What if we only have \reg{rsp}?}
            \end{center}
        \end{column}
        \begin{column}{0.35\textwidth}
            \includegraphics[width=0.95\linewidth]{img/call_stack}
        \end{column}
    \end{columns}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{DWARF tables}
% Column type "a": left-aligned column with a RedOrange background.
% NOTE(review): no table in the visible part of this file uses it.
\newcolumntype{a}{>{\columncolor{RedOrange}}l}
% Unwinding table as a colour-coded grid: LOC column in YellowGreen, CFA in
% Thistle, ra in Apricot, one row highlighted in Aquamarine. The two boxes
% below explain the columns and appear on slides 2 and 3; on slide 1 a fixed
% \vspace stands in for them.
\begin{frame}{DWARF unwinding data}
    \vspace{2em}
    \ttfamily \footnotesize
    \begin{center}
        \begin{tabular}{
                >{\columncolor{YellowGreen}}l
                >{\columncolor{Thistle}}l
                l l l l l l
                >{\columncolor{Apricot}}l}
            ~LOC & CFA & rbx & rbp & r12 & r13 & r14 & r15 & ra \\
            0084950 & rsp+8 & u & u & u & u & u & u & c-8 \\
            0084952 & rsp+16 & u & u & u & u & u & c-16 & c-8 \\
            0084954 & rsp+24 & u & u & u & u & c-24 & c-16 & c-8 \\
            0084956 & rsp+32 & u & u & u & c-32 & c-24 & c-16 & c-8 \\
            0084958 & rsp+40 & u & u & c-40 & c-32 & c-24 & c-16 & c-8 \\
            0084959 & rsp+48 & u & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
            \rowcolor{Aquamarine} 008495a & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
            0084962 & rsp+64 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
            0084a19 & rsp+56 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
            0084a1d & rsp+48 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
            0084a1e & rsp+40 & c-56 & c-48 & c-40 & c-32 & c-24 & c-16 & c-8 \\
        \end{tabular}
    \end{center}
    \vspace{1em}
    \only<1>{\vspace{19mm}}
    \begin{columns}
        \begin{column}{0.50\textwidth}
            \only<2->{
                \begin{tcolorbox}[enhanced, halign=center, frame hidden, colback=YellowGreen]
                    \textbf{For each instruction\ldots}\\
                    (identified by its program counter)
                \end{tcolorbox}
            }
        \end{column}
        \begin{column}{0.50\textwidth}
            \only<3->{
                \begin{tcolorbox}[enhanced, halign=center, frame hidden,
                    interior style={right color=Apricot, left color=Thistle}]
                    \textbf{\ldots{}an expression to compute its return address
                    location on the stack}
                \end{tcolorbox}
            }
        \end{column}
    \end{columns}
\end{frame}
% Raw DWARF CFI dump, then two overlay boxes placed at absolute positions
% with textpos ("Complex & slow" from slide 2, "Pervasive" from slide 3).
% The listing body is verbatim and stays flush left; stray timestamps that sat
% inside it (and inside the \textbf below) are removed.
\begin{frame}[t, fragile]{The real DWARF}
\begin{lstlisting}[numbers=none, language=]
30 24 34 FDE pc=004020..004040
DW_CFA_def_cfa_offset: 16
DW_CFA_advance_loc: 6 to 0000000000004026
DW_CFA_def_cfa_offset: 24
DW_CFA_advance_loc: 10 to 0000000000004030
DW_CFA_def_cfa_expression (DW_OP_breg7 (rsp): 8; DW_OP_breg16 (rip): 0; DW_OP_lit15; DW_OP_and; DW_OP_lit11; DW_OP_ge; DW_OP_lit3; DW_OP_shl; DW_OP_plus)
[...]
\end{lstlisting}
    \begin{itemize}
        \item[\textbf{$\longrightarrow$}] \textbf{\alert{constructed} on-demand
            by a \alert{Turing-complete stack machine}!}
    \end{itemize}
    \only<2->{
        \begin{textblock*}{0.90\textwidth}[0.5,0](0.5\paperwidth,0.17\paperheight)%
            \begin{tcolorbox}[halign=center, colframe=red, colback=Lavender]
                \bfseries \huge
                Complex \,\& \,slow
            \end{tcolorbox}
        \end{textblock*}
    }
    \only<3->{
        \begin{textblock*}{0.90\textwidth}[0.5,0](0.5\paperwidth,0.35\paperheight)%
            \begin{tcolorbox}[halign=center, colframe=red, colback=Lavender]
                \huge
                \textbf{Pervasive:}\\ relied upon by debuggers, profilers, C++
                exceptions \\
                \medskip{}
                \textbf{$\leadsto$ not only for debuggers!}
            \end{tcolorbox}
        \end{textblock*}
    }
\end{frame}
2018-11-08 19:37:04 +01:00
% First quotation used on the Linus Torvalds frame, with the key phrases
% alerted. A stray timestamp that sat inside the macro body (and would have
% been typeset after the quotation) is removed.
\newcommand{\LinusMailOne}{
    ``Sorry, but last time was too f\dots painful. The whole (and
    only) point of unwinders is to make debugging easy
    when a bug occurs. But \alert{the dwarf unwinder had bugs}
    itself, or \alert{our dwarf information had bugs}, and in either
    case it actually turned several trivial bugs into a \alert{total
    undebuggable hell}.''
}
% Second quotation used on the Linus Torvalds frame.
% Fixes the missing apostrophe ("Ill" -> "I'll") and removes a stray
% timestamp that sat inside the macro body.
\newcommand{\LinusMailTwo}{
    ``If you can \alert{mathematically prove that the unwinder is
    correct} — even in the presence of bogus and actively
    incorrect unwinding information — and never ever
    follows a bad pointer, \alert{I'll reconsider}.''
}
% Right-aligned attribution line for the quotations above. A stray timestamp
% that sat inside the macro body (and would have been printed after the
% attribution) is removed.
\newcommand{\LinusSource}{
    \hfill ---~Linus Torvalds, 2012
}
2019-10-14 16:55:43 +02:00
% Linus Torvalds frame. Slides 1-2: first quotation with its attribution;
% slide 2 adds the "still where we are" remark; slide 3 replaces both with
% the second quotation and its attribution.
\begin{frame}
    \begin{columns}
        \begin{column}{0.75\textwidth}
            \LinusMailOne{}
        \end{column}
        \begin{column}{0.25\textwidth}
            \includegraphics[width=\textwidth]{img/roundtorvalds.png}
        \end{column}
    \end{columns}
    \only<1-2>{
        \vspace{1em}
        \LinusSource{}
    }
    \vspace{1em}
    \only<2>{
        \begin{center}
            \Large\bfseries
            \alert{This is where we still are!}
        \end{center}
    }
    \only<3>{
        \LinusMailTwo{}
        \vspace{1em}
        \LinusSource{}
    }
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data as an abstract execution of the assembly}
\sectiontitleframe{}
% \tblrowval{mnemonic}{operands}{CFA}{ra}: one four-cell table row; the last
% two cells are only filled in from slide 2 onwards.
\newcommand{\tblrowval}[4]{#1 & #2 & \only<2->{#3} & \only<2->{#4} \\}
% \blknote{text}: untitled beamer block holding centred, \large text.
\newcommand{\blknote}[1]{%
    \begin{block}{}
        \centering\large
        #1
    \end{block}}
% \blklnote{text}: same as \blknote but without \centering.
\newcommand{\blklnote}[1]{%
    \begin{block}{}
        \large
        #1
    \end{block}}
% \tblhl: colour the current table row Tan (used to highlight a row).
\newcommand{\tblhl}{\rowcolor{Tan}}
2019-10-14 16:55:43 +02:00
% Walk-through of an assembly prologue/epilogue next to its CFA and ra columns.
%   slide 1    : assembly only (the CFA/ra cells are filled from slide 2 on)
%   slide 3    : "Assumptions" box overlaid at the centre of the page
%   slides 4-7 : one row highlighted per slide, explained in the note below
%   slides 8-9 : take-away messages
% \rowonly is not defined in this file (presumably in one of the texlib
% packages).
% Fixes the typo "is was generated" in the assumptions list.
\begin{frame}
    % Leading rows of the table, local to this frame.
    \newcommand{\firsttblrows}{
        \tblrowval{\hspace{-2ex}<{\bf foo}>:}{}{\textbf{CFA}}{\textbf{ra}}
        \rowonly<4>{\tblhl{}} \tblrowval{push}{\%r15}{rsp+8}{c-8}
        \rowonly<5>{\tblhl{}} \tblrowval{push}{\%r14}{rsp+16}{c-8}
        \rowonly<6>{\tblhl{}} \tblrowval{mov}{\$0x3,\%eax}{rsp+24}{c-8}
        \rowonly<7>{\tblhl{}} \tblrowval{push}{\%r13}{rsp+24}{c-8}
        \tblrowval{push}{\%r12}{rsp+32}{c-8}
        \tblrowval{push}{\%rbp}{rsp+40}{c-8}
        \tblrowval{push}{\%rbx}{rsp+48}{c-8}
        \tblrowval{sub}{\$0x68,\%rsp}{rsp+56}{c-8}
    }
    {\only<3>{
        \begin{textblock*}{0.90\textwidth}[0.5,0.5](0.5\paperwidth,0.5\paperheight)%
            \begin{tcolorbox}[halign=center, colframe=red, colback=Lavender]
                \large
                \alert{\bfseries Assumptions:}
                \begin{itemize}
                    \item the assembly was generated by a compiler
                    \item which also generated unwinding data
                    \item and I have a reliable DWARF parser
                \end{itemize}
            \end{tcolorbox}
        \end{textblock*}
    }}
    \only<-9>{
        \begin{table}
            \ttfamily\large
            \begin{tabularx}{0.9\linewidth}{
                    l
                    b
                    >{\columncolor{SkyBlue}}s
                    >{\columncolor{SkyBlue}}s
                }
                \firsttblrows{}%
                \tblrowval{add}{\$0x68,\%rsp}{rsp+160}{c-8}
                \tblrowval{pop}{\%rbx}{rsp+56}{c-8}
                \tblrowval{pop}{\%rbp}{rsp+48}{c-8}
            \end{tabularx}
        \end{table}
        \blknote{
            \centering
            \begin{overlayarea}{0.9\textwidth}{4.8ex}
                \only<4>{Upon function call, \alert{ra = *(\reg{rsp})}}
                \only<5>{\texttt{push} decreases \reg{rsp} by 8: %
                    \alert{ra = *(\reg{rsp} + 8)}}
                \only<6>{and again: %
                    \alert{ra = *(\reg{rsp} + 16)}}
                \only<7>{This \texttt{mov} leaves \reg{rsp} untouched: %
                    \alert{ra = *(\reg{rsp} + 16)}}
                \only<8>{The unwinding table captures an \alert{abstract execution}
                    of the code\ldots}
                \only<9>{\ldots and thus can be \alert{synthesized from the binary}.}
            \end{overlayarea}
        }
    }
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2018-11-14 10:13:19 +01:00
\section{Unwinding data synthesis from binaries}
%\begin{frame}{Why would synthesis be useful?}
% \begin{itemize}
% \item As said earlier, \alert{DWARF is complex}
% \item Some compilers \alert{do not generate it}: hard to \alert{debug}
% \& \alert{profile}.
% \item Think of \alert{JIT-compiled assembly} (eg. JVM)
% \item \ldots{}or even \alert{hand-written inlined assembly}!
% \begin{itemize}
% \item Painful enough to write for not bothering with DWARF
% \item May not even be known by the programmer, breaks gdb
% \item May be wrong (remember Linus!)
% \end{itemize}
% \end{itemize}
%\end{frame}
% Outline of the synthesis algorithm.
% The entry-state formula now agrees with the unwinding tables shown earlier
% in the deck (first row: CFA = rsp+8, ra = c-8); it previously read
% "rsp - 8" and "cfa + 8".
% NOTE(review): confirm the sign convention against the paper.
\begin{frame}{How do we actually synthesize?}
    \begin{itemize}
        \item Upon entering a function, we know
            \[ \cfa = \reg{rsp} + 8
            \qquad \ra = \cfa - 8 \]
        \item The semantics of each instruction specifies \alert{how it changes \cfa}.
            \begin{itemize}
                \item Heuristic to decide whether we index with \reg{rbp} or
                    \reg{rsp}
            \end{itemize}
        \item By performing a symbolic execution, we can \alert{synthesize the
            unwinding table} line by line.
        \item Control flow: forward data-flow analysis
        \item The fixpoints are immediate, cf.\ article
    \end{itemize}
\end{frame}
2018-11-08 20:35:15 +01:00
% "Demo time!" interstitial: a single title-coloured box centred on the frame.
% Stray timestamps that sat inside the box (and would have been typeset in
% it) are removed.
\begin{frame}{}
    \vfill
    \centering
    \begin{beamercolorbox}[sep=8pt,center,shadow=true,rounded=true]{title}
        \Large
        Demo time!
    \end{beamercolorbox}
    \vfill
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data compilation}
\sectiontitleframe{}
\subsection{Compilation ahead-of-time}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2019-10-14 16:55:43 +02:00
% Pipeline diagram (TikZ).
%   slide 1 : the raw DWARF listing
%   slide 2+: the table node, reached by the "runtime" arrow
%   slide 3+: the generated C source and the resulting ELF file, reached by
%             the "ahead of time" and "gcc, AoT" arrows
% Stray timestamps that sat inside the node bodies are removed.
\begin{frame}[shrink]
    \vspace{0.5cm}
    \begin{tikzpicture}
        \begin{scope}[every node/.style={rectangle,thick,draw,scale=0.95}]
            \node (dwarf) at (0, 0) {
                \lstinputlisting[basicstyle=\tiny\ttfamily,numbers=none,language=]{src/dw_plt_abbr}
            };
            \only<2->{
                \node (table) at (0.5\textwidth, -0.23\textheight) {
                    \tiny\ttfamily
                    \begin{tabular}{
                            >{\columncolor{YellowGreen}}l
                            >{\columncolor{Thistle}}l
                            l l
                            >{\columncolor{Apricot}}l}
                        LOC & CFA & rbx & rbp & ra \\
                        0084950 & rsp+8 & u & u & c-8 \\
                        0084952 & rsp+16 & u & u & c-8 \\
                        0084954 & rsp+24 & u & u & c-8 \\
                        0084956 & rsp+32 & u & u & c-8 \\
                    \end{tabular}
                };
            }
            \only<3->{
                \node (csrc) at (0, -0.6\textheight) {
                    \lstinputlisting[basicstyle=\tiny,numbers=none,language=C]{src/fib7/fib7.eh_elf_basic.c}
                };
                \node (ehelf) at (0.55\textwidth, -0.75\textheight) {
                    ELF file:
                    ``\ehelf{}''
                };
            }
        \end{scope}
        \begin{scope}[>={Stealth[black]},
            every node/.style={fill=white,rectangle},
            every path/.style={draw=black,very thick}]
            \only<2->{\path [->] (dwarf) -| node {runtime} (table);}
            \only<3->{
                \path [->] (dwarf) edge node {ahead of time} (csrc);
                \path [->] (csrc) -| node {gcc, AoT} (ehelf);
            }
        \end{scope}
    \end{tikzpicture}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2019-10-14 16:55:43 +02:00
% libunwind versus the eh_elf-based alternative implementation.
% A stray timestamp that sat between two items (and would have been typeset
% in the list) is removed.
\begin{frame}
    \begin{itemize}
        \item \alert{libunwind}: \textit{de facto} standard library for
            unwinding
            \bigskip{}
        \item \texttt{libunwind-eh\_elf}: alternative implementation using
            \ehelfs{}
        \item[$\leadsto$] almost \alert{``relink-and-play''} for existing projects!
    \end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Results}
% Headline results: speedup and space overhead.
% Typography fixes: "\ " after "vs." and "e.g." prevents end-of-sentence
% spacing after the abbreviation, "eg." is spelled "e.g.", and the title uses
% the uncountable "Performance".
\begin{frame}{Performance}
    \begin{itemize}
        \item \alert{Speedup}: x15 (\prog{gzip}) to x25 (\prog{hackbench})
            vs.\ libunwind
            \begin{itemize}
                \item libunwind: state of the art, aggressive caching.
            \end{itemize}
        \item \alert{Space overhead}: x2.6 to x3 vs.\ DWARF
            \vspace{2em}
        \item[$\leadsto$] Alternative time/space trade-off, favorable e.g.\ for
            profiling.
    \end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2018-11-08 19:37:04 +01:00
\section*{Conclusion}
\setcounter{section}{0}
2019-10-15 12:00:37 +02:00
% Closing frame: future work and an invitation to chat.
% Stray timestamps that sat between the items and inside the center
% environment (and would have been typeset on the slide) are removed.
\begin{frame}{A lot of things remain to be done!}
    \begin{itemize}
        \item{} Synthesis + compare = verification of unwinding data!
        \item{} Synthesis could be integrated in compilers: support for inline
            assembly, fallback, \ldots
        \item{} Speedup could be implemented in profilers, debuggers,
            \ldots{}
        \item{} Probably many more cool things to do!
    \end{itemize}
    \vspace{1em}
    \begin{center}
        Come and chat if interested! \texttt{:)}
    \end{center}
\end{frame}
\end{document}