Work on synthesis, "A real bug!" to be finished
This commit is contained in:
parent
f2b188bd7a
commit
420eeb6a30
3 changed files with 291 additions and 21 deletions
287
slides.tex
287
slides.tex
|
@ -12,9 +12,9 @@
|
|||
\usepackage{makecell}
|
||||
\usepackage{ifthen}
|
||||
\usepackage{colortbl}
|
||||
\usepackage{tabularx}
|
||||
|
||||
\usepackage{texlib/my_listings}
|
||||
%\usepackage{texlib/my_hyperref}
|
||||
\usepackage{texlib/specific}
|
||||
\usepackage{texlib/common}
|
||||
\usepackage{texlib/todo}
|
||||
|
@ -54,15 +54,25 @@
|
|||
sensitive=false,
|
||||
}
|
||||
|
||||
|
||||
\newcolumntype{b}{X}
|
||||
\newcolumntype{s}{>{\hsize=.43\hsize}X}
|
||||
|
||||
\newcommand{\lstinl}
|
||||
{\lstinline[language=C, keepspaces=true, basicstyle=\ttfamily]}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\author[\slidecountline]{Théophile \textsc{Bastian} \\
|
||||
\footnotesize{under the supervision of Francesco Zappa Nardelli}}
|
||||
\title[\sectionline]
|
||||
{Speeding up stack unwinding by compiling DWARF debug data}
|
||||
\date{March\ --\ August 2018}
|
||||
\title[\sectionline] {Growing the DWARF tougher:\\
|
||||
synthesis, validation and compilation}
|
||||
\author[\slidecountline]{\textbf{Théophile Bastian} \\ \vspace{0.5em}
|
||||
{{\footnotesize{}Based on work done with}\\
|
||||
\textbf{Francesco Zappa Nardelli},
|
||||
\textbf{Stephen Kell},
|
||||
\textbf{Simon Ser}}}
|
||||
\date{}
|
||||
%\subject{}
|
||||
%\logo{}
|
||||
\institute{Team PARKAS, INRIA, Paris}
|
||||
\institute{ENS Paris, INRIA}
|
||||
|
||||
\begin{document}
|
||||
|
||||
|
@ -72,6 +82,7 @@
|
|||
|
||||
\vspace{-2em}
|
||||
\begin{center}
|
||||
\todo{}
|
||||
\begin{align*}
|
||||
\text{Slides: } &\text{\url{https://tobast.fr/m2/slides.pdf}} \\
|
||||
\text{Report: } &\text{\url{https://tobast.fr/m2/report.pdf}}
|
||||
|
@ -85,7 +96,7 @@
|
|||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Stack unwinding data}
|
||||
\section{DWARF and stack unwinding data}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Introduction}
|
||||
|
@ -214,9 +225,11 @@ $1 = 84
|
|||
|
||||
\pause{}
|
||||
|
||||
\vspace{-5.5cm}
|
||||
\vspace{-6.5cm}
|
||||
\begin{center}
|
||||
\bf \fontsize{8cm}{1cm}\colorbox{white}{\alert{Slow!}}
|
||||
\bf \fontsize{8cm}{1cm}
|
||||
\colorbox{white}{\alert{Complex}} \\
|
||||
\colorbox{white}{\alert{\& slow!}}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
|
@ -249,10 +262,248 @@ $1 = 84
|
|||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Compiling stack unwinding data ahead-of-time}
|
||||
\begin{frame}{Difficult to generate}
|
||||
Major concern with DWARF\@: it is \alert{difficult to generate}
|
||||
(correctly).
|
||||
|
||||
\subsection*{}
|
||||
\begin{itemize}
|
||||
\item{} \alert{Hard to generate}: each compiler pass must keep it in
|
||||
sync
|
||||
\item{} Most of it is \alert{seldom used} (\eg{} unwinding data of
|
||||
dusty code), and thus \alert{seldom tested}
|
||||
\end{itemize}
|
||||
\vspace{1em}
|
||||
|
||||
Yields to
|
||||
\begin{itemize}
|
||||
\item{} unreliable DWARF\@: can cause headaches when debugging
|
||||
\item{} or not generated at all (eg. OCaml until recently) \todo{Check
|
||||
this}
|
||||
\end{itemize}
|
||||
|
||||
\vspace{1em}
|
||||
\begin{center}
|
||||
\Large\bf
|
||||
$\leadsto$ Complex, buggy, untested
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{A debugging hell: Linux kernel}
|
||||
``Sorry, but last time was too f\dots painful. The whole (and
|
||||
only) point of unwinders is to make debugging easy
|
||||
when a bug occurs. But \alert{the dwarf unwinder had bugs}
|
||||
itself, or \alert{our dwarf information had bugs}, and in either
|
||||
case it actually turned several trivial bugs into a \alert{total
|
||||
undebuggable hell}.''
|
||||
|
||||
\vspace{1em}
|
||||
\only<1>{\hfill ---~Linus Torvalds, Kernel mailing list, 2012}
|
||||
\pause{}
|
||||
|
||||
``If you can \alert{mathematically prove that the unwinder is
|
||||
correct} — even in the presence of bogus and actively
|
||||
incorrect unwinding information — and never ever
|
||||
follows a bad pointer, \alert{I’ll reconsider}.''
|
||||
|
||||
\vspace{1em}
|
||||
\hfill ---~Linus Torvalds, Kernel mailing list, 2012
|
||||
|
||||
\pause{}\vspace{1em}
|
||||
\begin{center}
|
||||
\Large\bf
|
||||
\alert{This is where we still are!}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Unwinding data validation}
|
||||
|
||||
\begin{frame}{Main idea}
|
||||
\begin{itemize}
|
||||
\item If we follow \alert{one path of execution}, we can track the
|
||||
return address location
|
||||
\item If we read unwinding data \alert{at runtime}, we can \alert{check
|
||||
the RA consistency} at each step
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\newcommand{\tblrowval}[4]{#1 & #2 & \only<2->{#3} & \only<2->{#4} \\}
|
||||
\newcommand{\blknote}[1]
|
||||
{\begin{block}{}
|
||||
\centering\large
|
||||
#1
|
||||
\end{block}}
|
||||
\newcommand{\blklnote}[1]
|
||||
{\begin{block}{}
|
||||
\large
|
||||
#1
|
||||
\end{block}}
|
||||
\newcommand{\tblhl}{\rowcolor{Tan}}
|
||||
|
||||
\begin{frame}{Example}
|
||||
\begin{table}
|
||||
\ttfamily\large
|
||||
\begin{tabularx}{0.9\linewidth}{
|
||||
l
|
||||
b
|
||||
>{\columncolor{SkyBlue}}s
|
||||
>{\columncolor{SkyBlue}}s
|
||||
}
|
||||
\tblrowval{\hspace{-2ex}<{\bf foo}>:}{}{\textbf{CFA}}{\textbf{ra}}
|
||||
\rowonly<3>{\tblhl{}} \tblrowval{push}{\%r15}{rsp+8}{c-8}
|
||||
\rowonly<4>{\tblhl{}} \tblrowval{push}{\%r14}{rsp+16}{c-8}
|
||||
\rowonly<5>{\tblhl{}} \tblrowval{mov}{\$0x3,\%eax}{rsp+24}{c-8}
|
||||
\rowonly<6>{\tblhl{}} \tblrowval{push}{\%r13}{rsp+24}{c-8}
|
||||
\tblrowval{push}{\%r12}{rsp+32}{c-8}
|
||||
\tblrowval{push}{\%rbp}{rsp+40}{c-8}
|
||||
\tblrowval{push}{\%rbx}{rsp+48}{c-8}
|
||||
\tblrowval{sub}{\$0x68,\%rsp}{rsp+56}{c-8}
|
||||
\rowonly<9>{\tblhl{}} \tblrowval{cmp}{\$0x1,\%edi}{rsp+160}{c-8}%
|
||||
\only<-8>{%
|
||||
\tblrowval{add}{\$0x68,\%rsp}{rsp+160}{c-8}
|
||||
\tblrowval{pop}{\%rbx}{rsp+56}{c-8}
|
||||
\tblrowval{pop}{\%rbp}{rsp+48}{c-8}}
|
||||
\end{tabularx}
|
||||
\end{table}
|
||||
|
||||
\only<-8> {
|
||||
\only<3>{\blknote{Upon function call, \alert{ra = *(\reg{rsp})} (ABI)}}
|
||||
\only<4>{\blknote{\texttt{push} decreases \reg{rsp} by 8: %
|
||||
\alert{ra = *(\reg{rsp} + 8)}}}
|
||||
\only<5>{\blknote{and again: %
|
||||
\alert{ra = *(\reg{rsp} + 16)}}}
|
||||
\only<6>{\blknote{This \texttt{mov} leaves \reg{rsp} untouched: %
|
||||
\alert{ra = *(\reg{rsp} + 16)}}}
|
||||
\only<7>{\blknote{The unwinding table can actually be seen as\\
|
||||
an \alert{abstract interpretation} of the code\ldots}}
|
||||
\only<8>{\blknote{\ldots and thus, for a given run, be
|
||||
\alert{re-computed for verification}}}
|
||||
}
|
||||
\only<9-> {
|
||||
\vspace{-2em}
|
||||
\only<9>{\blklnote{If, within an execution,
|
||||
\begin{itemize}
|
||||
\item ra = \texttt{*(0xFFFF1098)}
|
||||
\item \reg{rsp} = \texttt{0xFFFF1000}
|
||||
\end{itemize}
|
||||
We can \alert{evaluate both expressions} and \alert{compare}
|
||||
}}
|
||||
}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Dynamic validation}
|
||||
\textbf{Abstract state}
|
||||
\begin{itemize}
|
||||
\item \alert{Stack} of actual \alert{addresses} where
|
||||
\alert{return addresses} are stored
|
||||
\end{itemize}
|
||||
|
||||
\vspace{2em}\pause{}
|
||||
|
||||
\textbf{Abstract instruction semantics}
|
||||
\begin{itemize}
|
||||
\item[\alert{\texttt{call}}] push \alert{\reg{rsp}} on the stack
|
||||
\item[\alert{\texttt{ret}}] pop from the stack
|
||||
\end{itemize}
|
||||
|
||||
\vspace{2em}\pause{}
|
||||
|
||||
\textbf{Validation of each instruction}
|
||||
\begin{itemize}
|
||||
\item Evaluate the return address provided by DWARF
|
||||
\item Compare it with the value at the top of the stack
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{In practice: \texttt{eh\_frame\_check}}
|
||||
Strategy implemented and working: \alert{\texttt{eh\_frame\_check}}
|
||||
|
||||
\begin{itemize}
|
||||
\item \texttt{gdb} allows for Python instrumentation
|
||||
\vspace{1em}\pause{}
|
||||
\item Parse ELF and DWARF data (\texttt{pyelftools})
|
||||
\item Run the binary inside gdb
|
||||
\item Pause at each (assembly) step
|
||||
\item Jointly evaluate DWARF data and the abstract stack
|
||||
\item Report upon error
|
||||
\end{itemize}
|
||||
|
||||
\vspace{1em}
|
||||
Works, but… \alert{Python is slow}!
|
||||
|
||||
A few thousand of ASM instructions/second (good enough)
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{A real bug!}
|
||||
\begin{columns}[c]
|
||||
\begin{column}{0.65\textwidth}
|
||||
\lstinputlisting[language=C]{src/llvm_bug.c}
|
||||
\end{column} \begin{column}{0.35\textwidth}
|
||||
\textbf{\texttt{CSmith}\\+ \texttt{Creduce}\\+
|
||||
\texttt{eh\_frame\_check}}
|
||||
|
||||
\vspace{2em}$\leadsto$ \alert{\bf LLVM (3.8) bug!}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
\renewcommand{\tblrowval}[5]{#1 & #2 & #3 & #4 & #5 \\}
|
||||
\begin{frame}{A real bug!}
|
||||
|
||||
\begin{columns}[T]
|
||||
\column{0.7\textwidth}
|
||||
\begin{align*}
|
||||
\only<2->{\textbf{Abstract state} \qquad &
|
||||
\left[\texttt{0xFFFF1000}\right]\\}
|
||||
\only<3->{\reg{rsp} \qquad &
|
||||
\texttt{0xFFFF1000}}
|
||||
\end{align*}
|
||||
|
||||
\column{0.3\textwidth}
|
||||
Blah ?
|
||||
% Add check - cross
|
||||
\end{columns}
|
||||
|
||||
|
||||
\vspace{1em}
|
||||
|
||||
\begin{table}
|
||||
\ttfamily\large
|
||||
\begin{tabularx}{0.95\linewidth}{
|
||||
l
|
||||
l
|
||||
b
|
||||
>{\columncolor{SkyBlue}}s
|
||||
>{\columncolor{SkyBlue}}s
|
||||
}
|
||||
\tblrowval{\hspace{-2ex}<{\bf foo}>:}{}{}{\textbf{CFA}}{\textbf{ra}}
|
||||
%\rowonly<3>{\tblhl{}} \tblrowval{4004e0}{push}{\%rbx}{rsp+8}{c-8}
|
||||
\rowonly<2-4>{\tblhl{}} \tblrowval{4004e0}{push}{\%rbx}{rsp+8}{c-8}
|
||||
\rowonly<5-6>{\tblhl{}} \tblrowval{}{}{}{rsp+16}{c-8}
|
||||
\tblrowval{}{[\ldots]}{}{}{}
|
||||
\tblrowval{}{}{}{}{}
|
||||
\rowonly<7-8>{\tblhl{}} \tblrowval{40061d}{pop}{\%rbx}{rsp+16}{c-8}
|
||||
\rowonly<9-10>{\tblhl{}} \tblrowval{40061e}{retq}{}{rsp+16}{c-8}
|
||||
\end{tabularx}
|
||||
\end{table}
|
||||
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{TODO}
|
||||
\todo{}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Unwinding data synthesis from assembly}
|
||||
|
||||
\begin{frame}{TODO}
|
||||
\todo{}
|
||||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Unwinding data compilation}
|
||||
|
||||
\subsection{Compilation ahead-of-time}
|
||||
|
||||
\begin{frame}{Compilation overview}
|
||||
\begin{itemize}
|
||||
|
@ -271,8 +522,6 @@ $1 = 84
|
|||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Example}
|
||||
|
||||
\begin{frame}{Compilation example: original C, DWARF}
|
||||
\lstinputlisting[language=C]{src/fib7/fib7.cfde}
|
||||
\end{frame}
|
||||
|
@ -283,7 +532,6 @@ $1 = 84
|
|||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Compilation Strategy}
|
||||
|
||||
\begin{frame}{Compilation choices}
|
||||
\textbf{In order to keep the compiler \alert{simple} and \alert{easily
|
||||
|
@ -328,7 +576,6 @@ $1 = 84
|
|||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\subsection{Outlining}
|
||||
|
||||
\begin{frame}{Size optimisation: outlining}
|
||||
\begin{itemize}
|
||||
|
@ -361,7 +608,6 @@ $1 = 84
|
|||
\lstinputlisting[language=C]{src/fib7/fib7.eh_elf_outline.c}
|
||||
\end{frame}
|
||||
|
||||
\subsection{A word on formalization}
|
||||
|
||||
\begin{frame}[t]{A word on formalization}
|
||||
\begin{itemize}
|
||||
|
@ -381,7 +627,7 @@ $1 = 84
|
|||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Benchmarking}
|
||||
\subsection{Benchmarking}
|
||||
|
||||
\begin{frame}{Benchmarking requirements}
|
||||
\begin{enumerate}
|
||||
|
@ -422,7 +668,7 @@ $1 = 84
|
|||
\end{frame}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section{Results}
|
||||
\subsection{Results}
|
||||
|
||||
\begin{frame}{Time performance}
|
||||
\small
|
||||
|
@ -480,7 +726,6 @@ $1 = 84
|
|||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\section*{}
|
||||
\setcounter{section}{0}
|
||||
|
|
13
src/llvm_bug.c
Normal file
13
src/llvm_bug.c
Normal file
|
@ -0,0 +1,13 @@
|
|||
short a,b,g;
|
||||
long c;
|
||||
char d;
|
||||
int e, f;
|
||||
|
||||
void main() {
|
||||
for(; f; f++)
|
||||
for(; e; e++)
|
||||
for(; c; c++) {
|
||||
g = a % b;
|
||||
for(; d <= 1; d++);
|
||||
}
|
||||
}
|
|
@ -10,3 +10,15 @@
|
|||
|
||||
\newcommand{\tnhead}[2]{\multicolumn{1}{#1}{#2}} % Table neutral head
|
||||
\newcommand{\spaced}[2]{\hspace{#1} #2 \hspace{#1}}
|
||||
|
||||
\makeatletter
|
||||
\newcommand{\rowonly}{%
|
||||
\noalign{\ifnum0=`}\fi
|
||||
\@rowonly
|
||||
}
|
||||
\newcommand<>{\@rowonly}[1]{%
|
||||
\only#2%
|
||||
{\ifnum0=`{\fi}#1{\ifnum0=`}\fi}%
|
||||
\ifnum0=`{\fi}%
|
||||
\ignorespaces
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue