Work on synthesis, "A real bug!" to be finished

This commit is contained in:
Théophile Bastian 2018-11-05 16:19:19 +01:00
parent f2b188bd7a
commit 420eeb6a30
3 changed files with 291 additions and 21 deletions

View file

@ -12,9 +12,9 @@
\usepackage{makecell}
\usepackage{ifthen}
\usepackage{colortbl}
\usepackage{tabularx}
\usepackage{texlib/my_listings}
%\usepackage{texlib/my_hyperref}
\usepackage{texlib/specific}
\usepackage{texlib/common}
\usepackage{texlib/todo}
@ -54,15 +54,25 @@
sensitive=false,
}
\newcolumntype{b}{X}
\newcolumntype{s}{>{\hsize=.43\hsize}X}
\newcommand{\lstinl}
{\lstinline[language=C, keepspaces=true, basicstyle=\ttfamily]}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\author[\slidecountline]{Théophile \textsc{Bastian} \\
\footnotesize{under the supervision of Francesco Zappa Nardelli}}
\title[\sectionline]
{Speeding up stack unwinding by compiling DWARF debug data}
\date{March\ --\ August 2018}
\title[\sectionline] {Growing the DWARF tougher:\\
synthesis, validation and compilation}
\author[\slidecountline]{\textbf{Théophile Bastian} \\ \vspace{0.5em}
{{\footnotesize{}Based on work done with}\\
\textbf{Francesco Zappa Nardelli},
\textbf{Stephen Kell},
\textbf{Simon Ser}}}
\date{}
%\subject{}
%\logo{}
\institute{Team PARKAS, INRIA, Paris}
\institute{ENS Paris, INRIA}
\begin{document}
@ -72,6 +82,7 @@
\vspace{-2em}
\begin{center}
\todo{}
\begin{align*}
\text{Slides: } &\text{\url{https://tobast.fr/m2/slides.pdf}} \\
\text{Report: } &\text{\url{https://tobast.fr/m2/report.pdf}}
@ -85,7 +96,7 @@
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Stack unwinding data}
\section{DWARF and stack unwinding data}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Introduction}
@ -214,9 +225,11 @@ $1 = 84
\pause{}
\vspace{-5.5cm}
\vspace{-6.5cm}
\begin{center}
\bf \fontsize{8cm}{1cm}\colorbox{white}{\alert{Slow!}}
\bf \fontsize{8cm}{1cm}
\colorbox{white}{\alert{Complex}} \\
\colorbox{white}{\alert{\& slow!}}
\end{center}
\end{frame}
@ -249,10 +262,248 @@ $1 = 84
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Compiling stack unwinding data ahead-of-time}
\begin{frame}{Difficult to generate}
Major concern with DWARF\@: it is \alert{difficult to generate}
(correctly).
\subsection*{}
\begin{itemize}
\item{} \alert{Hard to generate}: each compiler pass must keep it in
sync
\item{} Most of it is \alert{seldom used} (\eg{} unwinding data of
dusty code), and thus \alert{seldom tested}
\end{itemize}
\vspace{1em}
Yields to
\begin{itemize}
\item{} unreliable DWARF\@: can cause headaches when debugging
\item{} or not generated at all (eg. OCaml until recently) \todo{Check
this}
\end{itemize}
\vspace{1em}
\begin{center}
\Large\bf
$\leadsto$ Complex, buggy, untested
\end{center}
\end{frame}
\begin{frame}{A debugging hell: Linux kernel}
``Sorry, but last time was too f\dots painful. The whole (and
only) point of unwinders is to make debugging easy
when a bug occurs. But \alert{the dwarf unwinder had bugs}
itself, or \alert{our dwarf information had bugs}, and in either
case it actually turned several trivial bugs into a \alert{total
undebuggable hell}.''
\vspace{1em}
\only<1>{\hfill ---~Linus Torvalds, Kernel mailing list, 2012}
\pause{}
``If you can \alert{mathematically prove that the unwinder is
correct} — even in the presence of bogus and actively
incorrect unwinding information — and never ever
follows a bad pointer, \alert{Ill reconsider}.''
\vspace{1em}
\hfill ---~Linus Torvalds, Kernel mailing list, 2012
\pause{}\vspace{1em}
\begin{center}
\Large\bf
\alert{This is where we still are!}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data validation}
\begin{frame}{Main idea}
\begin{itemize}
\item If we follow \alert{one path of execution}, we can track the
return address location
\item If we read unwinding data \alert{at runtime}, we can \alert{check
the RA consistency} at each step
\end{itemize}
\end{frame}
\newcommand{\tblrowval}[4]{#1 & #2 & \only<2->{#3} & \only<2->{#4} \\}
\newcommand{\blknote}[1]
{\begin{block}{}
\centering\large
#1
\end{block}}
\newcommand{\blklnote}[1]
{\begin{block}{}
\large
#1
\end{block}}
\newcommand{\tblhl}{\rowcolor{Tan}}
\begin{frame}{Example}
\begin{table}
\ttfamily\large
\begin{tabularx}{0.9\linewidth}{
l
b
>{\columncolor{SkyBlue}}s
>{\columncolor{SkyBlue}}s
}
\tblrowval{\hspace{-2ex}<{\bf foo}>:}{}{\textbf{CFA}}{\textbf{ra}}
\rowonly<3>{\tblhl{}} \tblrowval{push}{\%r15}{rsp+8}{c-8}
\rowonly<4>{\tblhl{}} \tblrowval{push}{\%r14}{rsp+16}{c-8}
\rowonly<5>{\tblhl{}} \tblrowval{mov}{\$0x3,\%eax}{rsp+24}{c-8}
\rowonly<6>{\tblhl{}} \tblrowval{push}{\%r13}{rsp+24}{c-8}
\tblrowval{push}{\%r12}{rsp+32}{c-8}
\tblrowval{push}{\%rbp}{rsp+40}{c-8}
\tblrowval{push}{\%rbx}{rsp+48}{c-8}
\tblrowval{sub}{\$0x68,\%rsp}{rsp+56}{c-8}
\rowonly<9>{\tblhl{}} \tblrowval{cmp}{\$0x1,\%edi}{rsp+160}{c-8}%
\only<-8>{%
\tblrowval{add}{\$0x68,\%rsp}{rsp+160}{c-8}
\tblrowval{pop}{\%rbx}{rsp+56}{c-8}
\tblrowval{pop}{\%rbp}{rsp+48}{c-8}}
\end{tabularx}
\end{table}
\only<-8> {
\only<3>{\blknote{Upon function call, \alert{ra = *(\reg{rsp})} (ABI)}}
\only<4>{\blknote{\texttt{push} decreases \reg{rsp} by 8: %
\alert{ra = *(\reg{rsp} + 8)}}}
\only<5>{\blknote{and again: %
\alert{ra = *(\reg{rsp} + 16)}}}
\only<6>{\blknote{This \texttt{mov} leaves \reg{rsp} untouched: %
\alert{ra = *(\reg{rsp} + 16)}}}
\only<7>{\blknote{The unwinding table can actually be seen as\\
an \alert{abstract interpretation} of the code\ldots}}
\only<8>{\blknote{\ldots and thus, for a given run, be
\alert{re-computed for verification}}}
}
\only<9-> {
\vspace{-2em}
\only<9>{\blklnote{If, within an execution,
\begin{itemize}
\item ra = \texttt{*(0xFFFF1098)}
\item \reg{rsp} = \texttt{0xFFFF1000}
\end{itemize}
We can \alert{evaluate both expressions} and \alert{compare}
}}
}
\end{frame}
\begin{frame}{Dynamic validation}
\textbf{Abstract state}
\begin{itemize}
\item \alert{Stack} of actual \alert{addresses} where
\alert{return addresses} are stored
\end{itemize}
\vspace{2em}\pause{}
\textbf{Abstract instruction semantics}
\begin{itemize}
\item[\alert{\texttt{call}}] push \alert{\reg{rsp}} on the stack
\item[\alert{\texttt{ret}}] pop from the stack
\end{itemize}
\vspace{2em}\pause{}
\textbf{Validation of each instruction}
\begin{itemize}
\item Evaluate the return address provided by DWARF
\item Compare it with the value at the top of the stack
\end{itemize}
\end{frame}
\begin{frame}{In practice: \texttt{eh\_frame\_check}}
Strategy implemented and working: \alert{\texttt{eh\_frame\_check}}
\begin{itemize}
\item \texttt{gdb} allows for Python instrumentation
\vspace{1em}\pause{}
\item Parse ELF and DWARF data (\texttt{pyelftools})
\item Run the binary inside gdb
\item Pause at each (assembly) step
\item Jointly evaluate DWARF data and the abstract stack
\item Report upon error
\end{itemize}
\vspace{1em}
Works, but… \alert{Python is slow}!
A few thousand of ASM instructions/second (good enough)
\end{frame}
\begin{frame}{A real bug!}
\begin{columns}[c]
\begin{column}{0.65\textwidth}
\lstinputlisting[language=C]{src/llvm_bug.c}
\end{column} \begin{column}{0.35\textwidth}
\textbf{\texttt{CSmith}\\+ \texttt{Creduce}\\+
\texttt{eh\_frame\_check}}
\vspace{2em}$\leadsto$ \alert{\bf LLVM (3.8) bug!}
\end{column}
\end{columns}
\end{frame}
\renewcommand{\tblrowval}[5]{#1 & #2 & #3 & #4 & #5 \\}
\begin{frame}{A real bug!}
\begin{columns}[T]
\column{0.7\textwidth}
\begin{align*}
\only<2->{\textbf{Abstract state} \qquad &
\left[\texttt{0xFFFF1000}\right]\\}
\only<3->{\reg{rsp} \qquad &
\texttt{0xFFFF1000}}
\end{align*}
\column{0.3\textwidth}
Blah ?
% Add check - cross
\end{columns}
\vspace{1em}
\begin{table}
\ttfamily\large
\begin{tabularx}{0.95\linewidth}{
l
l
b
>{\columncolor{SkyBlue}}s
>{\columncolor{SkyBlue}}s
}
\tblrowval{\hspace{-2ex}<{\bf foo}>:}{}{}{\textbf{CFA}}{\textbf{ra}}
%\rowonly<3>{\tblhl{}} \tblrowval{4004e0}{push}{\%rbx}{rsp+8}{c-8}
\rowonly<2-4>{\tblhl{}} \tblrowval{4004e0}{push}{\%rbx}{rsp+8}{c-8}
\rowonly<5-6>{\tblhl{}} \tblrowval{}{}{}{rsp+16}{c-8}
\tblrowval{}{[\ldots]}{}{}{}
\tblrowval{}{}{}{}{}
\rowonly<7-8>{\tblhl{}} \tblrowval{40061d}{pop}{\%rbx}{rsp+16}{c-8}
\rowonly<9-10>{\tblhl{}} \tblrowval{40061e}{retq}{}{rsp+16}{c-8}
\end{tabularx}
\end{table}
\end{frame}
\begin{frame}{TODO}
\todo{}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data synthesis from assembly}
\begin{frame}{TODO}
\todo{}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Unwinding data compilation}
\subsection{Compilation ahead-of-time}
\begin{frame}{Compilation overview}
\begin{itemize}
@ -271,8 +522,6 @@ $1 = 84
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Example}
\begin{frame}{Compilation example: original C, DWARF}
\lstinputlisting[language=C]{src/fib7/fib7.cfde}
\end{frame}
@ -283,7 +532,6 @@ $1 = 84
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Compilation Strategy}
\begin{frame}{Compilation choices}
\textbf{In order to keep the compiler \alert{simple} and \alert{easily
@ -328,7 +576,6 @@ $1 = 84
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Outlining}
\begin{frame}{Size optimisation: outlining}
\begin{itemize}
@ -361,7 +608,6 @@ $1 = 84
\lstinputlisting[language=C]{src/fib7/fib7.eh_elf_outline.c}
\end{frame}
\subsection{A word on formalization}
\begin{frame}[t]{A word on formalization}
\begin{itemize}
@ -381,7 +627,7 @@ $1 = 84
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Benchmarking}
\subsection{Benchmarking}
\begin{frame}{Benchmarking requirements}
\begin{enumerate}
@ -422,7 +668,7 @@ $1 = 84
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Results}
\subsection{Results}
\begin{frame}{Time performance}
\small
@ -480,7 +726,6 @@ $1 = 84
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section*{}
\setcounter{section}{0}

13
src/llvm_bug.c Normal file
View file

@ -0,0 +1,13 @@
short a,b,g;
long c;
char d;
int e, f;
void main() {
for(; f; f++)
for(; e; e++)
for(; c; c++) {
g = a % b;
for(; d <= 1; d++);
}
}

View file

@ -10,3 +10,15 @@
\newcommand{\tnhead}[2]{\multicolumn{1}{#1}{#2}} % Table neutral head
\newcommand{\spaced}[2]{\hspace{#1} #2 \hspace{#1}}
\makeatletter
\newcommand{\rowonly}{%
\noalign{\ifnum0=`}\fi
\@rowonly
}
\newcommand<>{\@rowonly}[1]{%
\only#2%
{\ifnum0=`{\fi}#1{\ifnum0=`}\fi}%
\ifnum0=`{\fi}%
\ignorespaces
}