From 420eeb6a30706cb65b325cd1333f6b888bec49d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophile=20Bastian?= Date: Mon, 5 Nov 2018 16:19:19 +0100 Subject: [PATCH] Work on synthesis, "A real bug!" to be finished --- slides.tex | 287 ++++++++++++++++++++++++++++++++++++++++++---- src/llvm_bug.c | 13 +++ texlib/common.sty | 12 ++ 3 files changed, 291 insertions(+), 21 deletions(-) create mode 100644 src/llvm_bug.c diff --git a/slides.tex b/slides.tex index d7dc6d6..19d81be 100644 --- a/slides.tex +++ b/slides.tex @@ -12,9 +12,9 @@ \usepackage{makecell} \usepackage{ifthen} \usepackage{colortbl} +\usepackage{tabularx} \usepackage{texlib/my_listings} -%\usepackage{texlib/my_hyperref} \usepackage{texlib/specific} \usepackage{texlib/common} \usepackage{texlib/todo} @@ -54,15 +54,25 @@ sensitive=false, } + +\newcolumntype{b}{X} +\newcolumntype{s}{>{\hsize=.43\hsize}X} + +\newcommand{\lstinl} +{\lstinline[language=C, keepspaces=true, basicstyle=\ttfamily]} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\author[\slidecountline]{Théophile \textsc{Bastian} \\ - \footnotesize{under the supervision of Francesco Zappa Nardelli}} -\title[\sectionline] - {Speeding up stack unwinding by compiling DWARF debug data} -\date{March\ --\ August 2018} +\title[\sectionline] {Growing the DWARF tougher:\\ + synthesis, validation and compilation} +\author[\slidecountline]{\textbf{Théophile Bastian} \\ \vspace{0.5em} +{{\footnotesize{}Based on work done with}\\ +\textbf{Francesco Zappa Nardelli}, +\textbf{Stephen Kell}, +\textbf{Simon Ser}}} +\date{} %\subject{} %\logo{} -\institute{Team PARKAS, INRIA, Paris} +\institute{ENS Paris, INRIA} \begin{document} @@ -72,6 +82,7 @@ \vspace{-2em} \begin{center} + \todo{} \begin{align*} \text{Slides: } &\text{\url{https://tobast.fr/m2/slides.pdf}} \\ \text{Report: } &\text{\url{https://tobast.fr/m2/report.pdf}} @@ -85,7 +96,7 @@ \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Stack unwinding data} +\section{DWARF and stack unwinding data} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Introduction} @@ -214,9 +225,11 @@ $1 = 84 \pause{} - \vspace{-5.5cm} + \vspace{-6.5cm} \begin{center} - \bf \fontsize{8cm}{1cm}\colorbox{white}{\alert{Slow!}} + \bf \fontsize{8cm}{1cm} + \colorbox{white}{\alert{Complex}} \\ + \colorbox{white}{\alert{\& slow!}} \end{center} \end{frame} @@ -249,10 +262,248 @@ $1 = 84 \end{center} \end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Compiling stack unwinding data ahead-of-time} +\begin{frame}{Difficult to generate} + Major concern with DWARF\@: it is \alert{difficult to generate} + (correctly). -\subsection*{} + \begin{itemize} + \item{} \alert{Hard to generate}: each compiler pass must keep it in + sync + \item{} Most of it is \alert{seldom used} (\eg{} unwinding data of + dusty code), and thus \alert{seldom tested} + \end{itemize} + \vspace{1em} + + Yields to + \begin{itemize} + \item{} unreliable DWARF\@: can cause headaches when debugging + \item{} or not generated at all (eg. OCaml until recently) \todo{Check + this} + \end{itemize} + + \vspace{1em} + \begin{center} + \Large\bf + $\leadsto$ Complex, buggy, untested + \end{center} +\end{frame} + +\begin{frame}{A debugging hell: Linux kernel} + ``Sorry, but last time was too f\dots painful. The whole (and + only) point of unwinders is to make debugging easy + when a bug occurs. But \alert{the dwarf unwinder had bugs} + itself, or \alert{our dwarf information had bugs}, and in either + case it actually turned several trivial bugs into a \alert{total + undebuggable hell}.'' + + \vspace{1em} + \only<1>{\hfill ---~Linus Torvalds, Kernel mailing list, 2012} + \pause{} + + ``If you can \alert{mathematically prove that the unwinder is + correct} — even in the presence of bogus and actively + incorrect unwinding information — and never ever + follows a bad pointer, \alert{I’ll reconsider}.'' + + \vspace{1em} + \hfill ---~Linus Torvalds, Kernel mailing list, 2012 + + \pause{}\vspace{1em} + \begin{center} + \Large\bf + \alert{This is where we still are!} + \end{center} +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Unwinding data validation} + +\begin{frame}{Main idea} + \begin{itemize} + \item If we follow \alert{one path of execution}, we can track the + return address location + \item If we read unwinding data \alert{at runtime}, we can \alert{check + the RA consistency} at each step + \end{itemize} +\end{frame} + +\newcommand{\tblrowval}[4]{#1 & #2 & \only<2->{#3} & \only<2->{#4} \\} +\newcommand{\blknote}[1] + {\begin{block}{} + \centering\large + #1 + \end{block}} +\newcommand{\blklnote}[1] + {\begin{block}{} + \large + #1 + \end{block}} +\newcommand{\tblhl}{\rowcolor{Tan}} + +\begin{frame}{Example} + \begin{table} + \ttfamily\large + \begin{tabularx}{0.9\linewidth}{ + l + b + >{\columncolor{SkyBlue}}s + >{\columncolor{SkyBlue}}s + } +\tblrowval{\hspace{-2ex}<{\bf foo}>:}{}{\textbf{CFA}}{\textbf{ra}} +\rowonly<3>{\tblhl{}} \tblrowval{push}{\%r15}{rsp+8}{c-8} +\rowonly<4>{\tblhl{}} \tblrowval{push}{\%r14}{rsp+16}{c-8} +\rowonly<5>{\tblhl{}} \tblrowval{mov}{\$0x3,\%eax}{rsp+24}{c-8} +\rowonly<6>{\tblhl{}} \tblrowval{push}{\%r13}{rsp+24}{c-8} +\tblrowval{push}{\%r12}{rsp+32}{c-8} +\tblrowval{push}{\%rbp}{rsp+40}{c-8} +\tblrowval{push}{\%rbx}{rsp+48}{c-8} +\tblrowval{sub}{\$0x68,\%rsp}{rsp+56}{c-8} +\rowonly<9>{\tblhl{}} \tblrowval{cmp}{\$0x1,\%edi}{rsp+160}{c-8}% +\only<-8>{% +\tblrowval{add}{\$0x68,\%rsp}{rsp+160}{c-8} +\tblrowval{pop}{\%rbx}{rsp+56}{c-8} +\tblrowval{pop}{\%rbp}{rsp+48}{c-8}} + \end{tabularx} + \end{table} + +\only<-8> { + \only<3>{\blknote{Upon function call, \alert{ra = *(\reg{rsp})} (ABI)}} + \only<4>{\blknote{\texttt{push} decreases \reg{rsp} by 8: % + \alert{ra = *(\reg{rsp} + 8)}}} + \only<5>{\blknote{and again: % + \alert{ra = *(\reg{rsp} + 16)}}} + \only<6>{\blknote{This \texttt{mov} leaves \reg{rsp} untouched: % + \alert{ra = *(\reg{rsp} + 16)}}} + \only<7>{\blknote{The unwinding table can actually be seen as\\ + an \alert{abstract interpretation} of the code\ldots}} + \only<8>{\blknote{\ldots and thus, for a given run, be + \alert{re-computed for verification}}} + } +\only<9-> { + \vspace{-2em} + \only<9>{\blklnote{If, within an execution, + \begin{itemize} + \item ra = \texttt{*(0xFFFF1098)} + \item \reg{rsp} = \texttt{0xFFFF1000} + \end{itemize} + We can \alert{evaluate both expressions} and \alert{compare} + }} +} +\end{frame} + +\begin{frame}{Dynamic validation} + \textbf{Abstract state} + \begin{itemize} + \item \alert{Stack} of actual \alert{addresses} where + \alert{return addresses} are stored + \end{itemize} + + \vspace{2em}\pause{} + + \textbf{Abstract instruction semantics} + \begin{itemize} + \item[\alert{\texttt{call}}] push \alert{\reg{rsp}} on the stack + \item[\alert{\texttt{ret}}] pop from the stack + \end{itemize} + + \vspace{2em}\pause{} + + \textbf{Validation of each instruction} + \begin{itemize} + \item Evaluate the return address provided by DWARF + \item Compare it with the value at the top of the stack + \end{itemize} +\end{frame} + +\begin{frame}{In practice: \texttt{eh\_frame\_check}} + Strategy implemented and working: \alert{\texttt{eh\_frame\_check}} + + \begin{itemize} + \item \texttt{gdb} allows for Python instrumentation + \vspace{1em}\pause{} + \item Parse ELF and DWARF data (\texttt{pyelftools}) + \item Run the binary inside gdb + \item Pause at each (assembly) step + \item Jointly evaluate DWARF data and the abstract stack + \item Report upon error + \end{itemize} + + \vspace{1em} + Works, but… \alert{Python is slow}! + + A few thousand of ASM instructions/second (good enough) +\end{frame} + +\begin{frame}{A real bug!} + \begin{columns}[c] + \begin{column}{0.65\textwidth} + \lstinputlisting[language=C]{src/llvm_bug.c} + \end{column} \begin{column}{0.35\textwidth} + \textbf{\texttt{CSmith}\\+ \texttt{Creduce}\\+ + \texttt{eh\_frame\_check}} + + \vspace{2em}$\leadsto$ \alert{\bf LLVM (3.8) bug!} + \end{column} + \end{columns} +\end{frame} + +\renewcommand{\tblrowval}[5]{#1 & #2 & #3 & #4 & #5 \\} +\begin{frame}{A real bug!} + + \begin{columns}[T] + \column{0.7\textwidth} + \begin{align*} + \only<2->{\textbf{Abstract state} \qquad & + \left[\texttt{0xFFFF1000}\right]\\} + \only<3->{\reg{rsp} \qquad & + \texttt{0xFFFF1000}} + \end{align*} + + \column{0.3\textwidth} + Blah ? + % Add check - cross + \end{columns} + + + \vspace{1em} + + \begin{table} + \ttfamily\large + \begin{tabularx}{0.95\linewidth}{ + l + l + b + >{\columncolor{SkyBlue}}s + >{\columncolor{SkyBlue}}s + } +\tblrowval{\hspace{-2ex}<{\bf foo}>:}{}{}{\textbf{CFA}}{\textbf{ra}} +%\rowonly<3>{\tblhl{}} \tblrowval{4004e0}{push}{\%rbx}{rsp+8}{c-8} +\rowonly<2-4>{\tblhl{}} \tblrowval{4004e0}{push}{\%rbx}{rsp+8}{c-8} +\rowonly<5-6>{\tblhl{}} \tblrowval{}{}{}{rsp+16}{c-8} +\tblrowval{}{[\ldots]}{}{}{} +\tblrowval{}{}{}{}{} +\rowonly<7-8>{\tblhl{}} \tblrowval{40061d}{pop}{\%rbx}{rsp+16}{c-8} +\rowonly<9-10>{\tblhl{}} \tblrowval{40061e}{retq}{}{rsp+16}{c-8} + \end{tabularx} + \end{table} + +\end{frame} + +\begin{frame}{TODO} + \todo{} +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Unwinding data synthesis from assembly} + +\begin{frame}{TODO} + \todo{} +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Unwinding data compilation} + +\subsection{Compilation ahead-of-time} \begin{frame}{Compilation overview} \begin{itemize} @@ -271,8 +522,6 @@ $1 = 84 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Example} - \begin{frame}{Compilation example: original C, DWARF} \lstinputlisting[language=C]{src/fib7/fib7.cfde} \end{frame} @@ -283,7 +532,6 @@ $1 = 84 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Compilation Strategy} \begin{frame}{Compilation choices} \textbf{In order to keep the compiler \alert{simple} and \alert{easily @@ -328,7 +576,6 @@ $1 = 84 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Outlining} \begin{frame}{Size optimisation: outlining} \begin{itemize} @@ -361,7 +608,6 @@ $1 = 84 \lstinputlisting[language=C]{src/fib7/fib7.eh_elf_outline.c} \end{frame} -\subsection{A word on formalization} \begin{frame}[t]{A word on formalization} \begin{itemize} @@ -381,7 +627,7 @@ $1 = 84 \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Benchmarking} +\subsection{Benchmarking} \begin{frame}{Benchmarking requirements} \begin{enumerate} @@ -422,7 +668,7 @@ $1 = 84 \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Results} +\subsection{Results} \begin{frame}{Time performance} \small @@ -480,7 +726,6 @@ $1 = 84 \end{center} \end{frame} - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section*{} \setcounter{section}{0} diff --git a/src/llvm_bug.c b/src/llvm_bug.c new file mode 100644 index 0000000..5b9fb8c --- /dev/null +++ b/src/llvm_bug.c @@ -0,0 +1,13 @@ +short a,b,g; +long c; +char d; +int e, f; + +void main() { + for(; f; f++) + for(; e; e++) + for(; c; c++) { + g = a % b; + for(; d <= 1; d++); + } +} diff --git a/texlib/common.sty b/texlib/common.sty index 90aaae4..52946a0 100644 --- a/texlib/common.sty +++ b/texlib/common.sty @@ -10,3 +10,15 @@ \newcommand{\tnhead}[2]{\multicolumn{1}{#1}{#2}} % Table neutral head \newcommand{\spaced}[2]{\hspace{#1} #2 \hspace{#1}} + +\makeatletter +\newcommand{\rowonly}{% + \noalign{\ifnum0=`}\fi + \@rowonly +} +\newcommand<>{\@rowonly}[1]{% + \only#2% + {\ifnum0=`{\fi}#1{\ifnum0=`}\fi}% + \ifnum0=`{\fi}% + \ignorespaces +}