% phd-defense/slides/50_staticdeps/main.tex
%
% (Repository-viewer header: 291 lines, 8.9 KiB, TeX)

\section{\staticdeps: static extraction of memory-carried dependencies}
\begin{frame}[fragile]{Dependencies, through registers}
\begin{minipage}[c]{0.35\textwidth}
\begin{lstlisting}[language={[x86masm]Assembler}, numbers=none]
0: mov (%rax), (§\alert{\reg{rcx}}§)
...
3: add (§\alert{\reg{rcx}}§), %rdx
\end{lstlisting}
\end{minipage}\hfill
\begin{minipage}[c]{0.5\textwidth}
\begin{itemize}
\item Track register writes
\item Output dependency upon read
\end{itemize}
\end{minipage}
\begin{center}
0 $\to$ 3 through \reg{rcx}
\end{center}
\end{frame}
\begin{frame}[fragile]{Dependencies, loop-carried}
\begin{minipage}[c]{0.40\textwidth}
\begin{lstlisting}[language={[x86masm]Assembler}, numbers=none]
loop:
0: add (§\alert{\reg{rcx}}§), %rdx
...
3: mov (%rax), (§\alert{\reg{rcx}}§)
6: jmp loop
\end{lstlisting}
\end{minipage}\hfill
\begin{minipage}[c]{0.1\textwidth}
$\longrightarrow$
\end{minipage}\hfill
\begin{minipage}[c]{0.40\textwidth}
\begin{lstlisting}[language={[x86masm]Assembler}, numbers=none]
0: add %rcx, %rdx
...
3: mov (%rax), (§\alert{\reg{rcx}}§)
0: add (§\alert{\reg{rcx}}§), %rdx
...
3: mov (%rax), %rcx
\end{lstlisting}
\end{minipage}
\begin{center}
3 $\to$ 0 through \reg{rcx}, \alert{loop-carried}
\end{center}
\end{frame}
\begin{frame}[fragile]{Dependencies, through memory}
\begin{minipage}[c]{0.30\textwidth}
\begin{lstlisting}[language={[x86masm]Assembler}, numbers=none]
mov %r10, (§\alert{4(\reg{rax})}§)
add $4, %rax
add (§\alert{(\reg{rax})}§), %rbx
\end{lstlisting}
\end{minipage}\hfill
\begin{minipage}[c]{0.68\textwidth}
\begin{itemize}
\item Through memory: indirections, arithmetic, \dots
\item Requires comparison of arbitrary symbolic expressions
\medskip{}
\item Use randomness as a kind of hash table instead
\item Loop-carried: luckily, ROB is finite and small
\end{itemize}
\end{minipage}
\pause{}
\bigskip
\begin{center}
\textbf{Hypothesis:} pointers from context \alert{do not
alias}.\\
Compilers prefer passing a single pointer.
\end{center}
\end{frame}
\begin{frame}{The \staticdeps{} algorithm}
\begin{itemize}
\item \alert{Unroll} kernel until $\card{\kerK} \geq \card{\text{ROB}} +
\card{\kerK_0}$
\item \alert{Simulate} execution
\item Unknown value (reg./mem.)? \alert{Sample} uniformly in $0\ldots2^{64}-1$
(\alert{``fresh''})
\item \alert{Compute arithmetic} normally (overflow is fine)
\item Float or unknown operands $\leadsto \alert{\bot}$
\item Upon write, remember from which instruction
\item Upon read, if writer known, \alert{output dependency}
\end{itemize}
\end{frame}
\begin{frame}[fragile]{An example: memoized Fibonacci sequence}
\begin{minipage}[c]{0.46\textwidth}
\begin{lstlisting}[language=C]
int fibo(int* F, int n) {
for(int i=2; i <= n; ++i) {
F[i] = F[i-1] + F[i-2];
}
return F[n];
}
\end{lstlisting}
\end{minipage}\hfill\begin{minipage}[c]{0.06\textwidth}
\contour{black}{$\longrightarrow$}
\end{minipage}\hfill\begin{minipage}[c]{0.40\textwidth}
\begin{lstlisting}[language={[x86masm]Assembler}, numbers=none]
0: mov (%rax),%edx
1: add 0x4(%rax),%edx
2: mov %edx,0x8(%rax)
3: add $0x4,%rax
4: cmp %rcx,%rax
5: jne 0
\end{lstlisting}
\end{minipage}
\end{frame}
\begin{frame}[fragile]
\vspace{1cm}
\newcommand{\unk}{{\color{gray}?}}
\newcommand{\h}{\cellcolor[HTML]{D0ECFF}}
\newcommand{\w}{\cellcolor[HTML]{d6bf86}}
\newcommand{\dep}[1]{{\color{red}#1\,$\to$}}
\begin{columns}
\column{\dimexpr\paperwidth-8pt}
\centering
\hfill\begin{minipage}{0.29\textwidth}
{\footnotesize
\begin{tabular}{c c c}
Mem. read\h & & Mem. write\w \\
\end{tabular}
}
\vspace{1em}
\vfill
\begin{lstlisting}[language={[x86masm]Assembler}, numbers=none]
0: mov (%rax),%edx
1: add 0x4(%rax),%edx
2: mov %edx,0x8(%rax)
3: add $0x4,%rax
4: cmp %rcx,%rax
5: jne 0
\end{lstlisting}
\end{minipage}\hfill
\begin{minipage}{0.69\textwidth}
\centering
\footnotesize
\begin{tabular}{c c c c c c c c c l}
\toprule
\textbf{After} & \multicolumn{2}{c}{\textbf{Registers}} &&
\multicolumn{5}{c}{\textbf{Memory}} & \textbf{Dep}\\
\textbf{instr} & \reg{rax} & \reg{edx}
&& \texttt{100} & \texttt{104} & \texttt{108} & \texttt{112} & \texttt{116} & \\
\midrule
Start & \unk& \unk&& \unk & \unk & \unk & \unk & \unk & \\
\midrule
\pause{}
0 & 100 & 200 && 200\h& \unk & \unk & \unk & \unk & \\
\pause{}
1 & 100 & 376 && 200 & 176\h& \unk & \unk & \unk & \\
\pause{}
2 & 100 & 376 && 200 & 176 & 376\w& \unk & \unk & \\
\pause{}
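3 & \alert{104} & 376 && 200 & 176 & 376 & \unk & \unk{} & \\{} % "Help!" (French slang). NOTE(review): the {} after \\ looks like a deliberate workaround -- confirm before removing.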
4 & 104 & 376 && 200 & 176 & 376 & \unk & \unk & \\
\midrule
\pause{}
0 & 104 & \alert{176} && 200 & 176\h& 376 & \unk & \unk & \\
\pause{}
1 & 104 & \alert{552} && 200 & 176 & 376\h& \unk & \unk & \dep{-1,2}\\
\pause{}
2 & 104 & 552 && 200 & 176 & 376 & 552\w& \unk & \\
\midrule
\pause{}
0 & 108 & \alert{376} && 200 & 176 & 376\h& 552 & \unk & \dep{-2,2}\\
\pause{}
1 & 108 & \alert{928} && 200 & 176 & 376 & 552\h& \unk & \dep{-1,2}\\
\pause{}
2 & 108 & 928 && 200 & 176 & 376 & 552 & 928\w &\\
\bottomrule{}
\end{tabular}
\end{minipage}\hfill
\end{columns}
\let\unk\undefined
\let\h\undefined
\let\w\undefined
\let\dep\undefined
\end{frame}
\begin{frame}{Practical implementation}
\begin{itemize}
\item Python code
\item Reads asm / ELF / symbol in ELF
\item Disassembly: \texttt{capstone}
\item Semantics: \texttt{VEX} (Valgrind's intermediate representation)
\end{itemize}
\begin{center}
$\leadsto$ fast; supports many architectures
\end{center}
\end{frame}
\begin{frame}{Limitations}
\begin{itemize}
\item Randomness may generate false positives
\begin{itemize}
\item Very unlikely: $2^{64}$ vs.\ $\sim~10^{4}$
\item If needed, amplify (run twice)
\end{itemize}
\item No false negatives caused by randomness, however
\bigskip
\item Unaware of context: \emph{assumes no pointers alias}
\begin{itemize}
\item Intrinsic limitation of block-based code analyzers
\item Future work: information from
\begin{itemize}
\item the compiler?
\item a light instrumentation pass?
\end{itemize}
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{Evaluation: coverage}
\begin{itemize}
\item Baseline: instrumentation (extract deps at runtime)
\item Filter out \emph{long-distance dependencies} ($> \card{\text{ROB}}$)
\item On all \cesasme{} benchmarks
\end{itemize}
\begin{minipage}{0.4\textwidth}
\[
\cov_u =
\frac{\card{\text{found}}}{\card{\text{found}}+\card{\text{missed}}}
\]
\end{minipage}\hfill
\begin{minipage}{0.4\textwidth}
\[
\cov_w =
\frac{\sum_{d\in\text{found}}\rho_d}
{\sum_{d\in\text{found}~\cup~\text{missed}}\rho_d}
\]
\end{minipage}
\vfill
\begin{center}
\begin{tabular}{r r}
\toprule
$\cov_u$ (\%) & $\cov_w$ (\%) \\
\midrule
\alert{94.4} & \alert{98.3} \\
\bottomrule
\end{tabular}
\end{center}
\end{frame}
\begin{frame}{Evaluation: \textit{points-to} analysis}
\begin{itemize}
\item Quantify whether $\exists p, q \in \text{context}$ pointing to
the \alert{same memory region} (``points-to'')
\item Proxy: if $i_0 \to i_1$, then $q \in i_1$ aliases $p \in i_0$
\item If $p = q$, we should catch it
\item If not: either \emph{long-distance} with $p=q$, or \alert{$p \neq
q$}!
\item[$\leadsto$] Keep long-distance dependencies; evaluate coverage on this
\end{itemize}
\begin{center}
\begin{tabular}{r r}
\toprule
$\cov_u$ (\%) & $\cov_w$ (\%) \\
\midrule
\alert{95.0} & \alert{93.7} \\
\bottomrule
\end{tabular}
\end{center}
\end{frame}
\begin{frame}{Evaluation: use in \uica}
\begin{columns}
\column{\dimexpr\paperwidth-8pt}
\centering
\begin{minipage}[c]{0.5\textwidth}
\centering
\includegraphics[width=0.98\textwidth]{uica_sd_boxplot.svg}
\end{minipage}
\end{columns}
\end{frame}