% Source: phd-defense/slides/40_cesasme/main.tex (232 lines, 7 KiB, TeX)

% Section heading for this part of the talk.
% \cesasme is a macro that is not defined in the visible part of this file.
\section{\cesasme: evaluate and compare state-of-the-art code analyzers}
% Motivating example: one loop body (left) and the cycles/iteration figure
% reported for it by four tools (right), followed by what is missing.
% The frame is [fragile] because it contains a verbatim lstlisting; for the
% same reason \end{frame} must stay alone on its line, unindented.
\begin{frame}[fragile]
  % Left column: the assembly listing. Its body is verbatim, so it is kept
  % unindented and must not receive comments.
  \begin{minipage}{0.6\textwidth}
    \begin{center}
      Matrix multiplication:
    \end{center}
    \begin{lstlisting}[language={[x86masm]Assembler}]
loop:
movsd (%rcx, %rax), %xmm0
mulsd %xmm1, %xmm0
addsd (%rdx, %rax), %xmm0
movsd %xmm0, (%rdx, %rax)
addq $8, %rax
cmpq $0x2260, %rax
jne loop\end{lstlisting}
  \end{minipage}\hfill\vrule\hfill
  % Right column: per-tool figures and the question they raise.
  \begin{minipage}{0.38\textwidth}
    \begin{tabular}{l r}
      \llvmmca{}: & 1.5 cycles/iter \\
      \iaca{}: & 2.0 cycles/iter \\
      \ithemal{}: & 2.0 cycles/iter \\
      \uica{}: & 3.0 cycles/iter \\
    \end{tabular}
    \vspace{1em}
    \begin{center}
      % Highlighted on the first slide only, plain bold afterwards.
      % A single \alert<1> replaces the former \only<1>/\only<2-> pair so the
      % text exists in exactly one place.
      \alert<1>{\textbf{Which tool is correct?}}
    \end{center}
  \end{minipage}
  \vfill{}
  \pause{}
  % Second slide: the two missing ingredients, spread across the line.
  \begin{center}
    \textbf{We lack:}\\
    \hfill\textbf{\alert{Benchmarks}}\hfill\textbf{\alert{Context}}\hfill~
  \end{center}
\end{frame}
% Requirements on the benchmark suite (middle column) and how each one is met
% (right column, uncovered one overlay at a time), then the resulting count.
\begin{frame}{Generating benchmarks}
  We need benchmarks\ldots \\
  \vspace{1em}
  {\renewcommand{\arraystretch}{1.2}
    % \litem draws the theme's itemize bullet so that table rows look like
    % list items. It is defined inside this brace group and is therefore local.
    \newcommand{\litem}{\usebeamertemplate*{itemize item}\hspace{-0.5em}}
    \begin{tabular}{rl l}
      \litem{} & representative of scientific computation &
        \visible<2->{\alert{Polybench}} \\
      \litem{} & infinite, L1-resident loops &
        \visible<3->{\alert{``microkernelification''} + verify} \\
      \litem{} & without control flow &
        \visible<4->{\alert{Polybench}} \\
      \litem{} & stressing diverse resources &
        \visible<5->{\alert{Polyhedral transformations}} \\
      & & \visible<5->{+ \alert{unrolling} + \alert{compiler options}} \\
      \litem{} & plenty of them &
        \visible<6->{\alert{Even more} of all those $\nnearrow$} \\
    \end{tabular}
    \let\litem\undefined
  }
  \begin{center}
    % \leadsto is a math symbol, so it is wrapped in $...$ as on the
    % following frame. "$\sim$" prints the "approximately" sign: a bare "~"
    % is a non-breaking space in LaTeX and would print nothing.
    \visible<6->{\textbf{$\leadsto$ yields \alert{$\sim$3500} benchmarks}}
  \end{center}
\end{frame}
% Baseline construction: per-basic-block predictions are weighted by the
% number of occurrences of each block and summed over the whole kernel.
\begin{frame}{In-context baseline: lifting predictions}
  \begin{center}
    \textbf{%
      Consider instead $\kerK$ = \alert{full kernel}, with its context\\
      $\leadsto$ \alert{multiple} basic blocks%
    }
  \end{center}
  \pause
  % Second slide: the measurement and lifting procedure.
  \begin{itemize}
    \item Measure total kernel time \textbf{in context}
    \item Instrument full kernel $\kerK$:
      for each basic block, $\operatorname{occur}(\text{bb})$
    \item For each tool
      \begin{itemize}
        \item for each bb, $\operatorname{prediction}(\text{bb})$
        \item \emph{lift} predictions:
          \[
            \operatorname{prediction}(\kerK)
              = \sum_{\text{bb} \in \kerK}
                \operatorname{occur}(\text{bb})
                \times \operatorname{prediction}(\text{bb})
          \]
      \end{itemize}
  \end{itemize}
  \vfill
  \pause
  % Third slide: conclusion.
  \begin{center}
    \textbf{Now we have a baseline.}
  \end{center}
\end{frame}
% Untitled frame showing the content of overview.tex, centred in a single
% column that is 8pt narrower than the paper width.
\begin{frame}
  \vspace{0.5cm}
  \begin{columns}
    \begin{column}{\dimexpr\paperwidth-8pt}
      \centering
      \input{overview.tex}
    \end{column}
  \end{columns}
\end{frame}
% First results: per-tool error table (left), box plot (right), and the
% three candidate explanations underneath.
\begin{frame}{First results (Intel Skylake on Grid5000)}
  \begin{columns}
    \column{\dimexpr\paperwidth-8pt}
    \centering
    \begin{minipage}[c]{0.55\textwidth}
      \centering
      \footnotesize
      \begin{tabular}{l r r r r r}
        \toprule
        % Every header cell is bold, "Failures" included, for consistency.
        \textbf{Bencher} & \textbf{Failures} &
        \textbf{MAPE} & \textbf{Median} & \textbf{$K_\tau$} & \textbf{Time}\\
        & (\%) & (\%) & (\%) & & (CPU$\cdot$h) \\
        \midrule
        BHive & 37.20 & 27.95 & 23.01 & 0.81 & 1.37\\
        llvm-mca & 0.00 & 36.71 & 59.80 & 0.57 & 0.96 \\
        UiCA & 0.00 & 29.59 & 52.99 & 0.58 & 2.12 \\
        Ithemal & 0.00 & 57.04 & 75.69 & 0.39 & 0.38 \\
        Iaca & 0.00 & 30.23 & 57.18 & 0.59 & 1.31 \\
        Gus & 0.00 & 20.37 & 30.59 & 0.82 & 188.04 \\
        \bottomrule
      \end{tabular}
    \end{minipage}
    \hfill\vrule\hfill
    \begin{minipage}[c]{0.38\textwidth}
      \centering
      % NOTE(review): \includegraphics does not read SVG natively; presumably
      % a conversion rule is configured elsewhere -- confirm.
      \includegraphics[width=\textwidth]{overall_analysis_boxplot.svg}\\
      % "$>$" instead of a bare ">": with the OT1 text encoding a bare ">"
      % is typeset as an inverted question mark.
      {\small\textit{Outliers $>$ 250\,\% trimmed}}
    \end{minipage}
    \begin{center}
      \textbf{\alert{Severely worse} than previous evaluations!}\\
      \textbf{\hspace{0.7cm}Harness broken?\hfill{}Harder benchmarks?\hfill{}Previously undetected weaknesses?\hspace{0.7cm}~}
    \end{center}
  \end{columns}
\end{frame}
% Error analysis: the tools tend to fail on the same rows; two code snippets
% contrast a "bad" and a "good" loop, and the last overlay names the cause.
% [fragile] is needed for the verbatim listings, whose bodies (and the
% closing \end{frame}) must stay unindented.
\begin{frame}[fragile]{Searching for areas of improvement}
  \begin{itemize}
    \item Tools often wrong on the \emph{same} rows
      \begin{itemize}
        \item \llvmmca{}, \iaca{} and \uica{} share 80\,\% of their worst 30\,\%
      \end{itemize}
    \item Often \texttt{-O1} rows
  \end{itemize}
  \begin{center}
    \textbf{Crucial difference:}
  \end{center}
  % Coloured monospace highlights, used inside the listings between the
  % (§ ... §) markers (presumably set up as listings escape delimiters
  % elsewhere -- confirm).
  \newcommand{\lsthlA}[1]{\textcolor[HTML]{df018a}{\texttt{#1}}}
  \newcommand{\lsthlB}[1]{\textcolor[HTML]{d88900}{\texttt{#1}}}
  % Left: the "bad" snippet; its label suffix appears from slide 2.
  \begin{minipage}[t]{0.47\textwidth}
    \begin{center}
      \textbf{\textcolor{red}{Bad}\onslide<2->{: reduction}}
    \end{center}
    \vspace{-1em}
    \begin{lstlisting}[language={[ANSI]C}]
for((§\lsthlA{c3}§))
tmp[(§\lsthlB{c1}§)] += A[c1][c3] * x[c3];
\end{lstlisting}
  \end{minipage}
  \hfill\vrule\hfill
  % Right: the "good" snippet; its label suffix appears from slide 2.
  \begin{minipage}[t]{0.47\textwidth}
    \begin{center}
      \textbf{\textcolor[HTML]{008f0c}{Good}\onslide<2->{: map}}
    \end{center}
    \vspace{-1em}
    \begin{lstlisting}[language={[ANSI]C}]
for((§\lsthlA{c3}§))
A[c1][(§\lsthlA{c3}§)] += u1[c1] * v1[c3]
+ u2[c1] * v2[c3];
\end{lstlisting}
  \end{minipage}
  % Shown from slide 3 on.
  \begin{center}
    \onslide<3->{\alert{\textbf{Dependencies through memory!}}}
  \end{center}
\end{frame}
% Full vs. trimmed dataset: error metrics per tool (left) and box plot
% (right), with a one-line conclusion underneath.
\begin{frame}{Pruning memory-carried dependencies (Intel Skylake on Grid5000)}
  % Coloured dataset labels used in the table's "Dataset" column.
  \newcommand{\full}{{\color[HTML]{c7805a}Full}}
  \newcommand{\pruned}{{\color[HTML]{4360be}Trim}}
  \begin{columns}
    \column{\dimexpr\paperwidth-8pt}
    \centering
    \begin{minipage}[c]{0.50\textwidth}
      \centering
      \footnotesize
      % Five columns, matching the five cells of every row. A sixth, empty
      % column would make the booktabs rules extend past the data.
      \begin{tabular}{l r r r r}
        \toprule
        \textbf{Bencher} & \textbf{Dataset} &
        \textbf{MAPE} & \textbf{Median} & \textbf{$K_\tau$} \\
        & & (\%) & (\%) & \\
        \midrule
        \multirow{2}*{llvm-mca} & \full{} & 36.71 & 59.80 & 0.57 \\
        & \pruned{} & 27.06 & 21.04 & 0.79\\
        \multirow{2}*{UiCA} & \full{} & 29.59 & 52.99 & 0.58 \\
        & \pruned{} & 18.42 & 11.96 & 0.80\\
        \multirow{2}*{Iaca} & \full{} & 30.23 & 57.18 & 0.59 \\
        & \pruned{} & 17.55 & 12.17 & 0.82\\
        \bottomrule
      \end{tabular}
    \end{minipage}
    \hfill\vrule\hfill
    \begin{minipage}[c]{0.48\textwidth}
      \centering
      % NOTE(review): \includegraphics does not read SVG natively; presumably
      % a conversion rule is configured elsewhere -- confirm.
      \includegraphics[width=\textwidth]{nomemdeps_boxplot.svg}\\
      % "$>$" instead of a bare ">": with the OT1 text encoding a bare ">"
      % is typeset as an inverted question mark.
      {\small\textit{Outliers $>$ 200\,\% trimmed}}
    \end{minipage}
  \end{columns}
  % Explicitly drop the two helper macros once the table has been typeset.
  \let\full\undefined
  \let\pruned\undefined
  \begin{center}
    \textbf{\alert{Closer to expected results}}
  \end{center}
\end{frame}