% phd-defense/slides/40_cesasme/main.tex
%
% 211 lines
% 6.1 KiB
% TeX

\section{\cesasme: evaluate and compare state-of-the-art code analyzers}
\begin{frame}[fragile]
    % Motivating example: the same assembly loop (left) receives a different
    % throughput prediction from each of the four tools (right).
    \begin{minipage}{0.6\textwidth}
        \begin{center}
            Matrix multiplication:
        \end{center}
% The listing body is verbatim material and is kept flush left, exactly as
% written, so that no extra whitespace is typeset.
\begin{lstlisting}[language={[x86masm]Assembler}]
loop:
movsd (%rcx, %rax), %xmm0
mulsd %xmm1, %xmm0
addsd (%rdx, %rax), %xmm0
movsd %xmm0, (%rdx, %rax)
addq $8, %rax
cmpq $0x2260, %rax
jne loop\end{lstlisting}
    \end{minipage}\hfill\vrule\hfill
    \begin{minipage}{0.38\textwidth}
        \begin{tabular}{l r}
            \llvmmca{}: & 1.5 cycles/iter \\
            \iaca{}: & 2.0 cycles/iter \\
            \ithemal{}: & 2.0 cycles/iter \\
            \uica{}: & 3.0 cycles/iter \\
        \end{tabular}
        \vspace{1em}
        \begin{center}
            % Alerted on the first slide only, plain bold afterwards; the
            % overlay specification on \alert avoids duplicating the text.
            \alert<1>{\textbf{Which tool is correct?}}
        \end{center}
    \end{minipage}
    \vfill{}
    % Second slide: reveal what is missing to answer the question.
    \pause{}
    \begin{center}
        \textbf{We lack:}\\
        \hfill\textbf{\alert{Benchmarks}}\hfill\textbf{\alert{Context}}\hfill~
    \end{center}
\end{frame}
\begin{frame}{Generating benchmarks}
    % Left column: requirements on the benchmarks. Right column: the
    % technique meeting each requirement, uncovered one overlay at a time.
    We need benchmarks\ldots \\
    \vspace{1em}
    % The braces open a group so that \arraystretch and \litem stay local
    % to this table.
    {\renewcommand*{\arraystretch}{1.2}
    \newcommand{\litem}{\usebeamertemplate*{itemize item}\hspace{-0.5em}}
    \begin{tabular}{rl l}
        \litem{} & representative &
        \visible<2->{\alert{Polybench}}\\
        \litem{} & infinite, L1-resident loops &
        \visible<3->{\alert{``microkernelification''} + verify}
        \\
        \litem{} & without control flow &
        \visible<4->{\alert{Polybench}} \\
        \litem{} & stressing diverse resources &
        \visible<5->{\alert{Polyhedral transformations}}
        \\
        & & \visible<5->{+ \alert{unrolling} + \alert{compiler
        options}} \\
        \litem{} & plenty of them &
        \visible<6->{\alert{Even more} of all those $\nnearrow$} \\
    \end{tabular}
    \let\litem\undefined
    }
    \begin{center}
        % \leadsto is used in math mode, as elsewhere in this file, and the
        % "approximately" sign is typeset with \sim: a bare ~ is only a
        % non-breaking space in LaTeX and would not print a tilde.
        \visible<6->{\textbf{$\leadsto$ yields \alert{$\sim$3500} benchmarks}}
    \end{center}
\end{frame}
\begin{frame}{In-context baseline: lifting predictions}
    % Headline: the unit of analysis becomes the full kernel \kerK.
    \begin{center}
        \textbf{Consider instead $\kerK$ = \alert{full kernel}, with its context\\
            $\leadsto$ \alert{multiple} basic blocks}
    \end{center}
    % Second slide: how a per-basic-block prediction is lifted to \kerK.
    \pause
    \begin{itemize}
        \item Measure total kernel time \textbf{in context}
        \item Instrument full kernel $\kerK$: for each basic block,
            $\operatorname{occur}(\text{bb})$
        \item For each tool
            \begin{itemize}
                \item for each bb, $\operatorname{prediction}(\text{bb})$
                \item \emph{lift} predictions:
                    % Sum over the basic blocks of \kerK of
                    % occur(bb) times prediction(bb).
                    \[
                        \operatorname{prediction}(\kerK) =
                        \sum_{\text{bb} \in \kerK}
                        \operatorname{occur}(\text{bb}) \times
                        \operatorname{prediction}(\text{bb})
                    \]
            \end{itemize}
    \end{itemize}
    \vfill
    % Third slide: conclusion.
    \pause
    \begin{center}
        \textbf{Now we have a baseline.}
    \end{center}
\end{frame}
\begin{frame}
    % Untitled frame showing the content of overview.tex, centred in a
    % single column of width \paperwidth minus 8pt.
    \vspace{0.5cm}
    \begin{columns}
        \begin{column}{\dimexpr\paperwidth-8pt}
            \centering
            \input{overview.tex}
        \end{column}
    \end{columns}
\end{frame}
\begin{frame}{First results (Intel Skylake on Grid5000)}
    % Three side-by-side boxes: an empty spacer, the box plot, and side
    % notes; the takeaway text follows below them in the same column.
    \begin{columns}
        \column{\dimexpr\paperwidth-8pt}
        \centering
        % Left: empty box balancing the notes on the right.
        \begin{minipage}[c]{0.27\textwidth}
            ~
        \end{minipage}
        \hfill
        % NOTE(review): \includegraphics has no native SVG support;
        % presumably a conversion rule is set up in the preamble -- confirm.
        \begin{minipage}[c]{0.4\textwidth}
            \centering
            \includegraphics[width=\textwidth]{overall_analysis_boxplot.svg}\\
        \end{minipage}
        \hfill
        \begin{minipage}[c]{0.27\textwidth}
            \centering
            % \textgreater instead of a bare >: under the OT1 font encoding
            % a text-mode > is printed as an inverted question mark.
            {\small\textit{Outliers \textgreater{} 250\,\% trimmed}} \\
            \vspace{2em}
            {\small\textit{Associated table in\\
            supplementary material}}
        \end{minipage}
        \begin{center}
            \textbf{\alert{Severely worse} than previous evaluations!}\\
            \textbf{\hspace{0.7cm}Harness broken?\hfill{}Harder
            benchmarks?\hfill{}Previously undetected weaknesses?\hspace{0.7cm}~}
        \end{center}
    \end{columns}
\end{frame}
\begin{frame}[fragile]{Searching for areas of improvement}
    % Observations on where the tools fail, then a "bad" and a "good"
    % loop side by side; the explanation appears on the last slide.
    \begin{itemize}
        \item Tools often wrong on the \emph{same} rows
            \begin{itemize}
                \item \llvmmca{}, \iaca{} and \uica{} share 80\,\% of their
                    worst 30\,\%
            \end{itemize}
        \item Often \texttt{-O1} rows
    \end{itemize}
    \begin{center}
        \textbf{Crucial difference:}
    \end{center}
    % Two monospace highlight colours, used from inside the listings
    % through the escape delimiters.
    \newcommand*{\lsthlA}[1]{\texttt{\color[HTML]{df018a}#1}}
    \newcommand*{\lsthlB}[1]{\texttt{\color[HTML]{d88900}#1}}
    \begin{minipage}[t]{0.47\textwidth}
        \begin{center}
            \textbf{\textcolor{red}{Bad}\onslide<2->{: reduction}}
        \end{center}
        \vspace{-1em}
% Listing bodies are verbatim material: kept flush left and unchanged.
\begin{lstlisting}[language={[ANSI]C}]
for((§\lsthlA{c3}§))
tmp[(§\lsthlB{c1}§)] += A[c1][c3] * x[c3];
\end{lstlisting}
    \end{minipage}
    \hfill\vrule\hfill
    \begin{minipage}[t]{0.47\textwidth}
        \begin{center}
            \textbf{\textcolor[HTML]{008f0c}{Good}\onslide<2->{: map}}
        \end{center}
        \vspace{-1em}
\begin{lstlisting}[language={[ANSI]C}]
for((§\lsthlA{c3}§))
A[c1][(§\lsthlA{c3}§)] += u1[c1] * v1[c3]
+ u2[c1] * v2[c3];
\end{lstlisting}
    \end{minipage}
    \begin{center}
        \onslide<3->{\alert{\textbf{Dependencies through memory!}}}
    \end{center}
\end{frame}
\begin{frame}{Pruning memory-carried dependencies (Intel Skylake on Grid5000)}
    % Three side-by-side boxes: an empty spacer, the box plot, and a side
    % note; the takeaway text follows below the columns.
    \begin{columns}
        \column{\dimexpr\paperwidth-8pt}
        \centering
        % Left: empty box balancing the note on the right.
        \begin{minipage}[c]{0.24\textwidth}
            ~
        \end{minipage}
        \hfill
        % NOTE(review): \includegraphics has no native SVG support;
        % presumably a conversion rule is set up in the preamble -- confirm.
        \begin{minipage}[c]{0.5\textwidth}
            \centering
            \includegraphics[width=\textwidth]{nomemdeps_boxplot.svg}\\
        \end{minipage}
        \hfill
        \begin{minipage}[c]{0.24\textwidth}
            \centering{}
            % \textgreater instead of a bare >: under the OT1 font encoding
            % a text-mode > is printed as an inverted question mark.
            {\small\textit{Outliers \textgreater{} 200\,\% \\
            trimmed}}
        \end{minipage}
    \end{columns}
    \begin{center}
        \textbf{\alert{Closer to expected results}}
    \end{center}
\end{frame}