% File-viewer metadata (not part of the document): 211 lines, 6.1 KiB, TeX.
\section{\cesasme: evaluate and compare state-of-the-art code analyzers}

% Motivating slide: one assembly loop (left) and the cycles/iteration that
% four analyzers report for it (right), followed by what is missing.
% The frame is [fragile] because it contains a verbatim lstlisting.
\begin{frame}[fragile]
  \begin{minipage}{0.6\textwidth}
    \begin{center}
      Matrix multiplication:
    \end{center}
    % The listing is kept flush left: whitespace inside lstlisting is typeset.
    % \end{lstlisting} directly follows the last instruction on the same line.
\begin{lstlisting}[language={[x86masm]Assembler}]
loop:
movsd (%rcx, %rax), %xmm0
mulsd %xmm1, %xmm0
addsd (%rdx, %rax), %xmm0
movsd %xmm0, (%rdx, %rax)
addq $8, %rax
cmpq $0x2260, %rax
jne loop\end{lstlisting}
  \end{minipage}\hfill\vrule\hfill
  \begin{minipage}{0.38\textwidth}
    \begin{tabular}{l r}
      \llvmmca{}: & 1.5 cycles/iter \\
      \iaca{}: & 2.0 cycles/iter \\
      \ithemal{}: & 2.0 cycles/iter \\
      \uica{}: & 3.0 cycles/iter \\
    \end{tabular}
    \vspace{1em}
    \begin{center}
      % Alerted on slide 1 only; plain bold from slide 2 onwards.
      \alert<1>{\textbf{Which tool is correct?}}
    \end{center}
  \end{minipage}

  \vfill{}

  \pause{}
  \begin{center}
    \textbf{We lack:}\\
    % The \hfill's spread the two keywords evenly; the trailing ~ keeps the
    % last \hfill from being discarded at the end of the line.
    \hfill\textbf{\alert{Benchmarks}}\hfill\textbf{\alert{Context}}\hfill~
  \end{center}
\end{frame}

% Requirements on the benchmark set (left columns), each paired with the way
% it is met (right column, uncovered one overlay at a time).
\begin{frame}{Generating benchmarks}
  We need benchmarks\ldots \\
  \vspace{1em}
  % The group keeps the row stretch and the \litem helper local to this table.
  {\renewcommand{\arraystretch}{1.2}
    % Draws the theme's itemize bullet inside a tabular cell.
    \newcommand{\litem}{\usebeamertemplate*{itemize item}\hspace{-0.5em}}
    \begin{tabular}{rl l}
      \litem{} & representative &
        \visible<2->{\alert{Polybench}} \\
      \litem{} & infinite, L1-resident loops &
        \visible<3->{\alert{``microkernelification''} + verify} \\
      \litem{} & without control flow &
        \visible<4->{\alert{Polybench}} \\
      \litem{} & stressing diverse resources &
        \visible<5->{\alert{Polyhedral transformations}} \\
      % Continuation row of the previous item, shown on the same overlay.
      & & \visible<5->{+ \alert{unrolling} + \alert{compiler options}} \\
      \litem{} & plenty of them &
        \visible<6->{\alert{Even more} of all those $\nnearrow$} \\
    \end{tabular}
    \let\litem\undefined
  }

  \begin{center}
    % \leadsto is a math symbol, so it needs math mode (as on the next frame).
    % $\sim$ prints a visible "approximately" sign; a bare ~ would only
    % produce a non-breaking space.
    \visible<6->{\textbf{$\leadsto$ yields \alert{$\sim$3500} benchmarks}}
  \end{center}
\end{frame}

% Defines the in-context baseline: per-basic-block predictions are weighted
% by the block's occurrence count and summed over the whole kernel.
\begin{frame}{In-context baseline: lifting predictions}
  \begin{center}
    \textbf{Consider instead $\kerK$ = \alert{full kernel}, with its
      context\\
      $\leadsto$ \alert{multiple} basic blocks}
  \end{center}

  \pause

  \begin{itemize}
    \item Measure total kernel time \textbf{in context}
    \item Instrument full kernel $\kerK$: for each basic block,
      $\operatorname{occur}(\text{bb})$
    \item For each tool
      \begin{itemize}
        \item for each bb,
          $\operatorname{prediction}(\text{bb})$
        % No blank lines inside the display: they would end the paragraph
        % in math mode and raise an error.
        \item \emph{lift} predictions: \[
            \operatorname{prediction}(\kerK) =
            \sum_{\text{bb} \in \kerK}
            \operatorname{occur}(\text{bb}) \times
            \operatorname{prediction}(\text{bb})
          \]
      \end{itemize}
  \end{itemize}

  \vfill
  \pause
  \begin{center}
    \textbf{Now we have a baseline.}
  \end{center}
\end{frame}

% Untitled frame that inputs overview.tex, centred in a single column that
% spans the paper width minus 8pt.
\begin{frame}
  \vspace{0.5cm}
  \begin{columns}
    \column{\dimexpr\paperwidth-8pt}
    \centering
    \input{overview.tex}
  \end{columns}
\end{frame}

% First results: a box plot in the middle, side notes on the right, and the
% three candidate explanations underneath.
\begin{frame}{First results (Intel Skylake on Grid5000)}
  \begin{columns}
    \column{\dimexpr\paperwidth-8pt}
    \centering
    % Empty minipage, same width as the notes on the right
    % (presumably there to keep the plot horizontally centred).
    \begin{minipage}[c]{0.27\textwidth}
      ~
    \end{minipage}
    \hfill
    \begin{minipage}[c]{0.4\textwidth}
      \centering
      % NOTE(review): graphicx has no native SVG support; presumably a
      % conversion rule is set up in the preamble -- confirm.
      \includegraphics[width=\textwidth]{overall_analysis_boxplot.svg}\\
    \end{minipage}
    \hfill
    \begin{minipage}[c]{0.27\textwidth}
      \centering
      % $>$ rather than a bare >, which prints an inverted question mark
      % under the default OT1 text encoding.
      {\small\textit{Outliers $>$ 250\,\% trimmed}} \\

      \vspace{2em}
      {\small\textit{Associated table in\\
          supplementary material}}
    \end{minipage}

    \begin{center}
      \textbf{\alert{Severely worse} than previous evaluations!}\\
      \textbf{\hspace{0.7cm}Harness broken?\hfill{}Harder
        benchmarks?\hfill{}Previously undetected weaknesses?\hspace{0.7cm}~}
    \end{center}
  \end{columns}
\end{frame}

% Compares a badly-predicted loop (reduction) with a well-predicted one (map)
% and names the cause on the last overlay.
% The frame is [fragile]: it contains lstlisting environments and defines
% macros that take an argument (#1) inside the frame body.
\begin{frame}[fragile]{Searching for areas of improvement}
  \begin{itemize}
    \item{} Tools often wrong on the \emph{same} rows
      \begin{itemize}
        \item \llvmmca{}, \iaca{} and \uica{} share 80\,\% of their
          worst 30\,\%
      \end{itemize}
    \item{} Often \texttt{-O1} rows
  \end{itemize}

  \begin{center}
    \textbf{Crucial difference:}
  \end{center}

  % Coloured monospace highlights, used through the (§...§) escapes in the
  % listings below.
  \newcommand{\lsthlA}[1]{\texttt{\color[HTML]{df018a}#1}}
  \newcommand{\lsthlB}[1]{\texttt{\color[HTML]{d88900}#1}}
  \begin{minipage}[t]{0.47\textwidth}
    \begin{center}
      \textbf{{\color{red}Bad}\onslide<2->{: reduction}}
    \end{center}
    \vspace{-1em}
    % Listings are kept flush left: whitespace inside lstlisting is typeset.
\begin{lstlisting}[language={[ANSI]C}]
for((§\lsthlA{c3}§))
tmp[(§\lsthlB{c1}§)] += A[c1][c3] * x[c3];

\end{lstlisting}
  \end{minipage}
  \hfill\vrule\hfill
  \begin{minipage}[t]{0.47\textwidth}
    \begin{center}
      \textbf{{\color[HTML]{008f0c}Good}\onslide<2->{: map}}
    \end{center}
    \vspace{-1em}
\begin{lstlisting}[language={[ANSI]C}]
for((§\lsthlA{c3}§))
A[c1][(§\lsthlA{c3}§)] += u1[c1] * v1[c3]
+ u2[c1] * v2[c3];
\end{lstlisting}
  \end{minipage}

  \begin{center}
    \onslide<3->{\alert{\textbf{Dependencies through memory!}}}
  \end{center}
\end{frame}

% Results after pruning memory-carried dependencies: a box plot in the
% middle, the trimming note on the right, and a one-line conclusion below.
\begin{frame}{Pruning memory-carried dependencies (Intel Skylake on Grid5000)}
  \begin{columns}
    \column{\dimexpr\paperwidth-8pt}
    \centering
    % Empty minipage, same width as the note on the right
    % (presumably there to keep the plot horizontally centred).
    \begin{minipage}[c]{0.24\textwidth}
      ~
    \end{minipage}
    \hfill
    \begin{minipage}[c]{0.5\textwidth}
      \centering
      % NOTE(review): graphicx has no native SVG support; presumably a
      % conversion rule is set up in the preamble -- confirm.
      \includegraphics[width=\textwidth]{nomemdeps_boxplot.svg}\\
    \end{minipage}
    \hfill
    \begin{minipage}[c]{0.24\textwidth}
      \centering{}
      % $>$ rather than a bare >, which prints an inverted question mark
      % under the default OT1 text encoding.
      {\small\textit{Outliers $>$ 200\,\% \\
          trimmed}}
    \end{minipage}
  \end{columns}

  \begin{center}
    \textbf{\alert{Closer to expected results}}
  \end{center}
\end{frame}
