% File viewer metadata: 203 lines, 6 KiB, TeX
% Opens the section that the frames below belong to.
% \cesasme is a macro defined outside this chunk.
\section{\cesasme: evaluate and compare state-of-the-art code analyzers}
% Untitled frame: an x86 loop on the left, the cycle counts reported by four
% tools on the right, and a closing question about which one to trust.
% The frame is [fragile] because it contains an lstlisting environment.
\begin{frame}[fragile]
  % Left column (60% of the text width): the assembly listing.
  \begin{minipage}{0.6\textwidth}
    \begin{center}
      Matrix multiplication:
    \end{center}
    % The listing body is verbatim: keep it at column 0 and free of comments.
    % \end{lstlisting} is kept on the last code line, as in the source
    % (presumably to avoid a trailing empty line in the output).
\begin{lstlisting}[language={[x86masm]Assembler}]
loop:
movsd (%rcx, %rax), %xmm0
mulsd %xmm1, %xmm0
addsd (%rdx, %rax), %xmm0
movsd %xmm0, (%rdx, %rax)
addq $8, %rax
cmpq $0x2260, %rax
jne loop\end{lstlisting}
  \end{minipage}\hfill\vrule\hfill
  % Right column (38% of the text width): one row per tool.
  % The tool-name macros are defined outside this chunk.
  \begin{minipage}{0.38\textwidth}
    \begin{tabular}{l r}
      \llvmmca{}: & 1.5 cycles \\
      \iaca{}: & 2.0 cycles \\
      \ithemal{}: & 2.0 cycles \\
      \uica{}: & 3.0 cycles \\
    \end{tabular}
  \end{minipage}

  \vfill{}

  \begin{center}
    \textbf{Which prediction is correct?} \quad
    \alert{\textbf{We lack a sound baseline!}}
  \end{center}
\end{frame}
% Describes how per-basic-block predictions are combined into a prediction
% for the whole kernel, then reveals the conclusion after a \pause.
% \kerK and \llvmmca are macros defined outside this chunk.
\begin{frame}{Lifting predictions}
  \begin{itemize}
    \item Instrument full kernel $\kerK$: for each basic block,
      $\operatorname{occur}(\text{bb})$
    \item For each tool
      \begin{itemize}
        \item for each bb,
          $\operatorname{prediction}(\text{bb})$
        % Kernel prediction = sum over basic blocks of occur(bb) x prediction(bb).
        \item \emph{lift} predictions: \[
            \operatorname{prediction}(\kerK) =
            \sum_{\text{bb} \in \kerK}
            \operatorname{occur}(\text{bb}) \times
            \operatorname{prediction}(\text{bb})
          \]
      \end{itemize}
    \item Measure total kernel time
  \end{itemize}

  % Everything after \pause appears on the next overlay step.
  \pause
  \begin{center}
    \textbf{Now we have a baseline.}\\
    \textit{\color[HTML]{6e7581}(And \llvmmca{} was correct.)}
  \end{center}
\end{frame}
% Lists four requirements on the benchmarks; the right-hand column uncovers,
% one overlay step at a time (slides 2 to 5), how each requirement is met.
% Slide 6 shows the resulting benchmark count.
% NOTE(review): \leadsto and \nnearrow are not defined in this chunk; \leadsto
% is used in text mode below, so presumably the preamble provides a text-safe
% definition -- confirm.
\begin{frame}{Generating benchmarks}
  We need benchmarks\ldots \\
  \vspace{1em}
  % The group keeps the row stretch and the \litem helper local to this table.
  {\renewcommand{\arraystretch}{1.2}
    % \litem draws the theme's itemize bullet so table rows look like list items.
    \newcommand{\litem}{\usebeamertemplate*{itemize item}\hspace{-0.5em}}
    \begin{tabular}{rl l}
      \litem{} & representative of scientific computation &
        \visible<2->{\alert{Polybench}}\\
      \litem{} & infinite, L1-resident loops &
        \visible<3->{\alert{``microkernelification''} + verify}
        \\
      \litem{} & stressing diverse resources &
        \visible<4->{\alert{Polyhedral transformations}}
        \\
      % Continuation row of the previous requirement (no bullet, no label).
      & & \visible<4->{+ \alert{unrolling} + \alert{compiler
        options}} \\
      \litem{} & plenty of them &
        \visible<5->{\alert{Even more} of all those $\nnearrow$} \\
    \end{tabular}
    \let\litem\undefined
  }

  \begin{center}
    % $\sim$ prints the "approximately" sign; a bare ~ would only be a
    % non-breaking space.
    \visible<6->{\textbf{\leadsto{} yields \alert{$\sim$3500} benchmarks}}
  \end{center}
\end{frame}
% Untitled frame that inputs overview.tex, centred in a single column that is
% 8pt narrower than the paper width.
\begin{frame}
  \vspace{0.5cm}
  \begin{columns}
    \column{\dimexpr\paperwidth-8pt}
    \centering
    \input{overview.tex}
  \end{columns}
\end{frame}
% Results frame: a per-tool table on the left and a box-plot image on the
% right, laid out in one column 8pt narrower than the paper width.
% NOTE(review): the image is referenced with an .svg extension; this relies on
% graphics setup that is not visible in this chunk -- confirm.
\begin{frame}{First results (Intel Skylake on Grid5000)}
  \begin{columns}
    \column{\dimexpr\paperwidth-8pt}
    \centering
    % Left: table with one row per tool (booktabs rules).
    \begin{minipage}[c]{0.55\textwidth}
      \centering
      \footnotesize
      \begin{tabular}{l r r r r r}
        \toprule
        % Header row, followed by a row giving the unit of each column.
        \textbf{Bencher} & \textbf{Failures} &
        \textbf{MAPE} & \textbf{Median} & \textbf{$K_\tau$} & \textbf{Time}\\
        & (\%) & (\%) & (\%) & & (CPU$\cdot$h) \\
        \midrule
        BHive & 37.20 & 27.95 & 23.01 & 0.81 & 1.37\\
        llvm-mca & 0.00 & 36.71 & 59.80 & 0.57 & 0.96 \\
        UiCA & 0.00 & 29.59 & 52.99 & 0.58 & 2.12 \\
        Ithemal & 0.00 & 57.04 & 75.69 & 0.39 & 0.38 \\
        Iaca & 0.00 & 30.23 & 57.18 & 0.59 & 1.31 \\
        Gus & 0.00 & 20.37 & 30.59 & 0.82 & 188.04 \\
        \bottomrule
      \end{tabular}
    \end{minipage}
    \hfill\vrule\hfill
    % Right: box plot with a note about trimmed outliers.
    \begin{minipage}[c]{0.43\textwidth}
      \centering
      \includegraphics[width=\textwidth]{overall_analysis_boxplot.svg}\\
      % ">" is set in math mode so it prints as a greater-than sign whatever
      % the text font encoding is.
      {\small\textit{Outliers $>$ 250\,\% trimmed}}
    \end{minipage}
  \end{columns}
\end{frame}
% Compares a "Good" and a "Bad" C loop side by side and, after a \pause,
% names the difference between them.
% The frame is [fragile] because it contains lstlisting environments.
% \llvmmca, \iaca and \uica are macros defined outside this chunk.
\begin{frame}[fragile]{Searching for areas of improvement}
  \begin{itemize}
    \item{} Tools often wrong on the \emph{same} rows
      \begin{itemize}
        \item \llvmmca{}, \iaca{} and \uica{} share 80\,\% of their
          worst 30\,\%
      \end{itemize}
    \item{} Often \texttt{-O1} rows
  \end{itemize}

  \begin{center}
    \textbf{Crucial difference:}
  \end{center}

  % Left half: the "Good" example. The listing body is verbatim: keep it at
  % column 0 and free of comments.
  \begin{minipage}[t]{0.47\textwidth}
    \begin{center}
      \textbf{\color[HTML]{008f0c}Good}
    \end{center}
    \vspace{-1em}
\begin{lstlisting}[language={[ANSI]C}]
for(c3)
A[c1][c3] += u1[c1] * v1[c3]
+ u2[c1] * v2[c3];
\end{lstlisting}
  \end{minipage}\hfill\vrule\hfill
  % Right half: the "Bad" example.
  \begin{minipage}[t]{0.47\textwidth}
    \begin{center}
      \textbf{\color{red}Bad}
    \end{center}
    \vspace{-1em}
\begin{lstlisting}[language={[ANSI]C}]
for(c3)
tmp[c1] += A[c1][c3] * x[c3];
\end{lstlisting}
  \end{minipage}

  % Everything after \pause appears on the next overlay step.
  \pause{}
  \begin{center}
    \alert{\textbf{Dependencies through memory!}}
  \end{center}
\end{frame}
% Results frame: for three tools, a table with one "Full" row and one "Trim"
% row each on the left, and a box-plot image on the right.
% NOTE(review): the image is referenced with an .svg extension; this relies on
% graphics setup that is not visible in this chunk -- confirm.
\begin{frame}{Pruning memory-carried dependencies (Intel Skylake on Grid5000)}
  % Coloured dataset labels used in the table; undefined again at the end of
  % the frame.
  \newcommand{\full}{{\color[HTML]{c7805a}Full}}
  \newcommand{\pruned}{{\color[HTML]{4360be}Trim}}
  \begin{columns}
    \column{\dimexpr\paperwidth-8pt}
    \centering
    % Left: table (booktabs rules). Five columns, matching the five cells
    % used by every row.
    \begin{minipage}[c]{0.50\textwidth}
      \centering
      \footnotesize
      \begin{tabular}{l r r r r}
        \toprule
        % Header row, followed by a row giving the unit of each column.
        \textbf{Bencher} & \textbf{Dataset} &
        \textbf{MAPE} & \textbf{Median} & \textbf{$K_\tau$} \\
        & & (\%) & (\%) & \\
        \midrule
        % Each tool name spans its two dataset rows via \multirow.
        \multirow{2}*{llvm-mca} & \full{} & 36.71 & 59.80 & 0.57 \\
        & \pruned{} & 27.06 & 21.04 & 0.79\\
        \multirow{2}*{UiCA} & \full{} & 29.59 & 52.99 & 0.58 \\
        & \pruned{} & 18.42 & 11.96 & 0.80\\
        \multirow{2}*{Iaca} & \full{} & 30.23 & 57.18 & 0.59 \\
        & \pruned{} & 17.55 & 12.17 & 0.82\\
        \bottomrule
      \end{tabular}
    \end{minipage}
    \hfill\vrule\hfill
    % Right: box plot with a note about trimmed outliers.
    \begin{minipage}[c]{0.48\textwidth}
      \centering
      \includegraphics[width=\textwidth]{nomemdeps_boxplot.svg}\\
      % ">" is set in math mode so it prints as a greater-than sign whatever
      % the text font encoding is.
      {\small\textit{Outliers $>$ 200\,\% trimmed}}
    \end{minipage}
  \end{columns}
  \let\full\undefined
  \let\pruned\undefined
\end{frame}