% Slides for the section presenting the tool referred to by \cesasme.
% Tool names (\cesasme, \llvmmca, \iaca, \ithemal, \uica), \kerK, \leadsto and
% \nnearrow are expected to be defined outside this file.
\section{\cesasme: evaluate and compare state-of-the-art code analyzers}

% Motivation: four analyzers give different cycle counts for the same loop.
% NOTE: in a [fragile] frame, \end{frame} must stay alone on its line,
% unindented; listing bodies are verbatim, so they start at column 0.
\begin{frame}[fragile]
    \begin{minipage}{0.6\textwidth}
        \begin{center}
            Matrix multiplication:
        \end{center}
        \begin{lstlisting}[language={[x86masm]Assembler}]
loop:
    movsd (%rcx, %rax), %xmm0
    mulsd %xmm1, %xmm0
    addsd (%rdx, %rax), %xmm0
    movsd %xmm0, (%rdx, %rax)
    addq $8, %rax
    cmpq $0x2260, %rax
    jne loop\end{lstlisting}
    \end{minipage}\hfill\vrule\hfill
    \begin{minipage}{0.38\textwidth}
        \begin{tabular}{l r}
            \llvmmca{}: & 1.5 cycles \\
            \iaca{}: & 2.0 cycles \\
            \ithemal{}: & 2.0 cycles \\
            \uica{}: & 3.0 cycles \\
        \end{tabular}
    \end{minipage}
    \vfill{}
    \begin{center}
        \textbf{Which prediction is correct?}
        \quad
        \alert{\textbf{We lack a sound baseline!}}
    \end{center}
\end{frame}

% Method: per-basic-block predictions are weighted by occurrence counts and
% summed into a whole-kernel prediction, to be compared with a measurement.
\begin{frame}{Lifting predictions}
    \begin{itemize}
        \item Instrument full kernel $\kerK$: for each basic block,
            $\operatorname{occur}(\text{bb})$
        \item For each tool
            \begin{itemize}
                \item for each bb, $\operatorname{prediction}(\text{bb})$
                \item \emph{lift} predictions:
                    \[
                        \operatorname{prediction}(\kerK) =
                        \sum_{\text{bb} \in \kerK}
                        \operatorname{occur}(\text{bb}) \times \operatorname{prediction}(\text{bb})
                    \]
            \end{itemize}
        \item Measure total kernel time
    \end{itemize}
    \pause
    \begin{center}
        \textbf{Now we have a baseline.}\\
        \textit{\color[HTML]{6e7581}(And \llvmmca{} was correct.)}
    \end{center}
\end{frame}

% Benchmark requirements (left) and how each is met (right, revealed stepwise).
\begin{frame}{Generating benchmarks}
    We need benchmarks\ldots \\
    \vspace{1em}
    % The brace group keeps \arraystretch and \litem local to this table.
    {\renewcommand*{\arraystretch}{1.2}
    \newcommand{\litem}{\usebeamertemplate*{itemize item}\hspace{-0.5em}}
    \begin{tabular}{rl l}
        \litem{} & representative of scientific computation
            & \visible<2->{\alert{Polybench}}\\
        \litem{} & infinite, L1-resident loops
            & \visible<3->{\alert{``microkernelification''} + verify} \\
        \litem{} & stressing diverse resources
            & \visible<4->{\alert{Polyhedral transformations}} \\
        &
            & \visible<4->{+ \alert{unrolling} + \alert{compiler options}} \\
        \litem{} & plenty of them
            & \visible<5->{\alert{Even more} of all those $\nnearrow$} \\
    \end{tabular}
    \let\litem\undefined
    }
    \begin{center}
        % `~` is a non-breaking space in LaTeX; $\sim$ prints the "about" sign.
        \visible<6->{\textbf{\leadsto{} yields \alert{$\sim$3500} benchmarks}}
    \end{center}
\end{frame}

% Overview figure, pulled in from overview.tex.
\begin{frame}
    \vspace{0.5cm}
    \begin{columns}
        \column{\dimexpr\paperwidth-8pt}
        \centering
        \input{overview.tex}
    \end{columns}
\end{frame}

% Per-tool error metrics on the full benchmark set, with the matching box plot.
\begin{frame}{First results (Intel Skylake on Grid5000)}
    \begin{columns}
        \column{\dimexpr\paperwidth-8pt}
        \centering
        \begin{minipage}[c]{0.55\textwidth}
            \centering
            \footnotesize
            \begin{tabular}{l r r r r r}
                \toprule
                \textbf{Bencher} & Failures & \textbf{MAPE} & \textbf{Median} & \textbf{$K_\tau$} & \textbf{Time}\\
                & (\%) & (\%) & (\%) & & (CPU$\cdot$h) \\
                \midrule
                BHive & 37.20 & 27.95 & 23.01 & 0.81 & 1.37\\
                \llvmmca{} & 0.00 & 36.71 & 59.80 & 0.57 & 0.96 \\
                \uica{} & 0.00 & 29.59 & 52.99 & 0.58 & 2.12 \\
                \ithemal{} & 0.00 & 57.04 & 75.69 & 0.39 & 0.38 \\
                \iaca{} & 0.00 & 30.23 & 57.18 & 0.59 & 1.31 \\
                Gus & 0.00 & 20.37 & 30.59 & 0.82 & 188.04 \\
                \bottomrule
            \end{tabular}
        \end{minipage}
        \hfill\vrule\hfill
        \begin{minipage}[c]{0.43\textwidth}
            \centering
            % NOTE(review): graphicx has no built-in SVG support; presumably the
            % preamble sets up a conversion rule or the svg package -- confirm.
            \includegraphics[width=\textwidth]{overall_analysis_boxplot.svg}\\
            % $>$ rather than a bare `>`, which prints as an inverted question
            % mark with OT1-encoded text fonts.
            {\small\textit{Outliers $>$ 250\,\% trimmed}}
        \end{minipage}
    \end{columns}
\end{frame}

% Error analysis: a well-predicted kernel next to a badly-predicted one.
% NOTE: [fragile] frame -- keep \end{frame} alone on its line, unindented.
\begin{frame}[fragile]{Searching for areas of improvement}
    \begin{itemize}
        \item Tools often wrong on the \emph{same} rows
            \begin{itemize}
                \item \llvmmca{}, \iaca{} and \uica{} share 80\,\% of their worst 30\,\%
            \end{itemize}
        \item Often \texttt{-O1} rows
    \end{itemize}
    \begin{center}
        \textbf{Crucial difference:}
    \end{center}
    \begin{minipage}[t]{0.47\textwidth}
        \begin{center}
            \textbf{\color[HTML]{008f0c}Good}
        \end{center}
        \vspace{-1em}
        \begin{lstlisting}[language={[ANSI]C}]
for(c3)
    A[c1][c3] += u1[c1] * v1[c3]
               + u2[c1] * v2[c3];
\end{lstlisting}
    \end{minipage}\hfill\vrule\hfill
    \begin{minipage}[t]{0.47\textwidth}
        \begin{center}
            \textbf{\color{red}Bad}
        \end{center}
        \vspace{-1em}
        \begin{lstlisting}[language={[ANSI]C}]
for(c3)
    tmp[c1] += A[c1][c3] * x[c3];
\end{lstlisting}
    \end{minipage}
    \pause{}
    \begin{center}
        \alert{\textbf{Dependencies through memory!}}
    \end{center}
\end{frame}

% Same metrics as the first-results table, on the full dataset versus the
% dataset labelled "Trim" below.
\begin{frame}{Pruning memory-carried dependencies (Intel Skylake on Grid5000)}
    % Coloured dataset labels, undefined again at the end of the frame.
    \newcommand{\full}{{\color[HTML]{c7805a}Full}}
    \newcommand{\pruned}{{\color[HTML]{4360be}Trim}}
    \begin{columns}
        \column{\dimexpr\paperwidth-8pt}
        \centering
        \begin{minipage}[c]{0.50\textwidth}
            \centering
            \footnotesize
            % Five columns: tool, dataset, MAPE, median, Kendall-style K_tau.
            \begin{tabular}{l r r r r}
                \toprule
                \textbf{Bencher} & Dataset & \textbf{MAPE} & \textbf{Median} & \textbf{$K_\tau$} \\
                & & (\%) & (\%) & \\
                \midrule
                \multirow{2}*{\llvmmca{}} & \full{} & 36.71 & 59.80 & 0.57 \\
                & \pruned{} & 27.06 & 21.04 & 0.79\\
                \multirow{2}*{\uica{}} & \full{} & 29.59 & 52.99 & 0.58 \\
                & \pruned{} & 18.42 & 11.96 & 0.80\\
                \multirow{2}*{\iaca{}} & \full{} & 30.23 & 57.18 & 0.59 \\
                & \pruned{} & 17.55 & 12.17 & 0.82\\
                \bottomrule
            \end{tabular}
        \end{minipage}
        \hfill\vrule\hfill
        \begin{minipage}[c]{0.48\textwidth}
            \centering
            % NOTE(review): SVG via \includegraphics needs preamble support -- confirm.
            \includegraphics[width=\textwidth]{nomemdeps_boxplot.svg}\\
            {\small\textit{Outliers $>$ 200\,\% trimmed}}
        \end{minipage}
    \end{columns}
    \let\full\undefined
    \let\pruned\undefined
\end{frame}