% phd-defense/slides/40_cesasme/main.tex

% Opens the slide-deck section on \cesasme (macro defined outside this file);
% the frames that follow belong to this section.
\section{\cesasme: evaluate and compare state-of-the-art code analyzers}

\begin{frame}[fragile]
    % Left-hand side: the example loop, typeset verbatim (hence `fragile').
    % The listing body must stay flush left: lstlisting prints leading
    % whitespace as part of the code.
    \begin{minipage}{0.6\textwidth}
        \begin{center}
            Matrix multiplication:
        \end{center}
        \begin{lstlisting}[language={[x86masm]Assembler}]
loop:
    movsd (%rcx, %rax), %xmm0
    mulsd %xmm1, %xmm0
    addsd (%rdx, %rax), %xmm0
    movsd %xmm0, (%rdx, %rax)
    addq $8, %rax
    cmpq $0x2260, %rax
    jne loop\end{lstlisting}
    \end{minipage}\hfill\vrule\hfill
    % Right-hand side: the per-tool figures shown for that loop.
    \begin{minipage}{0.38\textwidth}
        \begin{tabular}{l r}
            \llvmmca{}: & 1.5 cycles/iter \\
            \iaca{}: & 2.0 cycles/iter \\
            \ithemal{}: & 2.0 cycles/iter \\
            \uica{}: & 3.0 cycles/iter \\
        \end{tabular}
        \vspace{1em}
        \begin{center}
            % Alerted on the first slide only; plain bold from slide 2 on.
            \alert<1>{\textbf{Which tool is correct?}}
        \end{center}
    \end{minipage}

    \vfill

    % Second slide: name the two missing ingredients.
    \pause
    \begin{center}
        \textbf{We lack:}\\
        \hfill\alert{\textbf{Benchmarks}}\hfill\alert{\textbf{Context}}\hfill~
    \end{center}
\end{frame}

\begin{frame}{Generating benchmarks}
    % The requirements (first two columns) are visible from slide 1; the
    % answer to each one (third column) is uncovered on slides 2 to 6.
    We need benchmarks\ldots \\
    \vspace{1em}
    % The brace group keeps \arraystretch and \litem local to this table.
    {\renewcommand*{\arraystretch}{1.2}
        % Renders beamer's `itemize item' template so that table rows look
        % like list items.
        \newcommand{\litem}{\usebeamertemplate*{itemize item}\hspace{-0.5em}}
    \begin{tabular}{rl l}
        \litem{} & representative of scientific computation &
            \visible<2->{\alert{Polybench}}\\
        \litem{} & infinite, L1-resident loops &
            \visible<3->{\alert{``microkernelification''} + verify}
            \\
        \litem{} & without control flow &
            \visible<4->{\alert{Polybench}} \\
        \litem{} & stressing diverse resources &
            \visible<5->{\alert{Polyhedral transformations}}
            \\
         & & \visible<5->{+ \alert{unrolling} + \alert{compiler
                 options}} \\
        \litem{} & plenty of them &
            \visible<6->{\alert{Even more} of all those $\nnearrow$} \\
    \end{tabular}
    \let\litem\undefined
    }

    \begin{center}
        % `~' is a non-breaking space in LaTeX, so the "approximately" sign
        % has to be written as $\sim$. \leadsto is a math symbol and is
        % wrapped in $...$, as on the following frame.
        \visible<6->{\textbf{$\leadsto$ yields \alert{$\sim$3500} benchmarks}}
    \end{center}
\end{frame}

\begin{frame}{In-context baseline: lifting predictions}
    % Headline: take the full kernel, with its context, as $\kerK$.
    \begin{center}
        \textbf{Consider instead $\kerK$ = \alert{full kernel},
            with its context\\
            $\leadsto$ \alert{multiple} basic blocks}
    \end{center}

    \pause

    % Second overlay step: how a per-block prediction becomes a per-kernel
    % one.
    \begin{itemize}
        \item Measure total kernel time \textbf{in context}
        \item Instrument full kernel $\kerK$:
            for each basic block, $\operatorname{occur}(\text{bb})$
        \item For each tool
            \begin{itemize}
                \item for each bb, $\operatorname{prediction}(\text{bb})$
                \item \emph{lift} predictions:
                    \[
                        \operatorname{prediction}(\kerK)
                        = \sum_{\text{bb} \in \kerK}
                            \operatorname{occur}(\text{bb})
                            \times \operatorname{prediction}(\text{bb})
                    \]
            \end{itemize}
    \end{itemize}

    \vfill
    % Third overlay step: the conclusion.
    \pause
    \begin{center}
        \textbf{Now we have a baseline.}
    \end{center}
\end{frame}

\begin{frame}
    % Untitled frame: the content of overview.tex, centred in a single column
    % that is 8pt narrower than the paper.
    \vspace{0.5cm}
    \begin{columns}
        \column{\dimexpr\paperwidth-8pt}
        \centering
        \input{overview.tex}
    \end{columns}
\end{frame}

\begin{frame}{First results (Intel Skylake on Grid5000)}
  % Layout: metrics table on the left, boxplot image on the right, then the
  % take-away line and three candidate explanations underneath.
  \begin{columns}
    \column{\dimexpr\paperwidth-8pt}
    \centering
    \begin{minipage}[c]{0.55\textwidth}
    \centering
    \footnotesize
    \begin{tabular}{l r r r r r}
        \toprule
        % Every metric name is bold; the unit row below it is not.
        \textbf{Bencher} & \textbf{Failures} &
\textbf{MAPE} & \textbf{Median} & \textbf{$K_\tau$} & \textbf{Time}\\
              & (\%) & (\%) & (\%) & & (CPU$\cdot$h) \\
\midrule
BHive & 37.20 & 27.95 & 23.01 & 0.81 & 1.37\\
llvm-mca & 0.00 & 36.71 & 59.80 & 0.57 & 0.96 \\
UiCA & 0.00 & 29.59 & 52.99 & 0.58 & 2.12 \\
Ithemal & 0.00 & 57.04 & 75.69 & 0.39 & 0.38 \\
Iaca & 0.00 & 30.23 & 57.18 & 0.59 & 1.31 \\
Gus & 0.00 & 20.37 & 30.59 & 0.82 & 188.04 \\
\bottomrule
    \end{tabular}
    \end{minipage}
    \hfill\vrule\hfill
    \begin{minipage}[c]{0.38\textwidth}
        \centering
        % NOTE(review): graphicx has no native SVG support; presumably the
        % build converts this file or declares a graphics rule -- confirm.
        \includegraphics[width=\textwidth]{overall_analysis_boxplot.svg}\\
        % `>' is set in math mode: with OT1-encoded text fonts a bare `>'
        % prints as an inverted question mark.
        {\small\textit{Outliers $>$ 250\,\% trimmed}}
    \end{minipage}

    \begin{center}
        \textbf{\alert{Severely worse} than previous evaluations!}\\
        \textbf{\hspace{0.7cm}Harness broken?\hfill{}Harder
        benchmarks?\hfill{}Previously undetected weaknesses?\hspace{0.7cm}~}
    \end{center}
  \end{columns}
\end{frame}

\begin{frame}[fragile]{Searching for areas of improvement}
    % Two observations about where the tools go wrong.
    \begin{itemize}
        \item Tools often wrong on the \emph{same} rows
            \begin{itemize}
                \item \llvmmca{}, \iaca{} and \uica{} share 80\,\% of
                    their worst 30\,\%
            \end{itemize}
        \item Often \texttt{-O1} rows
    \end{itemize}

    \begin{center}
        \textbf{Crucial difference:}
    \end{center}

    % Two kernels side by side. The listing bodies stay flush left because
    % lstlisting prints leading whitespace as part of the code.
    \begin{minipage}[t]{0.47\textwidth}
        \begin{center}
            \textcolor{red}{\textbf{Bad}}
        \end{center}
        \vspace{-1em}
        \begin{lstlisting}[language={[ANSI]C}]
for(c3)
    tmp[c1] += A[c1][c3] * x[c3];
\end{lstlisting}
    \end{minipage}
    \hfill\vrule\hfill
    \begin{minipage}[t]{0.47\textwidth}
        \begin{center}
            \textcolor[HTML]{008f0c}{\textbf{Good}}
        \end{center}
        \vspace{-1em}
        \begin{lstlisting}[language={[ANSI]C}]
for(c3)
    A[c1][c3] += u1[c1] * v1[c3]
               + u2[c1] * v2[c3];
\end{lstlisting}
    \end{minipage}

    % Second slide: the explanation.
    \pause
    \begin{center}
        \alert{\textbf{Dependencies through memory!}}
    \end{center}
\end{frame}

\begin{frame}{Pruning memory-carried dependencies (Intel Skylake on Grid5000)}
    % Colour-coded labels for the two values of the table's Dataset column.
    \newcommand{\full}{{\color[HTML]{c7805a}Full}}
    \newcommand{\pruned}{{\color[HTML]{4360be}Trim}}
  % Layout: metrics table on the left, boxplot image on the right.
  \begin{columns}
    \column{\dimexpr\paperwidth-8pt}
    \centering
    \begin{minipage}[c]{0.50\textwidth}
    \centering
    \footnotesize
    % Five columns, matching the rows. A sixth, unused column would only add
    % padding that the booktabs rules then extend across.
    \begin{tabular}{l r r r r}
        \toprule
        \textbf{Bencher} & \textbf{Dataset} &
\textbf{MAPE} & \textbf{Median} & \textbf{$K_\tau$} \\
              & & (\%) & (\%) & \\
\midrule
\multirow{2}*{llvm-mca} & \full{} & 36.71 & 59.80 & 0.57 \\
    & \pruned{} & 27.06 & 21.04 & 0.79\\
\multirow{2}*{UiCA} & \full{} & 29.59 & 52.99 & 0.58 \\
    & \pruned{} & 18.42 & 11.96 & 0.80\\
\multirow{2}*{Iaca} & \full{} & 30.23 & 57.18 & 0.59 \\
    & \pruned{} & 17.55 & 12.17 & 0.82\\
\bottomrule
    \end{tabular}
    \end{minipage}
    \hfill\vrule\hfill
    \begin{minipage}[c]{0.48\textwidth}
        \centering
        % NOTE(review): graphicx has no native SVG support; presumably the
        % build converts this file or declares a graphics rule -- confirm.
        \includegraphics[width=\textwidth]{nomemdeps_boxplot.svg}\\
        % `>' is set in math mode: with OT1-encoded text fonts a bare `>'
        % prints as an inverted question mark.
        {\small\textit{Outliers $>$ 200\,\% trimmed}}
    \end{minipage}
  \end{columns}
    % Explicitly drop the frame-local helpers again.
    \let\full\undefined
    \let\pruned\undefined

    \begin{center}
        \textbf{\alert{Closer to expected results}}
    \end{center}
\end{frame}