\section{Foundations}

% Full-slide figure: overall picture of a CPU.
\begin{frame}{Bird's eye view of a CPU}
    \centering
    \includegraphics[height=0.94\textheight]{cpu_big_picture.svg}
\end{frame}

% Two columns: truncated CPU figure on the left, the three bottleneck
% categories (frontend, backend, dependencies) on the right.
\begin{frame}{Possible bottlenecks}
    \begin{columns}
        \begin{column}{0.37\textwidth}
            \begin{center}
                \includegraphics[width=\textwidth]{cpu_big_picture_truncate.svg}
            \end{center}
        \end{column}
        \hfill
        \begin{column}{0.62\textwidth}
            \begin{tightitemize}{0pt}
                \begin{itemize}
                    \item \alert{Frontend:} \uops{} not issued fast enough
                    \bigskip
                    \item \alert{Backend:} saturated execution units
                    \bigskip
                    \item \alert{Dependencies:} computation is stalled waiting for
                        previous results
                \end{itemize}
            \end{tightitemize}
        \end{column}
    \end{columns}
\end{frame}

% --- Disabled slide, kept for reference: dependencies and the ROB. ---
%\begin{frame}{Dependencies and the ROB}
%    \begin{columns}
%        \begin{column}{0.35\textwidth}
%            \begin{center}
%                \includegraphics[width=\textwidth]{cpu_frontend.svg}
%            \end{center}
%        \end{column}
%        \hfill
%        \begin{column}{0.64\textwidth}
%            \begin{tightitemize}{0pt}
%                \begin{itemize}
%                    \item Dependencies can stall execution
%                    \item Maybe instructions further down can be executed right now?
%                \end{itemize}
%                \begin{center}
%                    \textbf{\alert{$\to$ Out-of-Order CPUs}}
%                \end{center}
%                \begin{itemize}
%                    \item ROB: circular buffer of \uops{}
%                    \item First possible instruction is issued
%                \end{itemize}
%            \end{tightitemize}
%        \end{column}
%    \end{columns}
%\end{frame}

% --- Disabled slide, kept for reference: hardware counters vs. simulation. ---
%\begin{frame}{How do we get insights from this complex system?}
%    \textbf{Hardware counters}
%    \begin{itemize}
%        \item Built-in hardware, counters gathered at runtime
%        \item Very accurate
%        \item Available data varies from model to model
%        \item May not even be available at all
%    \end{itemize}
%
%    \textbf{Simulation?}
%    \begin{itemize}
%        \item A modern CPU is \alert{$\sim$\,100e9 transistors}: very complex
%            models!
%        \item Very expensive, even for manufacturers for design validation
%        \item CPU design is industrial secret $\leadsto$ not available anyway
%        \item \ldots{}\ie{} not feasible.
%    \end{itemize}
%\end{frame}

% Definition of a "microkernel" (left) next to an example assembly loop (right).
% The frame is [fragile] because it contains an lstlisting environment; for a
% fragile frame, \end{frame} must stand alone at the very start of its line.
\begin{frame}[fragile]{What do we analyze?}
    \begin{columns}
        \column{\dimexpr\paperwidth-20pt}
        \begin{minipage}[c]{0.60\textwidth}
            Pieces of code referred to as \alert{``microkernels''}:
            \begin{itemize}
                \item body of an (assumed) infinite loop;
                \item in steady-state;
                \item L1-resident (memory model is out of scope);
                \item straight-line code (branches assumed not taken).
            \end{itemize}
        \end{minipage}
        % The listing body is verbatim: its line breaks and leading whitespace
        % are printed as typed, so it is kept flush left and is not re-indented
        % with the surrounding markup. \end{lstlisting} directly follows the
        % last instruction, as in the original.
        \hfill\begin{minipage}[c]{0.35\textwidth}
            \begin{lstlisting}[language={[x86masm]Assembler}, numbers=none]
loop:
    movsd (%rcx, %rax), %xmm0
    mulsd %xmm1, %xmm0
    addsd (%rdx, %rax), %xmm0
    movsd %xmm0, (%rdx, %rax)
    addq $8, %rax
    cmpq $0x2260, %rax
    jne loop\end{lstlisting}
        \end{minipage}
        \vspace{2em}
        \begin{center}
            Reasonable hypotheses for the category of codes worth optimizing this
            way!
        \end{center}
    \end{columns}
\end{frame}

% What a code analyzer is and what it outputs.
\begin{frame}{Code analyzers}
    \begin{itemize}
        \item Predict performance of a microkernel
        \item Feature microarchitectural models
        \item Most often static analyzers
        \item Predict at least the \alert{\emph{reverse-throughput}} of a kernel
            (cycles per iteration)
        \item May derive further useful metrics, \eg{} bottlenecks, by
            inspecting their model at will
    \end{itemize}
\end{frame}

% Existing tools, split into behavioural and ML-based; the second overlay step
% (after \pause) reveals the "Ambition" statement.
\begin{frame}{Existing code analyzers}
    \begin{minipage}[t]{0.55\textwidth}
        \begin{block}{Behavioural}
            \medskip
            \begin{itemize}
                \item \alert{\iaca{}}: Intel, proprietary. Intel CPUs only.
                \item \alert{\llvmmca{}}: \texttt{llvm} project, FOSS.
                \item \alert{\uica{}}, \alert{\uopsinfo{}}: academia. Intel CPUs
                    only.
            \end{itemize}
        \end{block}
    \end{minipage}
    \hfill
    \begin{minipage}[t]{0.40\textwidth}
        \begin{block}{ML-based}
            \medskip
            \begin{itemize}
                \item \alert{\ithemal{}}: academia.
            \end{itemize}
        \end{block}
    \end{minipage}
    \\
    \vfill
    Behavioural tools are (to some extent) based on \alert{manually-made}
    models!\\
    \pause{}
    \bigskip{}
    \begin{center}
        \textbf{\alert{Ambition:}} \alert{automated} model generation.
    \end{center}
\end{frame}

% Two overlay steps showing sota_potato.svg, then sota_potato_eval.svg. The
% trailing % after the first \only keeps the line break from adding a space.
\begin{frame}{When I started my PhD\ldots}
    \centering
    %\includegraphics[height=0.9\textheight]{patate_placeholder.jpg}
    \only<1>{\includegraphics[height=0.9\textheight]{sota_potato.svg}}%
    \only<2>{\includegraphics[height=0.9\textheight]{sota_potato_eval.svg}}\\
\end{frame}