158 lines
5.1 KiB
TeX
158 lines
5.1 KiB
TeX
\section{Foundations}
|
|
|
|
\begin{frame}{Bird's eye view of a CPU}
|
|
\centering
|
|
\includegraphics[height=0.94\textheight]{cpu_big_picture.svg}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Possible bottlenecks}
|
|
\begin{columns}
|
|
\begin{column}{0.37\textwidth}
|
|
\begin{center}
|
|
\includegraphics[width=\textwidth]{cpu_big_picture_truncate.svg}
|
|
\end{center}
|
|
\end{column}
|
|
\hfill
|
|
\begin{column}{0.62\textwidth}
|
|
\begin{tightitemize}{0pt}
|
|
\begin{itemize}
|
|
\item \alert{Frontend:} \uops{} not issued fast enough
|
|
\bigskip
|
|
|
|
\item \alert{Backend:} saturated execution units
|
|
\bigskip
|
|
|
|
\item \alert{Dependencies:} computation is stalled waiting
|
|
for previous results
|
|
\end{itemize}
|
|
\end{tightitemize}
|
|
\end{column}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
%\begin{frame}{Dependencies and the ROB}
|
|
% \begin{columns}
|
|
% \begin{column}{0.35\textwidth}
|
|
% \begin{center}
|
|
% \includegraphics[width=\textwidth]{cpu_frontend.svg}
|
|
% \end{center}
|
|
% \end{column}
|
|
% \hfill
|
|
% \begin{column}{0.64\textwidth}
|
|
% \begin{tightitemize}{0pt}
|
|
% \begin{itemize}
|
|
% \item Dependencies can stall execution
|
|
% \item Maybe instructions further down can be executed right now?
|
|
% \end{itemize}
|
|
% \begin{center}
|
|
% \textbf{\alert{$\to$ Out-of-Order CPUs}}
|
|
% \end{center}
|
|
% \begin{itemize}
|
|
% \item ROB: circular buffer of \uops{}
|
|
% \item First possible instruction is issued
|
|
% \end{itemize}
|
|
% \end{tightitemize}
|
|
% \end{column}
|
|
% \end{columns}
|
|
%\end{frame}
|
|
|
|
%\begin{frame}{How do we get insights from this complex system?}
|
|
% \textbf{Hardware counters}
|
|
% \begin{itemize}
|
|
% \item Built-in hardware, counters gathered at runtime
|
|
% \item Very accurate
|
|
% \item Available data varies from model to model
|
|
% \item May not even be available at all
|
|
% \end{itemize}
|
|
%
|
|
% \textbf{Simulation?}
|
|
% \begin{itemize}
|
|
% \item A modern CPU is \alert{$\sim$\,100e9 transistors}: very complex
|
|
% models!
|
|
% \item Very expensive, even for manufacturers for design validation
|
|
% \item CPU design is industrial secret $\leadsto$ not available anyway
|
|
% \item \ldots{}\ie{} not feasible.
|
|
% \end{itemize}
|
|
%\end{frame}
|
|
|
|
\begin{frame}[fragile]{What do we analyze?}
|
|
\begin{columns}
|
|
\column{\dimexpr\paperwidth-20pt}
|
|
\begin{minipage}[c]{0.60\textwidth}
|
|
Pieces of code referred to as \alert{``microkernels''}:
|
|
|
|
\begin{itemize}
|
|
\item body of an (assumed) infinite loop;
|
|
\item in steady-state;
|
|
\item straight-line code (branches assumed not taken);
|
|
\item L1-resident (memory model is out of scope).
|
|
\end{itemize}
|
|
\end{minipage}
|
|
\hfill\begin{minipage}[c]{0.35\textwidth}
|
|
\begin{lstlisting}[language={[x86masm]Assembler}, numbers=none]
|
|
loop:
|
|
movsd (%rcx, %rax), %xmm0
|
|
mulsd %xmm1, %xmm0
|
|
addsd (%rdx, %rax), %xmm0
|
|
movsd %xmm0, (%rdx, %rax)
|
|
addq $8, %rax
|
|
cmpq $0x2260, %rax
|
|
jne loop\end{lstlisting}
|
|
\end{minipage}
|
|
\vspace{2em}
|
|
\begin{center}
|
|
Reasonable hypotheses for the category of codes worth optimizing this way!
|
|
\end{center}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Code analyzers}
|
|
\begin{itemize}
|
|
\item Predict performance of a microkernel
|
|
\item Feature microarchitectural models
|
|
\item Most often static analyzers
|
|
\item Predict at least the \alert{\emph{reverse-throughput}} of a kernel (cycles per iteration)
|
|
\item May derive further useful metrics, \eg{} bottlenecks, by
|
|
inspecting their model at will
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Existing code analyzers}
|
|
\begin{minipage}[t]{0.55\textwidth}
|
|
\begin{block}{Behavioural}
|
|
\medskip
|
|
\begin{itemize}
|
|
\item \alert{\iaca{}}: Intel, proprietary. Intel CPUs only.
|
|
\item \alert{\llvmmca{}}: \texttt{llvm} project, FOSS.
|
|
\item \alert{\uica{}}, \alert{\uopsinfo{}}: academia. Intel CPUs
|
|
only.
|
|
\end{itemize}
|
|
\end{block}
|
|
\end{minipage}
|
|
\hfill
|
|
\begin{minipage}[t]{0.40\textwidth}
|
|
\begin{block}{ML-based}
|
|
\medskip
|
|
\begin{itemize}
|
|
\item \alert{\ithemal{}}: academia.
|
|
\end{itemize}
|
|
\end{block}
|
|
\end{minipage} \\
|
|
|
|
\vfill
|
|
|
|
Behavioural tools are (to some extent) based on \alert{manually made}
|
|
models!\\
|
|
\pause{}
|
|
\bigskip{}
|
|
\begin{center}
|
|
\textbf{\alert{Ambition:}} \alert{automated} model generation.
|
|
\end{center}
|
|
\end{frame}
|
|
|
|
\begin{frame}{When I started my PhD\ldots}
|
|
\centering
|
|
%\includegraphics[height=0.9\textheight]{patate_placeholder.jpg}
|
|
\only<1>{\includegraphics[height=0.9\textheight]{sota_potato.svg}}%
|
|
\only<2>{\includegraphics[height=0.9\textheight]{sota_potato_eval.svg}}\\
|
|
\end{frame}
|