diff --git a/slides/20_foundations/main.tex b/slides/20_foundations/main.tex index 2269d0c..acd16bc 100644 --- a/slides/20_foundations/main.tex +++ b/slides/20_foundations/main.tex @@ -72,8 +72,8 @@ given CPU \item Features microarchitectural models \item Most often static analyzers - \item Predict at least the \emph{reverse-throughput} of a - kernel (cycles per iteration) + \item Predict at least the \emph{reverse-throughput} $\cyc{\kerK}$ of a + kernel $\kerK$ (cycles per iteration) \item May derive further useful metrics, \eg{} bottlenecks, by inspecting their model at will \end{itemize} diff --git a/slides/30_frontend/main.tex b/slides/30_frontend/main.tex index eb775e1..d665d89 100644 --- a/slides/30_frontend/main.tex +++ b/slides/30_frontend/main.tex @@ -1 +1,51 @@ \section{A frontend model for the Cortex A72} + +\begin{frame}{The Cortex A72} + \begin{itemize} + \item{} Low-power ARM CPU + \item{} CPU of the Raspberry Pi 4: easily available + \item{} AArch64, NEON SIMD + \medskip{} + \item{} ARM CPUs not usually modeled! + \item{} Backend modeled by \palmed{} + \end{itemize} +\end{frame} + +\begin{frame} + \centering + \includegraphics[width=0.9\textwidth]{A72_pipeline_diagram.svg} +\end{frame} + +\begin{frame}{Manual model} + \begin{itemize} + \item Goal: manually craft a frontend model + \item Try to follow methods that can be automated + \item Propose a parametric model for future work, leaving question + marks on some sections + \end{itemize} +\end{frame} + +\begin{frame}{Counting \uops{}} + For an instruction $i$, denote by \alert{$\mucount{i}$} its number of \uops{}. 
+ \begin{itemize} + \item{} For $k \in \nat$, construct (if possible) a kernel $\kerK_k$: + \begin{itemize} + \item instruction $i$ + $k$ ``simple'' instructions (one \uop) + \item frontend-bound: + \[ + \cyc{\kerK_k} = \dfrac{k + \mucount{i}}{3} + \] + \end{itemize} + \item{} For well-chosen $k_0$, we should have + \[ + \cyc{\kerK_{k_0}} + \sfrac{1}{3} = \cyc{\kerK_{k_0+1}} + \] + \item{} Measure to verify + \bigskip + \item{} If so, {\boldmath\[ + \mucount{i} = 3 \cyc{\kerK_{k_0}} - k_0 + \]} + + \end{itemize} + +\end{frame} diff --git a/slides/assets/imgs/30_frontend/A72_pipeline_diagram.svg b/slides/assets/imgs/30_frontend/A72_pipeline_diagram.svg new file mode 100644 index 0000000..6cad889 --- /dev/null +++ b/slides/assets/imgs/30_frontend/A72_pipeline_diagram.svg @@ -0,0 +1,650 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Fetch + + + + Decode,Rename,Dispatch + + + + Issue + + + + Branch + + + + Integer 0 + + + + Integer 1 + + + + Integer multi-cycle + + + + FP/SIMD 0 + + + + FP/SIMD 1 + + + + Load + + + + Store + + + + + + + + + + + + + + In-order + + + + Out-of-order + + + + Front-end + + + + + Back-end + + + 3μOPs + + diff --git a/slides/assets/imgs/30_frontend/A72_pipeline_diagram.svg.pdf b/slides/assets/imgs/30_frontend/A72_pipeline_diagram.svg.pdf new file mode 100644 index 0000000..658c618 Binary files /dev/null and b/slides/assets/imgs/30_frontend/A72_pipeline_diagram.svg.pdf differ