From 63acbb76341cf4c2590b025006415e776191a311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophile=20Bastian?= Date: Sun, 1 Dec 2024 21:20:44 +0100 Subject: [PATCH] Hack through Foundations (lightly) to reduce time --- slides/20_foundations/main.tex | 100 +++++----- .../imgs/20_foundations/cpu_big_picture.svg | 21 +-- .../cpu_big_picture_truncate.svg | 23 ++- .../imgs/20_foundations/cpu_frontend.svg | 13 +- .../imgs/20_foundations/sota_potato.svg | 176 ++++++++++++++++++ 5 files changed, 251 insertions(+), 82 deletions(-) create mode 100644 slides/assets/imgs/20_foundations/sota_potato.svg diff --git a/slides/20_foundations/main.tex b/slides/20_foundations/main.tex index f7ed612..3a16b0a 100644 --- a/slides/20_foundations/main.tex +++ b/slides/20_foundations/main.tex @@ -16,72 +16,68 @@ \begin{column}{0.62\textwidth} \begin{tightitemize}{0pt} \begin{itemize} - \item \alert{Frontend:} \uops{} not issued fast enough; - issuing faster would speed up computation; + \item \alert{Frontend:} \uops{} not issued fast enough \bigskip - \item \alert{Backend:} saturated execution units; adding - more units would speed up computation; + \item \alert{Backend:} saturated execution units \bigskip \item \alert{Dependencies:} computation is stalled waiting - for previous results; removing data dependencies would - speed up computation. + for previous results \end{itemize} \end{tightitemize} \end{column} \end{columns} \end{frame} -\begin{frame}{Dependencies and the ROB} - \begin{columns} - \begin{column}{0.35\textwidth} - \begin{center} - \includegraphics[width=\textwidth]{cpu_frontend.svg} - \end{center} - \end{column} - \hfill - \begin{column}{0.64\textwidth} - \begin{tightitemize}{0pt} - \begin{itemize} - \item Dependencies can stall execution - \item Maybe instructions further down can be executed right now? - \end{itemize} - \begin{center} - \textbf{\alert{$\to$ Out-of-Order CPUs}} - \end{center} - \begin{itemize} - \item ROB: circular buffer of \uops{} - \item First possible instruction is issued - \end{itemize} - \end{tightitemize} - \end{column} - \end{columns} -\end{frame} +%\begin{frame}{Dependencies and the ROB} +% \begin{columns} +% \begin{column}{0.35\textwidth} +% \begin{center} +% \includegraphics[width=\textwidth]{cpu_frontend.svg} +% \end{center} +% \end{column} +% \hfill +% \begin{column}{0.64\textwidth} +% \begin{tightitemize}{0pt} +% \begin{itemize} +% \item Dependencies can stall execution +% \item Maybe instructions further down can be executed right now? +% \end{itemize} +% \begin{center} +% \textbf{\alert{$\to$ Out-of-Order CPUs}} +% \end{center} +% \begin{itemize} +% \item ROB: circular buffer of \uops{} +% \item First possible instruction is issued +% \end{itemize} +% \end{tightitemize} +% \end{column} +% \end{columns} +%\end{frame} -\begin{frame}{How do we get insights from this complex system?} - \textbf{Hardware counters} - \begin{itemize} - \item Built-in hardware, counters gathered at runtime - \item Very accurate - \item Available data varies from model to model - \item May not even be available at all - \end{itemize} +%\begin{frame}{How do we get insights from this complex system?} +% \textbf{Hardware counters} +% \begin{itemize} +% \item Built-in hardware, counters gathered at runtime +% \item Very accurate +% \item Available data varies from model to model +% \item May not even be available at all +% \end{itemize} +% +% \textbf{Simulation?} +% \begin{itemize} +% \item A modern CPU is \alert{$\sim$\,100e9 transistors}: very complex +% models! +% \item Very expensive, even for manufacturers for design validation +% \item CPU design is industrial secret $\leadsto$ not available anyway +% \item \ldots{}\ie{} not feasible. +% \end{itemize} +%\end{frame} - \textbf{Simulation?} +\begin{frame}{Code analyzers} \begin{itemize} - \item A modern CPU is \alert{$\sim$\,100e9 transistors}: very complex - models! - \item Very expensive, even for manufacturers for design validation - \item CPU design is industrial secret $\leadsto$ not available anyway - \item \ldots{}\ie{} not feasible. - \end{itemize} -\end{frame} - -\begin{frame}{Enter code analyzers} - \begin{itemize} - \item Tools that predict performance of a piece of assembly code on a - given CPU + \item That predict performance of a piece of assembly \item Features microarchitectural models \item Most often static analyzers \item Predict at least the \emph{reverse-throughput} $\cyc{\kerK}$ of a diff --git a/slides/assets/imgs/20_foundations/cpu_big_picture.svg b/slides/assets/imgs/20_foundations/cpu_big_picture.svg index 497a5bc..e1801c3 100644 --- a/slides/assets/imgs/20_foundations/cpu_big_picture.svg +++ b/slides/assets/imgs/20_foundations/cpu_big_picture.svg @@ -7,7 +7,7 @@ viewBox="0 0 260 220" version="1.1" id="svg1" - inkscape:version="1.3 (0e150ed6c4, 2023-07-21)" + inkscape:version="1.3.2 (091e20ef0f, 2023-11-25, custom)" sodipodi:docname="cpu_big_picture.svg" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" @@ -23,12 +23,12 @@ inkscape:pagecheckerboard="0" inkscape:deskcolor="#d1d1d1" inkscape:document-units="mm" - inkscape:zoom="0.92729075" - inkscape:cx="426.51132" - inkscape:cy="471.80456" + inkscape:zoom="1.8545815" + inkscape:cx="396.58543" + inkscape:cy="226.19658" inkscape:window-width="1916" inkscape:window-height="1041" - inkscape:window-x="0" + inkscape:window-x="1920" inkscape:window-y="18" inkscape:window-maximized="1" inkscape:current-layer="layer1" /> @@ -375,11 +375,10 @@ y="45.209263">Decoder + id="g1"> + transform="matrix(1.000053,0,0,0.66658008,39.808405,12.934493)"> Renamer + y="42.017448">… + y="24.500828" /> diff --git a/slides/assets/imgs/20_foundations/cpu_big_picture_truncate.svg b/slides/assets/imgs/20_foundations/cpu_big_picture_truncate.svg index ffb7fba..96c306d 100644 --- a/slides/assets/imgs/20_foundations/cpu_big_picture_truncate.svg +++ b/slides/assets/imgs/20_foundations/cpu_big_picture_truncate.svg @@ -23,13 +23,13 @@ inkscape:pagecheckerboard="0" inkscape:deskcolor="#d1d1d1" inkscape:document-units="mm" - inkscape:zoom="0.92729075" - inkscape:cx="514.94097" - inkscape:cy="422.19768" + inkscape:zoom="6.1940124" + inkscape:cx="348.23953" + inkscape:cy="148.61126" inkscape:window-width="1916" inkscape:window-height="1041" - inkscape:window-x="0" - inkscape:window-y="0" + inkscape:window-x="1920" + inkscape:window-y="18" inkscape:window-maximized="1" inkscape:current-layer="layer1" /> Decoder + id="g1"> + transform="matrix(1.000053,0,0,0.66658008,49.997613,11.267066)"> Renamer + x="92.17778" + y="40.350021">… Decoder + id="g1"> + transform="matrix(1.000053,0,0,0.66658008,49.997613,11.267066)"> Renamer + x="92.17778" + y="40.350021">… + + + + + + + + + + + + + + + + + Backend + Frontend + Dependencies + + + Palmed + + + Through registers(widespread) + +