% phd-defense/slides/30_frontend/main.tex

\section{A frontend model for the Cortex A72}

% Context slide: presents the Cortex A72 and why it is the target of this
% section (widely available hardware, ARM rarely modeled, backend already
% covered by \palmed).
\begin{frame}{The Cortex A72}
    \begin{itemize}
        \item{} Low-power ARM CPU
        \item{} CPU of the Raspberry Pi 4: easily available
        % "AArch64" (capital A twice) is Arm's spelling of the 64-bit state.
        \item{} AArch64, NEON SIMD
            \medskip{}
        % The \medskip above splits the hardware facts from the modeling
        % motivation that follows.
        \item{} ARM CPUs not usually modeled!
        \item{} Backend modeled by \palmed{}
    \end{itemize}
\end{frame}

% Untitled frame: the A72 pipeline diagram, centred, at 90% of the text width.
% NOTE(review): graphicx does not read SVG natively; presumably the build
% converts the file or a graphics rule is declared in the preamble -- confirm.
\begin{frame}
    \centering
    \includegraphics[width=0.9\textwidth]{A72_pipeline_diagram.svg}
    % Outstanding author note; \todo is not defined in this chunk.
    \todo{Dispatch queues}
\end{frame}

% Approach slide: the frontend model is hand-crafted, but built with methods
% meant to be automatable, and left parametric where parts remain open.
\begin{frame}{Manual model}
    \begin{itemize}
        \item Goal: manually craft a frontend model
        \item Try to follow methods that can be automated
        % "future work" is uncountable in this sense (was "future works").
        \item Propose a parametric model for future work, leaving question
            marks on some sections
    \end{itemize}
\end{frame}

% Two fixed-height columns (0.7\textheight each) separated by a vertical rule:
% the global frontend model on the left (53% of the text width) and the
% per-instruction model on the right (43%). A legend line sits underneath.
% Comments are kept on their own lines: a trailing `%` on the box lines would
% swallow the inter-box spaces and change the horizontal layout.
\begin{frame}{Proposed parametric model}
    \vfill
    \centering
    % Left column: caption, then the figure, spread out by \vfill glue.
    \begin{minipage}[t][0.7\textheight][c]{0.53\textwidth}
        \centering
        Globally,
        \vfill
        \includegraphics[width=\textwidth]{parametric_model-frontend.svg}
        % NOTE(review): the `~` presumably gives the last \vfill something to
        % push against so the figure stays centred -- confirm before removing.
        \vfill~  % I hate LaTeX
    \end{minipage}
    % Vertical separator, centred between the columns by the two \hfill.
    \hfill\vrule\hfill
    % Right column: same structure as the left one.
    \begin{minipage}[t][0.7\textheight][c]{0.43\textwidth}
        \centering
        For each instruction,
        \vfill
        \includegraphics[width=\textwidth]{parametric_model-insn.svg}
        \vfill~
    \end{minipage}

    \vfill
    % Legend for the colour code used in both figures.
    In {\color{red}\textbf{red}}, parameters of the model.
\end{frame}

% Method slide: how the number of micro-ops \mucount{i} of an instruction i is
% derived from cycle measurements of frontend-bound kernels \kerK_k.
\begin{frame}{Counting \uops{}}
    For an instruction $i$, denote \alert{$\mucount{i}$} its number of \uops{}.
    \begin{itemize}
        \item{} For $k \in \nat$, construct (if possible) $\kerK_k$ a kernel:
            \begin{itemize}
                \item instruction $i$ + $k$ ``simple'' instructions (one \uop)
                \item frontend-bound:
                    \[
                        \cyc{\kerK_k} = \dfrac{k + \mucount{i}}{3}
                    \]
            \end{itemize}
        % Adding one simple instruction must cost exactly 1/3 cycle.
        \item{} For well-chosen $k_0$, we should have
            \[
                \cyc{\kerK_{k_0}} + \sfrac{1}{3} = \cyc{\kerK_{k_0+1}}
            \]
        \item{} Measure to verify
            \bigskip
        % Solve the frontend-bound relation above at k = k_0: the subtracted
        % term is therefore k_0 (it previously read a free `k`).
        % NOTE(review): \textbf does not embolden math symbols by itself; the
        % wrapper is kept as-is so the rendering is otherwise unchanged.
        \item{} If so, \textbf{\[
                \mucount{i} = 3 \cyc{\kerK_{k_0}} - k_0
        \]}
    \end{itemize}
\end{frame}

% Evaluation protocol: the predicted cycle count of a kernel is the maximum of
% the \palmed estimate and the frontend estimate; the comparison reuses the
% \palmed benchmark suite and takes \llvmmca as the reference tool.
\begin{frame}{Evaluation: comparison to bare \palmed}
    \begin{itemize}
        \item
            Add a frontend to \palmed{}:
            \[
                \cyc{\kerK}_{\text{pred.}}
                    = \max(
                        \texttt{palmed}(\kerK),
                        \texttt{frontend}(\kerK)
                    )
            \]
        \item
            Reuse evaluation suite of \palmed{}: SPEC CPU 2017 + Polybench
        \item
            Compare to \llvmmca{}
    \end{itemize}
\end{frame}

% Results slide: \llvmmca against \palmed with no frontend, with the linear
% frontend and with the dispatch-queues frontend, on SPEC and Polybench.
\begin{frame}{Results}
    \centering
    % Exactly seven columns are used by every row: suite, metric, unit,
    % \llvmmca, and the three \palmed variants. The column spec previously had
    % an eighth, never-filled `r` column, which made the rules overhang the
    % data on the right and pushed the table off-centre.
    \begin{tabular}{l l c r r r r}
        \toprule
        & & & \multirow{2}{*}{\llvmmca{}} & \multicolumn{3}{c}{\palmed{} with
        frontend\ldots} \\
        & & & & none & linear & disp.\ queues \\
        \midrule
        \multirow{3}{*}{SPEC} & Cov. & (\%)  & 100.00 & \na{} & 97.21 & 97.16 \\
                              & Err. & (\%)  & 9.0   & 20.1 & 6.2 & 4.6 \\
                              & $\ktau$ & (1)& 0.83  & 0.88 & 0.91 & 0.93 \\
        \midrule
        \multirow{3}{*}{Polybench} & Cov. & (\%)  & 100.00& \na{} & 99.33 & 99.33 \\
                                   & Err. & (\%)  & 13.9 & 12.6 & 8.1 & 8.0 \\
                                   & $\ktau$ & (1)& 0.47 & 0.82 & 0.88 & 0.90 \\
        \bottomrule
    \end{tabular}
\end{frame}

% Closing caveats of the frontend model: manual steps, evaluation bias, and
% the need to validate on other architectures.
\begin{frame}{Limitations}
    \begin{itemize}
        \item
            Parts of this model were entirely manually solved
            (\eg{} \# of dispatch queues)
        \item
            Evaluation based on \palmed{} suite: biased
        \item
            Must be tested on other architectures!
    \end{itemize}
\end{frame}
Some writeup 2024-11-20 12:54:09 +01:00			`\section{A frontend model for the Cortex A72}`
Further writeup, start A72 2024-11-22 14:14:32 +01:00
			`\begin{frame}{The Cortex A72}`
			`\begin{itemize}`
			`\item{} Low-power ARM CPU`
			`\item{} CPU of the Raspberry Pi 4: easily available`
			`\item{} Aarch64, NEON SIMD`
			`\medskip{}`
			`\item{} ARM CPUs not usually modeled!`
			`\item{} Backend modeled by \palmed{}`
			`\end{itemize}`
			`\end{frame}`

			`\begin{frame}`
			`\centering`
			`\includegraphics[width=0.9\textwidth]{A72_pipeline_diagram.svg}`
Frontend: progress 2024-11-24 18:56:01 +01:00			`\todo{Dispatch queues}`
Further writeup, start A72 2024-11-22 14:14:32 +01:00			`\end{frame}`

			`\begin{frame}{Manual model}`
			`\begin{itemize}`
			`\item Goal: manually craft a frontend model`
			`\item Try to follow methods that can be automated`
			`\item Propose a parametric model for future works, leaving question`
			`marks on some sections`
			`\end{itemize}`
			`\end{frame}`

Frontend: progress 2024-11-24 18:56:01 +01:00			`\begin{frame}{Proposed parametric model}`
			`\vfill`
			`\centering`
			`\begin{minipage}[t][0.7\textheight][c]{0.53\textwidth}`
			`\centering`
			`Globally,`
			`\vfill`
			`\includegraphics[width=\textwidth]{parametric_model-frontend.svg}`
			`\vfill~ % I hate LaTeX`
			`\end{minipage}`
			`\hfill\vrule\hfill`
			`\begin{minipage}[t][0.7\textheight][c]{0.43\textwidth}`
			`\centering`
			`For each instruction,`
			`\vfill`
			`\includegraphics[width=\textwidth]{parametric_model-insn.svg}`
			`\vfill~`
			`\end{minipage}`

			`\vfill`
			`In {\color{red}\textbf{red}}, parameters of the model.`
			`\end{frame}`

Further writeup, start A72 2024-11-22 14:14:32 +01:00			`\begin{frame}{Counting \uops{}}`
			`For an instruction $i$, denote \alert{$\mucount{i}$} its number of \uops{}.`
			`\begin{itemize}`
			`\item{} For $k \in \nat$, construct (if possible) $\kerK_k$ a kernel:`
			`\begin{itemize}`
			\item instruction $i$ + $k$ ``simple'' instructions (one \uop)
			`\item frontend-bound:`
			`\[`
			`\cyc{\kerK_k} = \dfrac{k + \mucount{i}}{3}`
			`\]`
			`\end{itemize}`
			`\item{} For well-chosen $k_0$, we should have`
			`\[`
			`\cyc{\kerK_{k_0}} + \sfrac{1}{3} = \cyc{\kerK_{k_0+1}}`
			`\]`
			`\item{} Measure to verify`
			`\bigskip`
			`\item{} If so, \textbf{\[`
			`\mucount{i} = 3 \cyc{\kerK_{k_0}} - k`
			`\]}`

			`\end{itemize}`
Frontend: progress 2024-11-24 18:56:01 +01:00			`\end{frame}`

			`\begin{frame}{Evaluation: comparison to bare \palmed}`
			`\begin{itemize}`
			`\item Add a frontend to \palmed{}:`
			`\[`
			`\cyc{\kerK}_{\text{pred.}} = \max(\texttt{palmed}(\kerK), \texttt{frontend}(\kerK))`
			`\]`
			`\item Reuse evaluation suite of \palmed{}: SPEC CPU 2017 + Polybench`
			`\item Compare to \llvmmca{}`
			`\end{itemize}`
			`\end{frame}`

			`\begin{frame}{Results}`
			`\centering`
			`\begin{tabular}{l l c r r r r r}`
			`\toprule`
			`& & & \multirow{2}{*}{\llvmmca{}} & \multicolumn{3}{c}{\palmed{} with`
			`frontend\ldots} \\`
			`& & & & none & linear & disp.\ queues \\`
			`\midrule{}`
			`\multirow{3}{*}{SPEC} & Cov. & (\%) & 100.00 & \na{} & 97.21 & 97.16 \\`
			`& Err. & (\%) & 9.0 & 20.1 & 6.2 & 4.6 \\`
			`& $\ktau$ & (1)& 0.83 & 0.88 & 0.91 & 0.93 \\`
			`\midrule`
			`\multirow{3}{*}{Polybench} & Cov. & (\%) & 100.00& \na{} & 99.33 & 99.33 \\`
			`& Err. & (\%) & 13.9 & 12.6 & 8.1 & 8.0 \\`
			`& $\ktau$ & (1)& 0.47 & 0.82 & 0.88 & 0.90 \\`
			`\bottomrule`
			`\end{tabular}`
			`\end{frame}`
Further writeup, start A72 2024-11-22 14:14:32 +01:00
Frontend: progress 2024-11-24 18:56:01 +01:00			`\begin{frame}{Limitations}`
			`\begin{itemize}`
			`\item Parts of this model were entirely manually solved (\eg{} \# of`
			`dispatch queues)`
			`\item Evaluation based on \palmed{} suite: biased`
			`\item Must be tested on other architectures!`
			`\end{itemize}`
Further writeup, start A72 2024-11-22 14:14:32 +01:00			`\end{frame}`