\section{A frontend model for the Cortex A72}

\begin{frame}{The Cortex A72}
    \begin{itemize}
        \item{} Low-power ARM CPU
        \item{} CPU of the Raspberry Pi 4: easily available
        \item{} AArch64, NEON SIMD
        \medskip{}
        \item{} ARM CPUs not usually modeled!
        \item{} Backend modeled by \palmed{}
    \end{itemize}
\end{frame}

\begin{frame}
    \centering
    \includegraphics[width=0.9\textwidth]{A72_pipeline_diagram.svg}
    \todo{Dispatch queues}
\end{frame}

\begin{frame}{Manual model}
    \begin{itemize}
        \item Goal: manually craft a frontend model
        \item Try to follow methods that can be automated
        \item Propose a parametric model for future work, leaving question
            marks on some sections
    \end{itemize}
\end{frame}

\begin{frame}{Proposed parametric model}
    \vfill
    \centering
    \begin{minipage}[t][0.7\textheight][c]{0.53\textwidth}
        \centering
        Globally,
        \vfill
        \includegraphics[width=\textwidth]{parametric_model-frontend.svg}
        \vfill~ % I hate LaTeX
    \end{minipage}
    \hfill\vrule\hfill
    \begin{minipage}[t][0.7\textheight][c]{0.43\textwidth}
        \centering
        For each instruction,
        \vfill
        \includegraphics[width=\textwidth]{parametric_model-insn.svg}
        \vfill~
    \end{minipage}

    \vfill
    In {\color{red}\textbf{red}}, parameters of the model.
\end{frame}

\begin{frame}{Counting \uops{}}
    For an instruction $i$, denote by \alert{$\mucount{i}$} its number of \uops{}.
    \begin{itemize}
        \item{} For $k \in \nat$, construct (if possible) $\kerK_k$ a kernel:
            \begin{itemize}
                \item instruction $i$ + $k$ ``simple'' instructions (one \uop)
                \item frontend-bound:
                    \[
                        \cyc{\kerK_k} = \dfrac{k + \mucount{i}}{3}
                    \]
            \end{itemize}
        \item{} For well-chosen $k_0$, we should have
            \[
                \cyc{\kerK_{k_0}} + \sfrac{1}{3} = \cyc{\kerK_{k_0+1}}
            \]
        \item{} Measure to verify
            \bigskip
        % \textbf alone does not embolden math; \boldmath (issued in text
        % mode, scoped by the group) switches the math version to bold.
        \item{} If so, {\bfseries\boldmath \[
                \mucount{i} = 3 \cyc{\kerK_{k_0}} - k_0
            \]}

    \end{itemize}
\end{frame}

\begin{frame}{Evaluation: comparison to bare \palmed{}}
    \begin{itemize}
        \item Add a frontend to \palmed{}:
            \[
                \cyc{\kerK}_{\text{pred.}} = \max(\texttt{palmed}(\kerK), \texttt{frontend}(\kerK))
            \]
        \item Reuse evaluation suite of \palmed{}: SPEC CPU 2017 + Polybench
        \item Compare to \llvmmca{}
    \end{itemize}
\end{frame}

\begin{frame}{Results}
    \centering
    % Seven columns: benchmark, metric, unit, llvm-mca, and the three
    % Palmed variants (no frontend, linear frontend, dispatch queues).
    \begin{tabular}{l l c r r r r}
        \toprule
        & & & \multirow{2}{*}{\llvmmca{}} & \multicolumn{3}{c}{\palmed{} with frontend\ldots} \\
        & & & & none & linear & disp.\ queues \\
        \midrule
        \multirow{3}{*}{SPEC} & Cov. & (\%) & 100.00 & \na{} & 97.21 & 97.16 \\
        & Err. & (\%) & 9.0 & 20.1 & 6.2 & 4.6 \\
        & $\ktau$ & (1)& 0.83 & 0.88 & 0.91 & 0.93 \\
        \midrule
        \multirow{3}{*}{Polybench} & Cov. & (\%) & 100.00& \na{} & 99.33 & 99.33 \\
        & Err. & (\%) & 13.9 & 12.6 & 8.1 & 8.0 \\
        & $\ktau$ & (1)& 0.47 & 0.82 & 0.88 & 0.90 \\
        \bottomrule
    \end{tabular}
\end{frame}

\begin{frame}{Limitations}
    \begin{itemize}
        \item Parts of this model were entirely manually solved (\eg{} \# of
            dispatch queues)
        \item Evaluation based on \palmed{} suite: biased
        \item Must be tested on other architectures!
    \end{itemize}
\end{frame}