Continue definitions

This commit is contained in:
Théophile Bastian 2024-01-07 15:13:21 +01:00
parent d25a1476ae
commit c70ed526ee
2 changed files with 106 additions and 13 deletions

View file

@ -212,9 +212,37 @@ $\cyc{\kerK} = 1.5$.
\end{example}
\begin{remark}
Although we define $\cyc{\kerK}$ as the minimum over $\nat^*$, only so many
kernels may be aggregated until we find the minimum.
As $C(\kerK)$ depends on the microarchitecture of the processor considered,
the throughput $\cyc{\kerK}$ of a kernel $\kerK$ depends on the processor
considered.
\end{remark}
\medskip
Although we define $\cyc{\kerK}$ as the minimum over $\nat^*$, only so many
kernels may be aggregated until we find the minimum.
\begin{lemma}\label{lem:cyc_k_conv}
Given a kernel $\kerK$,
\begin{enumerate}[(i)]
\item{}\label{lem:cyc_k_conv:low_n} the minimum considered in the
definition of $\cyc{\kerK}$ is reached for a small value of $n \leq
N_0$, $N_0$ being commensurate with the complexity of the
microarchitecture considered.
\item{}\label{lem:cyc_k_conv:conv} Furthermore, the sequence
$\left(\sfrac{C(\kerK^n)}{n}\right)_{n \in \nat^*}$ converges towards
$\cyc{\kerK}$:
\[
\lim_{n \to \infty} \dfrac{C(\kerK^n)}{n} = \cyc{\kerK}
\]
\end{enumerate}
\end{lemma}
\begin{proof}
Indeed, as the number of resources that can be shared between instructions
in a processor is finite (and relatively small, usually on the order of
magnitude of 10), and their number of possible states is also finite (and
@ -225,19 +253,40 @@ $\cyc{\kerK} = 1.5$.
Thus, by the pigeon-hole principle, and as each state depends only on the
previous one, the states visited by $\left(C(\kerK^n)\right)_{n \in
\nat^*}$ are periodic of period $p$. Take $r_0 \in \nat$ realizing
$\min_{0 < r \leq p}\left(\sfrac{C(\kerK^r)}{r}\right)$. As we are by hypothesis in
steady-state already, we have for any $n \in \nat^*$ such that $n = kp+r$,
$0 < r \leq p$, $k, r \in \nat$,
\nat^*}$ are periodic of period $p$. As such, and as we are by hypothesis
in steady-state already (and not only periodic from a certain rank), for
any $n \geq p$, we have
\[
C(\kerK^n) = C(\kerK^{n-p}) + C(\kerK^p)
\]
Take $r_0 \in \nat^*$ realizing
$\min_{0 < r \leq p}\left(\sfrac{C(\kerK^r)}{r}\right)$.
For any $n \in \nat^*$ such that $n = kp+r$, $0 < r \leq p$, $k, r \in \nat$,
\begin{align*}
C(\kerK^n) &= k \cdot C(\kerK^p) + C(\kerK^r) \\
&\geq k \cdot C(\kerK^{r_0}) + C(\kerK^{r_0}) \\
&\geq (k+1) \cdot C(\kerK^{r_0}) \\
\implies \dfrac{C(\kerK^n)}{n} &\geq k \cdot \dfrac{C(\kerK^p)}{n} +
\dfrac{C(\kerK^r)}{n}
C(\kerK^n) &= k \cdot C(\kerK^p) + C(\kerK^r) & \textit{(by induction)} \\
&= kp \dfrac{C(\kerK^p)}{p} + r \dfrac{C(\kerK^r)}{r} \\
&\geq kp \cdot \dfrac{C(\kerK^{r_0})}{r_0} + r \dfrac{C(\kerK^{r_0})}{r_0} \\
&= (kp+r) \dfrac{C(\kerK^{r_0})}{r_0} \\
&= n \dfrac{C(\kerK^{r_0})}{r_0} \\
\implies \dfrac{C(\kerK^n)}{n} &\geq \dfrac{C(\kerK^{r_0})}{r_0} = \cyc{\kerK}
\end{align*}
Thus, $r_0$ realizes the minimum from the definition of $\cyc{\kerK}$, with
$r_0 \leq p$, commensurate with the complexity of the microarchitecture,
proving~(\ref{lem:cyc_k_conv:low_n}).
\medskip{}
For any $n > r_0$, we decompose $n = r_0 + m$ and $m = k'p + r'$, $0 < r'
\leq p$, $k', r' \in \nat$.
\begin{align*}
C(\kerK^n) = C(\kerK^{r_0}) + k'p \dfrac{C(\kerK^p)}{p} +
\end{align*}
\todo{}
\end{remark}
\end{proof}
\medskip
@ -258,6 +307,47 @@ stead.
In the literature or in analyzers' reports, the throughput of a kernel is often
referred to as its \emph{IPC} (its unit).
\todo{Measure of $\cyc{\kerK}$}
\begin{notation}[Experimental measure of $\cyc{\kerK}$]
We denote by $\cycmes{\kerK}{n}$ the experimental measure of $\cyc{\kerK}$,
obtained by:
\begin{itemize}
\item sampling the hardware counter of total number of instructions
retired and the counter of total number of cycles elapsed,
\item executing $\kerK^n$,
\item sampling the same counters again, and denoting by
$\Delta_n\text{ret}$ and $\Delta_{n}C$ their respective differences,
\item noting $\cycmes{\kerK}{n} = \dfrac{\Delta_{n}C\cdot
\card{\kerK}}{\Delta_n\text{ret}}$, where $\card{\kerK}$ is the
number of instructions in $\kerK$.
\end{itemize}
\end{notation}
\begin{lemma}
For any kernel $\kerK$,
$\cycmes{\kerK}{n} \xrightarrow[n \to \infty]{} \cyc{\kerK}$.
\end{lemma}
\begin{proof}
For an integer number of kernel iterations $n$, we ideally have
$\sfrac{\Delta_n\text{ret}}{\card{\kerK}} = n$. While measurement
errors may make $\Delta_{n}\text{ret}$ fluctuate slightly, this
fluctuation remains bounded by a constant $E_\text{ret}$:
\[
\abs{\dfrac{\Delta_n\text{ret}}{\card{\kerK}} - n}
\leq E_\text{ret}
\]
In the same way, and due to the pipelining effects we noted below
the definition of $\cyc{\kerK}$,
\[
\abs{\Delta_{n}C - C(\kerK^n)} \leq E_C
\]
with $E_C$ a constant.
As such, for a given $n$, \todo{}
\end{proof}
Given this property, we will use $\cyc{\kerK}$ to refer to $\cycmes{\kerK}{n}$
for large values of $n$ in this manuscript whenever it is clear that this value
is a measure.
\subsubsection{Basic block of an assembly-level program}

View file

@ -10,9 +10,12 @@
% Calligraphic letter shorthands.
\newcommand{\calI}{\mathcal{I}}
\newcommand{\calB}{\mathcal{B}}
% Wraps its argument in auto-sized vertical bars (absolute value).
\newcommand{\abs}[1]{\left| #1 \right|}
% Overlines its argument; the text uses it to denote the throughput of a
% kernel.
\newcommand{\cyc}[1]{\overline{#1}}
% Same overline notation, with a bold F (resp. B) superscript.
\newcommand{\cycF}[1]{\overline{#1}^\textbf{F}}
\newcommand{\cycB}[1]{\overline{#1}^\textbf{B}}
% Overline of #1 with the superscript "M(#2)"; the text uses it to denote
% the experimental measure of the throughput of kernel #1 over #2 iterations.
\newcommand{\cycmes}[2]{\overline{#1}^{\textit{M(}#2\textit{)}}}
% Blackboard-bold N; the text uses it for the natural numbers.
\newcommand{\nat}{\mathbb{N}}