From 3180ae0e01a1847a590e5305cdbbd14619d2d50c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophile=20Bastian?= Date: Fri, 22 Nov 2024 14:14:32 +0100 Subject: [PATCH] Further writeup, start A72 --- slides/20_foundations/main.tex | 4 +- slides/30_frontend/main.tex | 50 ++ .../imgs/30_frontend/A72_pipeline_diagram.svg | 650 ++++++++++++++++++ .../30_frontend/A72_pipeline_diagram.svg.pdf | Bin 0 -> 13319 bytes 4 files changed, 702 insertions(+), 2 deletions(-) create mode 100644 slides/assets/imgs/30_frontend/A72_pipeline_diagram.svg create mode 100644 slides/assets/imgs/30_frontend/A72_pipeline_diagram.svg.pdf diff --git a/slides/20_foundations/main.tex b/slides/20_foundations/main.tex index 2269d0c..acd16bc 100644 --- a/slides/20_foundations/main.tex +++ b/slides/20_foundations/main.tex @@ -72,8 +72,8 @@ given CPU \item Features microarchitectural models \item Most often static analyzers - \item Predict at least the \emph{reverse-throughput} of a - kernel (cycles per iteration) + \item Predict at least the \emph{reverse-throughput} $\cyc{\kerK}$ of a + kernel $\kerK$ (cycles per iteration) \item May derive further useful metrics, \eg{} bottlenecks, by inspecting their model at will \end{itemize} diff --git a/slides/30_frontend/main.tex b/slides/30_frontend/main.tex index eb775e1..d665d89 100644 --- a/slides/30_frontend/main.tex +++ b/slides/30_frontend/main.tex @@ -1 +1,51 @@ \section{A frontend model for the Cortex A72} + +\begin{frame}{The Cortex A72} + \begin{itemize} + \item{} Low-power ARM CPU + \item{} CPU of the Raspberry Pi 4: easily available + \item{} AArch64, NEON SIMD + \medskip{} + \item{} ARM CPUs not usually modeled!
+ \item{} Backend modeled by \palmed{} + \end{itemize} +\end{frame} + +\begin{frame} + \centering + \includegraphics[width=0.9\textwidth]{A72_pipeline_diagram.svg} +\end{frame} + +\begin{frame}{Manual model} + \begin{itemize} + \item Goal: manually craft a frontend model + \item Try to follow methods that can be automated + \item Propose a parametric model for future work, leaving question + marks on some sections + \end{itemize} +\end{frame} + +\begin{frame}{Counting \uops{}} + For an instruction $i$, denote \alert{$\mucount{i}$} its number of \uops{}. + \begin{itemize} + \item{} For $k \in \nat$, construct (if possible) $\kerK_k$ a kernel: + \begin{itemize} + \item instruction $i$ + $k$ ``simple'' instructions (one \uop) + \item frontend-bound: + \[ + \cyc{\kerK_k} = \dfrac{k + \mucount{i}}{3} + \] + \end{itemize} + \item{} For well-chosen $k_0$, we should have + \[ + \cyc{\kerK_{k_0}} + \sfrac{1}{3} = \cyc{\kerK_{k_0+1}} + \] + \item{} Measure to verify + \bigskip + \item{} If so, \textbf{\[ + \mucount{i} = 3 \cyc{\kerK_{k_0}} - k_0 + \]} + + \end{itemize} + +\end{frame} diff --git a/slides/assets/imgs/30_frontend/A72_pipeline_diagram.svg b/slides/assets/imgs/30_frontend/A72_pipeline_diagram.svg new file mode 100644 index 0000000..6cad889 --- /dev/null +++ b/slides/assets/imgs/30_frontend/A72_pipeline_diagram.svg @@ -0,0 +1,650 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Fetch + + + + Decode,Rename,Dispatch + + + + Issue + + + + Branch + + + + Integer 0 + + + + Integer 1 + + + + Integer multi-cycle + + + + FP/SIMD 0 + + + + FP/SIMD 1 + + + + Load + + + + Store + + + + + + + + + + + + + + In-order + + + + Out-of-order + + + + Front-end + + + + + Back-end + + + 3μOPs + + diff --git a/slides/assets/imgs/30_frontend/A72_pipeline_diagram.svg.pdf b/slides/assets/imgs/30_frontend/A72_pipeline_diagram.svg.pdf new file mode 100644 index 0000000000000000000000000000000000000000..658c6182b833dee10629d2a737021b20d949d4e2 GIT binary
patch literal 13319 zcmd6Oby!qg)bFbzC9R|&gLKafFbpl-9nvs#4mG5J(lsC@9nwgLw1Tt*&#tzlG?lHmk#@!>MoZC0G(@&bSWCo?--VPOEb zk|o?4VFTbpi8OEl006g)tpmc+6?J!jAuOdV&7CYPaYaRO-4L#pFh^XkjPhhL8M5CT@{oQ?8rJ~E8He=8nq!T*ABtOl8T+4{ZgKrzmu{Q0{l9?JMl zgYneylJDj!8GHq5CS+B5O25>2aUsdb5^}+U3{DcQ@(_?-oxj>c&AfZYxOq@pCn@3t z0u^bby$;BonprE6eCM=SzF%}6M19{5lyw@+w&>M0^`y?ev9^~hHE*x16vj@=H7 zwU-RK;lpRN(14sibwQkOE`7K4r)A0Xr6DJ#0#Yyd~jxby1A5+dQvBkl4bTJoC z8g$aTJI%@m^lU`ezAkW0?Bx62=9I|~Anuj@w8Fs)CIBYq;<#v7(Bv;j$JB6|(DKi| z`_fk1yQ@KfQ*>kUR#21#)?Eg*7}kyNYw4;F8)!w1vndT7K+&#lIk)WVEnMzy1Z+c` zzlD&u(7N}Wb32p0`Sie5KatP}F^cgWjx-O1D)^xdEGvZCl94}ih_h4;CJWrm2 zXN{WM%SMq^g7=mlTDsp6zom8gAZdO7aAAvGH#&{HOti$h&TskPu2GFGM=|8ySH;B9 z=b1}7KZ@Za-xjdcLJD zaG&OrfFK9CvgHwPlH`Sca6^a0NWZS0(44wo8JwodwWC2d8tbrXoN@Dd&Ru(J61*T2 z=2_er$!ZKG%u!pvzKnTfekFTdJbr#qPW{DyV{X->(qakXW6}LIa z+_}^qPx8i#pJMKsO-(~HeVk~%*nQ46BOd6ubHagY5!Ks8>uEmx;&8NjP}0niq`V!w z8M~yq@l3rO!>Ikao^zIA+K{Ii0~EOWZATPiVDtmFF)KzbKgkr#m9-ap0~;TM2HvXc zNwdZI%F4fz&JmX-4LQ?t83zjm2ycjnLt5&2HcBSq^%ML|v!rbIZDh*Q`jCG9`3N$@;Oj zp(G|PHIq9q*;OFXm3TlTS3`7bJn^7cIyrz1$C1%8%Ze!HW9G?>cVG5~ zyiVM9Xm#iLf^jY37Qb8z>Iv{1&R>=WlKqCph9S+E$!EP9OmC+wY+}K#M>C{@)^0 zJJkPQnpx81p z|9CeD{5h@vdN-gA_wywJ0tf(kp?`Z>Ea^Qm)Yfh_vA;Z+_?F?9bt>%jc9HsCSXekw zcx2j!+3ef#Zg~Li2YH)+HP`~T0RI_+=nIVz9 zLT~q$LHMHCVfU6+GD*QvUShFX+}<+@-Pzt(A52eADxbd?gmF9IH&*Vj_!u7GL``XF_kt`85NH6wW=0M9hjxvq>o}=&uz^ZK*Fuq4h5wrT$_wc0-#i$Ph>6fFb zNACWiOb32a_~jil?)#3A`PQbQ4@fF^nRI8kv%v&siv0t_(vX}yA?3aSC6OP2Slu%2sZBvW znyN|&!%&ihxQdUKFOij*>OMcd7EEhvmjzrl7q(UVSe;kQ;@;##+_$^iE92dvooLO{ zr_m|LXv93hR=)lfBJFSst_{KCftFj8Jo*8YHF>D-`QQnG|9RSCl>Lj6FZY(QOR*>K zj8gRQ&aVV+HwJ|=+$oo5VdKQE_%M~ayRiD1dwC<{IOz59`I%9gQbCJn6eU(de1&17 zGb8H@0Z5WcK4dL_(QMC~Mt0+lJUp?Fh3}3MhJ#i~4QqLlBbXFvQ8;43YY$#kX^Hu) zkowW@b)JC>W}Yza^&jEtGTpYH+rto+_w6|4IP8p?Y7R^d3=X0cO++|(_b+vA#z&k> zUbHE=9`bXroEUbJn4`XV#XkLq-$jE;`v^GU#<~m^=@R>(Iiq=o$FxGt-0d+^0| zT0;@ftu-iMt+1Zvua?&CB;c#0Gxxl?T&}ZOWekHLA1&%i7#|NWw zfFU)K`}0y3hYygY-!cnFJa>Z5NJ|LTgvs5vI`*k`c$8#ryp6wpA1Hl? 
z&VVEWOG>r8&&b}$kt`J>G9no3OA0j=8=D?nVCue|z4E#wHl{V(HAAJwn6s6EVU0E9 z7dJeDgVdy*kXI}8b_@&B@BIg)DPf5?BotDFuBist64y>5vQI*w;mHelE3Wx;KROz@${a)@oDvRFb;+ z?aKEFlT)#VwhFpaA^kb;&-A{t*F`RaZbse=sviDg;_RG?p9AmetKAEgr zNfQo{l|zq&UvXmEvW`mx(n+NUkt4VIT%b>P$%*YExlUp#*8Oa#i{ieKYK8UeiYZ6S zMJw$Wo@nH0WVDoA0mx%HyM1P2Q8!@o6eB_$zn(MZpxG*N8C9UIK>Z`48pU2B#~#iW!LgHcq>#&YpZyhSYh-cknUmgBIbs0taS<#Nt@0EX zFcq7lCZ@k}23X@EC- z#0SR81qmHMvW<&Oyj_AyM3{Wbn__haP2|dtX1+gF z*+b5q8>!(04alxJSg?1no{%(~%h#+k@Wxue z=HoNv9d*#UnuG+rLXh(vlhTc)pPtHK_^UB4ud(}8gr7o9J zd=BBK299@EWYdOzOWOZH7Be6E96xu2fjnl^k>J-z4YvKK28 zG}ur-I2qP7XwoC9JkTjCo9=%E-+O#NWBqZP-Fax81Sq@lD2|cw z)L6jq_2lOIl>fouHPOIT7M_s&5_lhS!#tGDp!CHwKvBjlV`11qA^CRCBKe^GwWCke zV_eD0oYyHXLNxC zCV{!?GzU&>d?X|POf?I)tIeh+BPLfsCJ8YVedZTXPk&=7H}r`VTj7Y&kvTRZu5UYX zKDx8+?NE#|Pre)wTgaA?YK58|^(BAL6nh$z_dr?@K67T}NTVHIM>U$) z>kFdTk+UhfHmOw}BmS8KCZdEJD?;-6>&2sn(S~q;K;kX? zs!CVn29cD4RPWO6NfNL2q!<|v?40(NecDS;CuwA2)XgrHvC@XS$Uu2I4?R$S@VH}S z=6HEzx5CTx_iX8wQD%_-FFLh-;sa&4DiZ^n1nDF9(-{CVEjIi@ysU4Vc=VA?hq){R zX)IPR8~LhJu#73w$36vw7O}D^Ptvi>;#?quyth;R$TC7DG#@c^Amee$#9gaKVA0i9 zxHW@nU_l_c7oF`Zd^epVei0Bv(UQO`KG_P@zPe z&nlg%-&42fOW_x7#^izA+Ta#C*#X$0oafyw(8b$5`eXR+bds<^mWV8CVRwhbvG`=r z!S#ZH#;N)#W92N~a=ywsErU|Wk%J=Tx@CRE*X1)a7VsMCDw?VW7AH1ZmK*P`BlHJC zKiOfXX^|DIGNo}jq&B%PC53GbC_i0TG|4;sbZTtibx?is;#t$eKx_H7+M7cg-$wOs zlH`;{p>wTDlUV{ZHN^aVjY9nzFE>+#9t1Ss(?B2=WJVVEExfl5U(LZzJ}fdFM!faW zpHgbrj^5wDd#vkofLY)>hxs_DV6UyIsl;`+mHn~TW!}YkI3qZgllqgNV$B*;XYqp& z*aLvUxU^2x^_JG|W;5gbvi5i_9m*)kF`VRp?Jmm<G#vVxIPRAU!4is8j0grT-B?=G^>Alkxv7^(owcJ7Wcg?ik zt*=X3H}d#xw#f-EviM$$S|PggcFDaY6J~@CW@t4vXVQc%rx(SHR`KQim)@*aG>Ttx zc&mq!{iqSdy%Y2az|xrW5e`VPxUFsRC6V(k2JpIfyNwkSJ$7QBMk7>R#u_fYNx=G$ z&?xBgMaVo@(LrC>gOFlFrbRwhBye&relZvtG)TmJ`c*v??^qmGpk}2@_Yhl^A}f|3 zNBlltP*kWm9?i8K8~MByjKugf$MIsipZcf;jpFG8S7~f`GXQgTEm^n?VEOw8dcv6X7S0q{7OPqUa#h}$} zI|cXg;*H+hC)`~o%jb8vOh!NYQ_>8%w#H;HSMIVk2Tu1H9QU57zIvuo&}l=FInnif z3E!8zEgAC3JLO<)U0yuzOkXUoPqQ;MNFO;U|1#2{R(`W)4onmjL0Rsxn+fOqBf+h0 
zUU5>nb@=H^!3>|`p%9(*#-u_4b04MsuU=FlRKmm=#9Cs+!zy(zb#;!KEiVXda38mn z7c700?>k3)=s4!{W8L<6KX4lh^RPn5*%-V9xE+m~kRl$(fEoE#N*ty%00Ccnp6S3N zkBp30-xXbcfs_`K*Sn%{IOG+;#YrJ*rY}P4%=ji*Mkv-`={b~Hmc4kD zcW3x~Z;EU`8>c7eH<94oxBpt)zV4Micj5Np=%ow%0O)Sf-@k&`{m`<`x_@X&+C* z_a0-(?w*pHb>Wj+EyXlWH`g78%0KsISFBnWp2V?gD@(_P%t)B1Cl=F+`euW#cU^26 zamANm=&5ebJME}$3DvS3ZgduXc)2NpD65AJ9(*af$10M1$ZU6f^XN5A`pjkN?BxWf ztvjDs<#at#4px9B~?JCt6@E^uLlM<*s+6HF%?TH14I)`M~y z9&{gT_$~Tk<>g{wzT9;jlKY-}bG+5AY8v%fCI95*zO)On|7F(JbNm!4tk1Se)~k9B zgeTuw#9CCXk#=C8wS3MR=zAPet4=B&S{kb$hb>sPIl@@YDv8qrVDlr;g zpu0=8nLi={9^IrD`AXwk8;Ovpm0Ll?$J7PC>cw!)T#nB>idI@HOvRLESWwz2_DUXS zGbZJZ1)06@66YXNWQE@w7jd9nR`N1(&UM<3*EzYy%Gf)vDHrD95^qCK`{t$%6YV0) zegC$E;aDb(P&b#)-T8(rx3@adcs8!T=zU2s$qi7LZT~`3WdD}2hDHj;X8SzZ*(md? zVhyHii0f2d!L4rSCC^d$t?{<3M6*3a#Jk{I^15NQz17RBZN#wyT&pjz%FB>niq#n4# zQgt7s_Z681!0!2%Y?&@y@JCcTO_|q)D@?38i;^iW{?OE)32{dHi zDq==NP*}whi^@jLW$O&*en?vsN+*9NyzodmyK~N-PJnZiv~@HfAVa{6qTe1i&pF3m zAAXTSc&WV@)<@6D7-{kIocN)XTwM&3e4@8K zwhep==;~?nY4A+qbgH^Y?|+KTNw(f6P@T>ZKp|xh((B0h+L##bPP&oVMD0jS~ArCOPc7lWZTp~(@sJ}n%B9^f` zZ$eN0i5z1Y&)o80Bas_B35#nL(vzn?^>Iw104`6x0WH;l2us5C0caAw-gGsoEk!_% zX{H+i34+UtdNNvg3(nk?cS)#|oJH#@Pofvj!2uW(jUHmkmBsF88E4ZfZj63n%zrjb zXj0Vv(0#iAzje+zvvc-r`{CWQ0s^12?c;=o>Old+(xAb$=Pl$HQ@1X==Rdn_%R>y8 zwu^X=8toycjReix-4$P)m-6m-6p)K51P}_G#dTuYZfC>2+6cJZj)HFcx8cqRU29w1 z=4!pzVrxX~5O~;<8os(QbL>d*D6G?|bIy^#$;B(7(+O%=9JHGmw;>$%LC%XF|APzk zwGOPPbABJNGPg|?X5!}ctdpJEc-c9!Q=VXxTE!V!@xY)UyUxYNZxzFSX?v*J_h1Oe z>xG2y0lSz`QYFC~koMU#+89@_YJwPN=s-?L=Phai()L{l)a%V40a<&16|>M>m=&{d zSb~2A`GK2PQl}`vqs)(8utE6U$pp+6$L;?6HeC;?)=kO7LNm!m-@uA)dI;Zs=W(Z< zXP@9eiDg)5&X%&?ZCuCNauPbMmqR8rGTJ`M390A1RzGg)#w}d8it#A2voOuKeig%^ zgvj21-kR@NEibZdyHkeX?&wpCyU;RP9MuJI_AC67u~fafyQ8T&xa=j7&q)}ySHtRX z&oVmGnD5kwcHYJj(B61`KpvoRRQ5UH^t_wv{rJCi+><(%d8gq zHDx}G+Unv>yOrxvz>Tf1z#9fu&MBhr+Y4Jv^@E=&yS>q3pK6CG&?r>|$y3A01Bd~I@~eJB@7c{uSaww$wYb>#5&lrIH7LX>mjf& zM3EtC1a#6w$avgkqTtLxiT2L+_wC*75!_G#mPQ@MyVRv`j4>btI*LTKu2plBNdTZ* 
zERE@cY1bH%8V?>D`JhXNn0W45+0B@l?)G4d=L7(18+K0pmN=$!lg77~2N)Ndq&u?} z+**hKX3fwX%`esr!V4An&02xb+|xhk9MJ!eRR6{2pctY*xqhSgew9ujk@VpOE;lS7 z9;%{LP)x#>&gB)}fyHo;FO{qEwJ_>JB&!LVfvk`E={J|9RI`T1Q@yq$-S%nc0XiZo zAO^v~YA4Bte47W0fC=MgHp2Dx!vi?3o8H8QNp33!EldV~7!cLfE|(9ozD# z-+3%n=2I=@x{6TNtp{6ag z&8ih^*1A5tnuMoA4BRrbF_8v2C)3B0-)OzT(Yimo zb2md=akyG2t`uE&C6$1=SzeLE|C?oOUEL6pHZa%U22g_iDucj3S>V5$8#Vo_DWR<{ z4M!!?*ut#=+z)Nx;&3p%cl0L?U`%6$9)C~C`4^-z8Q1c>H>A*ebC&(Hebef<3n zxmp$0^=$CMIrW7T(1Ko>&`c z1P8BYhP$@i!UV-8%j=X>>dly#nq({8+?6e*MW5vFu468qtzANoXFQvpuMfC9)M)L` zOcXm(lc`KIcG?frq51yK-J8VCuQPN;_$wo{uFUxLsqgESgRnFm#|i3p;Jnu_&J}88 zRAY7>Q^;(#1~tn-CNa}hwuATjo7aVI{n)Hdk0(*=XHwzdJpXYRml&oIK4IA^#raZs zl9L6nCk*q@B~YUmVHg0F8yT4Qk-_Q-Fsz(T$N7bhy+xCjr|*7`+O%-nrf7cK75 zOavK?P0lS6Y4Xd>VWk$E7he}XEf0bFHTo@J#@aM5rI!ukJZ&cH65^+c9dEi1-TugF zZrd`->R$JmR6hW>AKYZ$l=S55Wy#SR(J7}^Wy0F9Svcp7U?Ga8N4(|Ms*?3<8>z`Q zsZtfsjYeZf%=_b6;$k*OV{DB^qhAK8EKH_^dDh8@D9+|REm*D;dO~GKHfwMnGWw)m z%I@hv0rB@FX`kp(3G#iaN!}g5`x<`04c%*&+KM zL^mS{Cd(AUr1Jud^G3{)jtp)wYx|g#z8koA&;udis<% z^x3?lnz0z5$f7xJ&38Wxo2l}ZA?zacRiRYBLup-dBIdm7x4*alC`0>`HHJ(d|BHQ= zxA62%TPc-RA3@pD5&>$GHcwCutKOlkeNyAmx2x$Ex#+YA@f5S)@Il z#Yx0M&v@J~n)qNYo2hks-j3*D#!-e#>P~8eYUU9`xFVA%oT@<7%J)u}Sl|lo;&<3t zw2KQ=l%oR+*>>VQ@7vT&)a9)%Y!Y)g+pqpTY{!`31L&dyy55;Ba->OQP4T^Xrx)|p z=TI@AO4P|JCL)U}Pw=j@9iB>SX08f&My35bMVsZT?j!s>`f&cq&m-x`1H`NO&UNHN zDx0eq=I5o7Gw5j$%&~G7kyqAtpE0wJjm1$mw)s>w@|g>(jtH=a)`$ zzA3vGH&@*a!`=U;Ol@&?Kl}zVn@68g%zWc;u}EcO(!Rvcf5XwG^fiZujVT+^fV5Zpo13Q1YY5~=dki;nXgz zn5w{ggo4A1$mciGyKUTv(jkH)g{|W&T~9m4t6G&EP0O4rbk--U{F`RZQeLE$eljc; zvofPG-^>dd*6+y+`p94jiS9@n@u=$w8u2J2F?&YKUZBaY%-g&z*%K*iXm@jwnB4!3 z?g!l-mJR>L9C48W8=E(W=eqiF&y&)Q?wRA5SKp2@s|I2cg2;l^X?xCq%OUE$Dg8%W z<{nI~oheIf8ny3s0D1M-cy3})@KE^oAbNOvhUbfM7B6(Qcv`!UnDR=%GTI^PltHH? z2_t@@O0X&RxbK`2(%0YdNj{qm?7OE#MPA7Sdbg4CmL=1)SfdBy#v!mL%0k%OFe0a? 
zJCEUgjshOi-4t(e3m}7~KG4`7U%rK#SB#(6lNY;#-;+r{y{1AaL)P^o4t@veTpZZ3 z1w}fWLg}+zWV2+aiDhPhl1tt|hhigq9^i1y2%_<(tf`XyKX=uAkB#WV6JRT1g0QZUOS?rqf&-Kyk{|%tT zw%c_AQOU^uxHK)2E<;9=rif(fiRc2qKV*}5I`7}@7WlgPtuZGE$07FOlBca;aQacN zXuw6{6A?z|tn!c4Wa5DVGJ_?e0sAQj6;sgEVnpbvy9%%SMxY(lM-#n_gfeHZTG(^) zn}3{0azuP1COCAvq}W$Pit!RN7i&!hGA3uYq?Yzb$uQCIa_;>PGyV>0QE?3z{AY6HRn++JSoeRA{PpzYw3YuKB7feia1|Zq z{(ltsUxkH#ixdBv#Q493{*v-i=yjm0e`1Wkg#Z6(9#jKOZ#NY7kcV42q3wV^vKp4w zwkWua!b;+(G*!zhl!T5sRb4GCUD5GAi@XI2+}I+#|E$tYvK`rqoa{9k5@i!;|aQ~h)r`%Ov zKQWpG4288&HbWy7&=uVJnZWv&kp1WP2vAGP7 zul z%zZUYTPL^_I%44B`bsK@fHzh#3fE{fli-FzTPym4u<-wv+YMrd?V3 zS4#5#?1-1Er4=p;1A=f-GynAifbjA0@BypqWT2P#Dp$qT%F(09@_=&le@yCjqYhR<0bO8v^EvxY}tD7zzU8 LGBQf5%HaMVWGCJC literal 0 HcmV?d00001