diff --git a/manuscrit/10_introduction/main.tex b/manuscrit/10_introduction/main.tex
index 1a5c205..9c927c5 100644
--- a/manuscrit/10_introduction/main.tex
+++ b/manuscrit/10_introduction/main.tex
@@ -56,8 +56,12 @@
 slower than the former~\cite{rowmajor_repo}. This, however, is still an
 optimization that holds for the vast majority of CPUs.
 In many cases, transformations targeting a specific microarchitecture can
-be very beneficial. \qtodo{Insert number/ref \wrt{} matmult or some kernel of
-the like.} This kind of optimizations, however, requires manual effort, and a
+be very beneficial.
+For instance, Uday Bondhugula showed that manually tuning a general matrix
+multiplication, using a range of techniques and tools, could raise its
+throughput to roughly 13.5 times that of \texttt{gcc~-O3}, and to about
+130 times that of \texttt{clang~-O3}~\cite{dgemm_finetune}.
+This kind of optimization, however, requires manual effort and a
 deep expert knowledge both in optimization techniques and on the specific
 architecture targeted.
 These techniques are only worth applying on the parts of a program that are
diff --git a/manuscrit/biblio/misc.bib b/manuscrit/biblio/misc.bib
index 35ccd2e..1139896 100644
--- a/manuscrit/biblio/misc.bib
+++ b/manuscrit/biblio/misc.bib
@@ -148,3 +148,13 @@
 month=10,
 howpublished={\url{https://gitlab.inria.fr/tbastian/rowmajor-measure}},
 }
+
+@misc{dgemm_finetune,
+    title={High Performance Code Generation in MLIR: An Early Case Study
+           with GEMM},
+    author={Uday Bondhugula},
+    year={2020},
+    eprint={2003.00532},
+    archivePrefix={arXiv},
+    primaryClass={cs.PF}
+}