This commit is contained in:
Théophile Bastian 2023-10-09 18:13:30 +02:00
parent 0d7bf6a974
commit 9dd4432342
2 changed files with 16 additions and 2 deletions
manuscrit
10_introduction
biblio

View file

@ -56,8 +56,12 @@ slower than the former~\cite{rowmajor_repo}.
This, however, is still an optimization that holds for the vast majority of This, however, is still an optimization that holds for the vast majority of
CPUs. In many cases, transformations targeting a specific microarchitecture can CPUs. In many cases, transformations targeting a specific microarchitecture can
be very beneficial. \qtodo{Insert number/ref \wrt{} matmult or some kernel of be very beneficial.
the like.} This kind of optimizations, however, requires manual effort, and a For instance, Uday Bondhugula found out that manual tuning, through many
techniques and tools, of a general matrix multiplication could multiply its
throughput by roughly 13.5 compared to \texttt{gcc~-O3}, or even by roughly
130 compared to \texttt{clang~-O3}~\cite{dgemm_finetune}.
This kind of optimization, however, requires manual effort, and a
deep expert knowledge both in optimization techniques and on the specific deep expert knowledge both in optimization techniques and on the specific
architecture targeted. architecture targeted.
These techniques are only worth applying on the parts of a program that are These techniques are only worth applying on the parts of a program that are

View file

@ -148,3 +148,13 @@
month=10, month=10,
howpublished={\url{https://gitlab.inria.fr/tbastian/rowmajor-measure}}, howpublished={\url{https://gitlab.inria.fr/tbastian/rowmajor-measure}},
} }
@misc{dgemm_finetune,
    title={High Performance Code Generation in {MLIR}: An Early Case Study
        with {GEMM}},
    author={Bondhugula, Uday},
    year={2020},
    eprint={2003.00532},
    archivePrefix={arXiv},
    primaryClass={cs.PF},
}