140 lines
5.3 KiB
BibTeX
140 lines
5.3 KiB
BibTeX
% UiCA paper (ICS '22): throughput prediction of basic blocks on Intel microarchitectures.
@inproceedings{uica,
  author    = {Abel, Andreas and Reineke, Jan},
  title     = {{UiCA}: Accurate Throughput Prediction of Basic Blocks on Recent {Intel} Microarchitectures},
  booktitle = {Proceedings of the 36th {ACM} International Conference on Supercomputing},
  series    = {ICS '22},
  location  = {Virtual Event},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2022},
  isbn      = {9781450392815},
  articleno = {33},
  numpages  = {14},
  doi       = {10.1145/3524059.3532396},
  url       = {https://doi.org/10.1145/3524059.3532396},
  keywords  = {simulation, benchmarking, pipeline model, optimization, performance, throughput prediction},
}
|
|
|
|
% Ithemal preprint (arXiv 1808.07412), as exported from dblp.
@article{ithemal,
  author     = {Mendis, Charith and Amarasinghe, Saman P. and Carbin, Michael},
  title      = {Ithemal: Accurate, Portable and Fast Basic Block Throughput Estimation using Deep Neural Networks},
  journal    = {CoRR},
  volume     = {abs/1808.07412},
  year       = {2018},
  eprinttype = {arXiv},
  eprint     = {1808.07412},
  url        = {https://arxiv.org/abs/1808.07412},
  timestamp  = {Sun, 02 Sep 2018 15:01:55 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1808-07412.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
|
|
|
|
% BHive paper (IISWC 2019): benchmark suite for validating x86-64 basic block performance models.
@inproceedings{bhive,
  author    = {Chen, Yishen and Brahmakshatriya, Ajay and Mendis, Charith and Renda, Alex and Atkinson, Eric and S{\'y}kora, Ond{\v{r}}ej and Amarasinghe, Saman and Carbin, Michael},
  title     = {{BHive}: A Benchmark Suite and Measurement Framework for Validating x86-64 Basic Block Performance Models},
  booktitle = {2019 {IEEE} International Symposium on Workload Characterization ({IISWC})},
  year      = {2019},
  pages     = {167--177},
  doi       = {10.1109/IISWC47752.2019.9042166},
}
|
|
|
|
% AnICA paper (PACMPL vol. 6, OOPSLA2, 2022): inconsistencies in microarchitectural code analyzers.
@article{anica,
  author     = {Ritter, Fabian and Hack, Sebastian},
  title      = {{AnICA}: Analyzing Inconsistencies in Microarchitectural Code Analyzers},
  journal    = {Proceedings of the {ACM} on Programming Languages},
  volume     = {6},
  number     = {OOPSLA2},
  year       = {2022},
  month      = oct,
  issue_date = {October 2022},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  articleno  = {125},
  numpages   = {29},
  doi        = {10.1145/3563288},
  url        = {https://doi.org/10.1145/3563288},
  keywords   = {Throughput Prediction, Basic Blocks, Abstraction, Differential Testing},
}
|
|
|
|
% PALMED paper (CGO 2022): throughput characterization for superscalar architectures.
@inproceedings{palmed,
  author    = {Derumigny, Nicolas and Bastian, Th{\'e}ophile and Gruber, Fabian and Iooss, Guillaume and Guillon, Christophe and Pouchet, Louis-No{\"e}l and Rastello, Fabrice},
  title     = {{PALMED}: Throughput Characterization for Superscalar Architectures},
  booktitle = {2022 {IEEE/ACM} International Symposium on Code Generation and Optimization ({CGO})},
  year      = {2022},
  pages     = {106--117},
  doi       = {10.1109/CGO53902.2022.9741289},
}
|
|
|
|
|
|
% llvm-mca tool documentation (web resource; URL kept in howpublished as in the original).
@misc{llvm-mca,
  author       = {{Sony Corporation} and {LLVM Project}},
  title        = {{LLVM} Machine Code Analyzer},
  howpublished = {\url{https://llvm.org/docs/CommandGuide/llvm-mca.html}},
}
|
|
|
|
% IACA tool page (web resource; URL kept in howpublished as in the original).
@misc{iaca,
  author       = {{Intel Corporation}},
  title        = {Intel Architecture Code Analyzer ({IACA})},
  howpublished = {\url{https://software.intel.com/en-us/articles/intel-architecture-code-analyzer/}},
}
|
|
|
|
|
|
% First OSACA paper (PMBS 2018): instruction stream throughput prediction for Intel and AMD.
@inproceedings{osaca1,
  author    = {Laukemann, Jan and Hammer, Julian and Hofmann, Johannes and Hager, Georg and Wellein, Gerhard},
  title     = {Automated Instruction Stream Throughput Prediction for {Intel} and {AMD} Microarchitectures},
  booktitle = {2018 {IEEE/ACM} Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems ({PMBS})},
  year      = {2018},
  pages     = {121--131},
  doi       = {10.1109/PMBS.2018.8641578},
}
|
|
|
|
% Second OSACA paper (PMBS 2019): throughput and critical path analysis of x86 and ARM kernels.
@inproceedings{osaca2,
  author    = {Laukemann, Jan and Hammer, Julian and Hager, Georg and Wellein, Gerhard},
  title     = {Automatic Throughput and Critical Path Analysis of x86 and {ARM} Assembly Kernels},
  booktitle = {2019 {IEEE/ACM} Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems ({PMBS})},
  year      = {2019},
  pages     = {1--6},
  doi       = {10.1109/PMBS49563.2019.00006},
}
|
|
|
|
% uops.info paper (ASPLOS '19): latency, throughput, and port usage of Intel instructions.
@inproceedings{uopsinfo,
  author    = {Abel, Andreas and Reineke, Jan},
  title     = {uops.info: Characterizing Latency, Throughput, and Port Usage of Instructions on {Intel} Microarchitectures},
  booktitle = {{ASPLOS}},
  series    = {ASPLOS '19},
  location  = {Providence, RI, USA},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2019},
  pages     = {673--686},
  numpages  = {14},
  isbn      = {978-1-4503-6240-5},
  acmid     = {3304062},
  doi       = {10.1145/3297858.3304062},
  url       = {https://doi.org/10.1145/3297858.3304062},
}
|
|
|
|
% nanoBench preprint (arXiv 1911.03282), as exported from dblp.
@article{nanobench,
  author        = {Abel, Andreas and Reineke, Jan},
  title         = {{nanoBench}: A Low-Overhead Tool for Running Microbenchmarks on x86 Systems},
  journal       = {arXiv e-prints},
  volume        = {abs/1911.03282},
  year          = {2019},
  archivePrefix = {arXiv},
  eprint        = {1911.03282},
  url           = {https://arxiv.org/abs/1911.03282},
  timestamp     = {Mon, 11 Nov 2019 18:38:09 +0100},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1911-03282.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|
|