phd-thesis/manuscrit/biblio/tools.bib

% perf, the Linux profiling tool, cited through its kernel.org wiki page.
% "Linux" is braced so that no style can lowercase the proper noun.
@misc{tool:perf,
    author       = {{Linux Kernel}},
    title        = {perf: {Linux} profiling with performance counters},
    howpublished = {\url{http://perf.wiki.kernel.org/index.php/Main_Page}}
}

% HPCToolkit overview article (Concurrency and Computation, 2010).
% The doi field holds the bare identifier (styles add the resolver prefix),
% the page range uses an en-dash (--), and the acronym in the title is braced
% so that sentence-casing styles do not turn it into "Hpctoolkit".
@article{tool:hpctoolkit,
    author = {Adhianto, L. and Banerjee, S. and Fagan, M. and Krentel, M. and Marin, G. and Mellor-Crummey, J. and Tallent, N. R.},
    title = {{HPCTOOLKIT}: tools for performance analysis of optimized parallel programs},
    journal = {Concurrency and Computation: Practice and Experience},
    volume = {22},
    number = {6},
    pages = {685--701},
    keywords = {performance tools, call path profiling, tracing, binary analysis, execution monitoring},
    doi = {10.1002/cpe.1553},
    url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/cpe.1553},
    eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/cpe.1553},
    abstract = {Abstract HPCTOOLKIT is an integrated suite of tools that supports measurement, analysis, attribution, and presentation of application performance for both sequential and parallel programs. HPCTOOLKIT can pinpoint and quantify scalability bottlenecks in fully optimized parallel programs with a measurement overhead of only a few percent. Recently, new capabilities were added to HPCTOOLKIT for collecting call path profiles for fully optimized codes without any compiler support, pinpointing and quantifying bottlenecks in multithreaded programs, exploring performance information and source code using a new user interface, and displaying hierarchical space–time diagrams based on traces of asynchronous call path samples. This paper provides an overview of HPCTOOLKIT and illustrates its utility for performance analysis of parallel applications. Copyright © 2009 John Wiley \& Sons, Ltd.},
    year = {2010}
}

% Capstone disassembly engine, cited through its project website.
% The collective author is wrapped in braces so BibTeX treats it as one
% indivisible name instead of splitting it into von/last parts.
@misc{tool:capstone,
    author       = {Nguyen Anh Quynh and {the Capstone collaborators}},
    title        = {Capstone engine},
    howpublished = {\url{https://www.capstone-engine.org/}}
}

% Original Valgrind paper (2003).
% The journal is stored under its full name; abbreviating it is the job of
% the bibliography style, not of the database.
@article{tool:valgrind,
    author  = {Nethercote, Nicholas and Seward, Julian},
    title   = {Valgrind: {A} Program Supervision Framework},
    journal = {Electronic Notes in Theoretical Computer Science},
    volume  = {89},
    number  = {2},
    pages   = {44--66},
    year    = {2003},
}

% Valgrind web page listing the supported platforms.
% year/month are the values recorded in the original entry; the month uses
% the standard three-letter macro so that styles can localise it.
@misc{tool:valgrind_arch_support,
    author       = {{Valgrind developers}},
    title        = {Valgrind --- supported platforms},
    howpublished = {\url{https://valgrind.org/info/platforms.html}},
    year         = 2023,
    month        = sep,
}

% FOSDEM 2022 talk on AVX-512 support in Valgrind.
% inproceedings entries require a booktitle: FOSDEM is the event the talk
% was given at, so it is recorded there rather than as a publisher.
% "Valgrind" is braced to survive sentence-casing styles.
@inproceedings{valgrind_avx512,
    author    = {Volnina, Tanya},
    title     = {Enable {AVX-512} instructions in {Valgrind}},
    booktitle = {{FOSDEM}},
    year      = {2022},
    url       = {https://fosdem.org/2022/schedule/event/valgrind_avx512/},
}

% PLuTo technical report (The Ohio State University, 2007).
% The tool name is braced so its mixed capitalisation is kept by styles that
% sentence-case titles.
@techreport{tool:pluto,
    author      = {Bondhugula, Uday and Ramanujam, J. and Sadayappan, P.},
    title       = {{PLuTo}: A Practical and Fully Automatic Polyhedral Parallelizer and Locality Optimizer},
    institution = {The Ohio State University},
    number      = {OSU-CISRC-10/07-TR70},
    year        = 2007,
    month       = oct
}

% PoCC, the Polyhedral Compiler Collection, cited through its web page.
% The URL lives in howpublished, like the other web/software entries of
% this file, instead of the free-form note field.
@misc{tool:pocc,
    author       = {Pouchet, Louis-No{\"e}l},
    title        = {{PoCC}, the Polyhedral Compiler Collection},
    howpublished = {\url{https://www.cs.colostate.edu/~pouchet/software/pocc/}},
    year         = 2009,
}

% Gurobi optimizer, cited through the vendor website.
% The company name is double-braced: without the inner braces the comma makes
% BibTeX read it as last name "Gurobi Optimization", first name "LLC".
@misc{tool:gurobi,
    author       = {{Gurobi Optimization, LLC}},
    title        = {Gurobi Optimizer},
    howpublished = {\url{https://www.gurobi.com}}
}

% PAPI hardware performance counter interface (1999 conference paper).
% The acronym in the title is braced against sentence-casing, and the
% proceedings name is stored with its proper capitalisation.
% NOTE(review): volume 710 is kept from the original entry -- confirm it
% against the actual proceedings.
@inproceedings{tool:papi,
    author    = {Mucci, Philip J and Browne, Shirley and Deane, Christine and
                 Ho, George},
    title     = {{PAPI}: A portable interface to hardware performance counters},
    booktitle = {Proceedings of the Department of Defense {HPCMP} Users Group
                 Conference},
    volume    = {710},
    year      = {1999}
}

% QEMU processor emulator, cited through the project website.
@misc{tool:qemu,
    author       = {{QEMU}},
    title        = {{QEMU}: the {FAST!} processor emulator},
    howpublished = {\url{https://www.qemu.org}}
}

% OpenBLAS
% AUGEM paper (SC '13), the publication cited for OpenBLAS kernels.
% Acronyms in the title are braced so that sentence-casing styles keep them.
@inproceedings{openblas_2013,
    author = {Wang, Qian and Zhang, Xianyi and Zhang, Yunquan and Yi, Qing},
    title = {{AUGEM}: Automatically Generate High Performance Dense Linear Algebra Kernels on {X86} {CPUs}},
    year = {2013},
    isbn = {9781450323789},
    publisher = {Association for Computing Machinery},
    address = {New York, NY, USA},
    url = {https://doi.org/10.1145/2503210.2503219},
    doi = {10.1145/2503210.2503219},
    abstract = {Basic Liner algebra subprograms (BLAS) is a fundamental library in scientific computing. In this paper, we present a template-based optimization framework, AUGEM, which can automatically generate fully optimized assembly code for several dense linear algebra (DLA) kernels, such as GEMM, GEMV, AXPY and DOT, on varying multi-core CPUs without requiring any manual interference from developers. In particular, based on domain-specific knowledge about algorithms of the DLA kernels, we use a collection of parameterized code templates to formulate a number of commonly occurring instruction sequences within the optimized low-level C code of these DLA kernels. Then, our framework uses a specialized low-level C optimizer to identify instruction sequences that match the pre-defined code templates and thereby translates them into extremely efficient SSE/AVX instructions. The DLA kernels generated by our template-based approach surpass the implementations of Intel MKL and AMD ACML BLAS libraries, on both Intel Sandy Bridge and AMD Piledriver processors.},
    booktitle = {Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis},
    articleno = {25},
    numpages = {12},
    keywords = {auto-tuning, code generation, DLA code optimization},
    location = {Denver, Colorado},
    series = {SC '13}
}

% OpenBLAS project web page.
% The URL points at the OpenBLAS site (the previous value was the QEMU
% address), and the author is written family-name-first as "Zhang, Xianyi",
% matching the spelling used in the AUGEM paper entry.
@misc{openblas_webpage,
    author       = {Zhang, Xianyi},
    title        = {{OpenBLAS}: an optimized {BLAS} library},
    howpublished = {\url{https://www.openblas.net}}
}

% EXEgesis, cited through its GitHub repository under the Google organisation.
@misc{tool:google_exegesis,
    author       = {{Google}},
    title        = {{EXEgesis}},
    howpublished = {\url{https://github.com/google/EXEgesis}},
}

% Intel oneAPI Math Kernel Library, cited through the vendor product page.
% "oneAPI" is braced: as the first word of the title, sentence-casing styles
% would otherwise lowercase everything after its first letter.
@misc{intel_mkl,
    author       = {{Intel}},
    title        = {{oneAPI} Math Kernel Library ({oneMKL})},
    howpublished = {\url{https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html}},
    year         = 2003,
}

% Intel VTune profiler, cited through the vendor product page.
@misc{intel_vtune,
    author       = {{Intel}},
    title        = {{VTune} profiler},
    howpublished = {\url{https://www.intel.com/content/www/us/en/developer/tools/oneapi/vtune-profiler.html}},
}