% Conference paper published in the Euro-Par 2024 proceedings (Springer LNCS).
% The citation key is the exporter-generated hash; it is kept unchanged so existing citations still resolve.
% NOTE(review): volume is meant to be the LNCS number of proceedings Part I (previously the part number 1) - confirm against the DOI landing page.
% The conference name/dates and the copyright notice formerly packed into the note field now live in
% eventtitle/eventdate/copyright, which classic BibTeX styles ignore, so they no longer print in the bibliography.
@inproceedings{87efdeeabd514714b3db9a158dbca9c6,
  author     = {Lurati, Milo and Heldens, Stijn and Sclocco, Alessio and van Werkhoven, Ben},
  title      = {Bringing Auto-Tuning to {HIP}: Analysis of Tuning Impact and Difficulty on {AMD} and {Nvidia} {GPUs}},
  booktitle  = {{Euro-Par} 2024: Parallel Processing},
  editor     = {Carretero, Jesus and Garcia-Blas, Javier and Shende, Sameer and Brandic, Ivona and Olcoz, Katzalin and Schreiber, Martin},
  series     = {Lecture Notes in Computer Science},
  volume     = {14801},
  pages      = {91--106},
  publisher  = {Springer Nature Switzerland},
  address    = {Cham},
  year       = {2024},
  isbn       = {9783031695766},
  doi        = {10.1007/978-3-031-69577-3_7},
  language   = {english},
  keywords   = {Auto-tuning, CUDA, GPU Programming, HIP},
  abstract   = {Many studies have focused on developing and improving auto-tuning algorithms for Nvidia Graphics Processing Units (GPUs), but the effectiveness and efficiency of these approaches on AMD devices have hardly been studied. This paper aims to address this gap by introducing an auto-tuner for AMD{\textquoteright}s HIP. We do so by extending Kernel Tuner, an open-source Python library for auto-tuning GPU programs. We analyze the performance impact and tuning difficulty for four highly-tunable benchmark kernels on four different GPUs: two from Nvidia and two from AMD. Our results demonstrate that auto-tuning has a significantly higher impact on performance on AMD compared to Nvidia (10x vs 2x). Additionally, we show that applications tuned for Nvidia do not perform optimally on AMD, underscoring the importance of auto-tuning specifically for AMD to achieve high performance on these GPUs.},
  eventtitle = {30th International Conference on Parallel and Distributed Computing, {Euro-Par} 2024},
  eventdate  = {2024-08-26/2024-08-30},
  copyright  = {{\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2024},
}