% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Poster contribution (see abstract) presented at ISC High Performance 2024.
% booktitle repeats the event name from the organization field, because
% inproceedings entries require a booktitle.
% reportid, subtyp, cin, cid, pnm, pid and typ are not standard BibTeX
% fields (presumably repository export metadata -- confirm before relying on
% them); unknown fields are ignored by bibliography styles.
@inproceedings{Reuter:1027498,
  author       = {Reuter, Jan Andre and Williams, William R. and Mohr, Bernd},
  title        = {Performance Analysis of {OpenMP} Target Offloading in
                  {Score-P}},
  booktitle    = {{ISC} High Performance 2024},
  reportid     = {FZJ-2024-03907},
  year         = {2024},
  month        = may,
  date         = {2024-05-13},
  abstract     = {With increasing demand in compute performance of HPC
                  systems, accelerators are getting the main focus for
                  application development. Many of the Top500 HPC systems now
                  include accelerators, with the top 3 systems alone having
                  accelerators of three different vendors. This diversity
                  requires application developers to choose portable
                  frameworks to support all at the same time, as developing
                  applications via each native API is time consuming. One of
                  the available frameworks is OpenMP with its offloading
                  capability and availability for C, C++ and Fortran. With
                  OpenMP offloading gaining more traction recently,
                  performance analysis becomes important as well. With this
                  poster, we present our first results in adding support for
                  OpenMP offloading to our instrumentation and measurement
                  infrastructure Score-P using the OpenMP Tools Interface. We
                  demonstrate how we can use both host side callbacks and the
                  device tracing interface to build a measurement adapter
                  capable of analyzing OpenMP applications effectively. We
                  show the current support landscape between different
                  compilers and present first results for profiles and event
                  traces based on the SPEC HPC 2021 618.tealeaf\_s benchmark
                  running on the LUMI HPC cluster at CSC in Finland.},
  organization = {ISC High Performance 2024, Hamburg
                  (Germany), 13 May 2024 - 15 May 2024},
  subtyp       = {After Call},
  cin          = {JSC},
  cid          = {I:(DE-Juel1)JSC-20090406},
  pnm          = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
                  and Research Groups (POF4-511) / BMBF 16ME0630 - ENSIMA -
                  Energieoptimiertes High-Performance Computing für
                  Finite-Elemente-Simulationen in der Produktentwicklung
                  (16ME0630) / ATMLPP - ATML Parallel Performance (ATMLPP)},
  pid          = {G:(DE-HGF)POF4-5112 / G:(BMBF)16ME0630 /
                  G:(DE-Juel-1)ATMLPP},
  typ          = {PUB:(DE-HGF)24},
  doi          = {10.34734/FZJ-2024-03907},
  url          = {https://juser.fz-juelich.de/record/1027498},
}