% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
%
% Reuter, Feld, Mohr: contribution to the 15th International Parallel Tools
% Workshop 2024 (Dresden, 19-20 Sep 2024) on the Score-P OMPT tool.
%
% Notes for maintainers:
%  * booktitle carries the event name (required for this entry type);
%    venue and eventdate are biblatex fields holding the location and date
%    range that were previously packed into the organization field.
%  * year/month are kept alongside date so that both classic styles and
%    biblatex styles find a usable date.
%  * reportid, subtyp, cin, cid, pnm, pid and typ are not standard fields;
%    presumably repository export metadata (TODO confirm). Unknown fields are
%    ignored by standard styles.
@inproceedings{Reuter:1031485,
  author    = {Reuter, Jan Andre and Feld, Christian and Mohr, Bernd},
  title     = {{Score-P} and {OMPT}: {Smoothing} the bumpy road to
               {OpenMP} performance measurement},
  booktitle = {15th International Parallel Tools Workshop 2024},
  venue     = {Dresden, Germany},
  eventdate = {2024-09-19/2024-09-20},
  year      = {2024},
  month     = sep,
  date      = {2024-09-19},
  abstract  = {The OpenMP API is a widely used interface for high-level
               parallel programming in C, C++ and Fortran. Initially
               introduced in 1997, it now targets three basic processor
               building blocks, CPUs, SIMD vector units, and accelerators.
               With large adoption in the HPC community and wide support
               from compiler vendors, OpenMP grew into a key component in
               leveraging node-level parallelism in applications and
               frameworks. Herewith, a need for OpenMP-aware performance
               measurement and analysis tools arose. In version 5.0 of the
               OpenMP specification, the OpenMP Tools Interface (OMPT) was
               introduced, providing means to collect precise information
               about the application's use of OpenMP directives and lock
               routines. Although provided with a detailed specification,
               understanding and correctly handling the CPU execution model
               event sequence dispatched from various vendor's runtimes
               requires detailed analysis of events, their parameters and
               executing threads. To facilitate this analysis, we developed
               a freely available OMPT tool that allows for dumping
               execution model events and corresponding metadata for
               post-mortem inspection. Analyzing the output of this tool
               applied to the official OpenMP examples and handwritten
               smoke tests, enabled us to implement an OMPT tool for the
               performance measurement infrastructure Score-P, replacing
               the long-established, but feature-incomplete
               source-to-source OpenMP instrumenter OPARI2. Both OMPT tools
               are regularly tested against the aforementioned OpenMP
               examples and smoke tests. As vendors take the freedom to
               interpret the OMPT specification, various checks were
               developed to detect deviations. In Score-P, deviations are
               classified as fatal, disengageable, and remediable. Based on
               feedback given to the vendors, several of the deviations are
               no longer a concern. Accompanying the development of OMPT
               itself, the overhead being introduced in the OpenMP runtimes
               was always a concern. To assess this overhead in various
               contemporary runtimes, we used the EPCC and SPEC OpenMP
               benchmark suites, with OMPT disabled (if possible), with a
               dummy tool, and with the Score-P OMPT tool attached.},
  doi       = {10.34734/FZJ-2024-05698},
  url       = {https://juser.fz-juelich.de/record/1031485},
  reportid  = {FZJ-2024-05698},
  subtyp    = {After Call},
  cin       = {JSC},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
               and Research Groups (POF4-511) / BMBF 16ME0630 - ENSIMA -
               Energieoptimiertes High-Performance Computing für
               Finite-Elemente-Simulationen in der Produktentwicklung
               (16ME0630) / ATMLPP - ATML Parallel Performance (ATMLPP)},
  pid       = {G:(DE-HGF)POF4-5112 / G:(BMBF)16ME0630 /
               G:(DE-Juel-1)ATMLPP},
  typ       = {PUB:(DE-HGF)6},
}