% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference contribution; record 1031485 on juser.fz-juelich.de (see url field).
% The event name, venue and dates are stored in booktitle, the field that
% inproceedings styles print for the venue.
% year/month serve classic BibTeX styles; date carries the full ISO day for
% biblatex/biber.
% reportid, subtyp, cin, cid, pnm, pid and typ are repository-specific fields
% that standard bibliography styles ignore.
@inproceedings{Reuter:1031485,
  author    = {Reuter, Jan Andre and Feld, Christian and Mohr, Bernd},
  title     = {{Score-P} and {OMPT}: Smoothing the bumpy road to
               {OpenMP} performance measurement},
  booktitle = {15th International Parallel Tools Workshop 2024,
               Dresden (Germany), 19 Sep 2024 -- 20 Sep 2024},
  year      = {2024},
  month     = sep,
  date      = {2024-09-19},
  abstract  = {The OpenMP API is a widely used interface for high-level
               parallel programming in C, C++ and Fortran. Initially
               introduced in 1997, it now targets three basic processor
               building blocks, CPUs, SIMD vector units, and accelerators.
               With large adoption in the HPC community and wide support
               from compiler vendors, OpenMP grew into a key component in
               leveraging node-level parallelism in applications and
               frameworks. Herewith, a need for OpenMP-aware performance
               measurement and analysis tools arose. In version 5.0 of the
               OpenMP specification, the OpenMP Tools Interface (OMPT) was
               introduced, providing means to collect precise information
               about the application's use of OpenMP directives and lock
               routines. Although provided with a detailed specification,
               understanding and correctly handling the CPU execution model
               event sequence dispatched from various vendor's runtimes
               requires detailed analysis of events, their parameters and
               executing threads. To facilitate this analysis, we developed
               a freely available OMPT tool that allows for dumping
               execution model events and corresponding metadata for
               post-mortem inspection. Analyzing the output of this tool
               applied to the official OpenMP examples and handwritten
               smoke tests, enabled us to implement an OMPT tool for the
               performance measurement infrastructure Score-P, replacing
               the long-established, but feature-incomplete
               source-to-source OpenMP instrumenter OPARI2. Both OMPT tools
               are regularly tested against the aforementioned OpenMP
               examples and smoke tests. As vendors take the freedom to
               interpret the OMPT specification, various checks were
               developed to detect deviations. In Score-P, deviations are
               classified as fatal, disengageable, and remediable. Based on
               feedback given to the vendors, several of the deviations are
               no longer a concern. Accompanying the development of OMPT
               itself, the overhead being introduced in the OpenMP runtimes
               was always a concern. To assess this overhead in various
               contemporary runtimes, we used the EPCC and SPEC OpenMP
               benchmark suites, with OMPT disabled (if possible), with a
               dummy tool, and with the Score-P OMPT tool attached.},
  doi       = {10.34734/FZJ-2024-05698},
  url       = {https://juser.fz-juelich.de/record/1031485},
  reportid  = {FZJ-2024-05698},
  subtyp    = {After Call},
  cin       = {JSC},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
               and Research Groups (POF4-511) / BMBF 16ME0630 - ENSIMA -
               Energieoptimiertes High-Performance Computing für
               Finite-Elemente-Simulationen in der Produktentwicklung
               (16ME0630) / ATMLPP - ATML Parallel Performance (ATMLPP)},
  pid       = {G:(DE-HGF)POF4-5112 / G:(BMBF)16ME0630 /
               G:(DE-Juel-1)ATMLPP},
  typ       = {PUB:(DE-HGF)6},
}