% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Score-P 9.4 software release record (see the doi and url fields).
% The fields reportid, cin, cid, pnm, pid and typ are not standard BibTeX
% fields; they carry repository bookkeeping data and are ignored by the
% standard bibliography styles.
% Author names are stored as UTF-8 (see the encoding note at the top of
% this file).
@misc{Feld:1049907,
  author   = {Feld, Christian
              and Jäkel, René
              and Lorenz, Daniel
              and Wesarg, Bert
              and Schmidl, Dirk
              and Tschüter, Ronny
              and Oleynik, Yury
              and Wagner, Michael
              and Eschweiler, Dominic
              and Spazier, Johannes
              and Knüpfer, Andreas
              and Shende, Sameer
              and Millstein, Suzanne
              and Biersdorff, Scott
              and Geimer, Markus
              and Schlütter, Marc
              and Schmitt, Felix
              and Ziegenbalg, Johannes
              and Zhukov, Ilya
              and Dietrich, Robert
              and Geyer, Robin
              and Saviankou, Pavel
              and Knobloch, Michael
              and Mijaković, Robert
              and Schöne, Robert
              and Winkler, Frank
              and Ilsche, Thomas
              and Hermanns, Marc-André
              and Brendel, Ronny
              and Oeste, Sebastian
              and Herold, Christian
              and Sigl, Severin
              and Hilbrich, Tobias
              and Williams, Bill
              and Klotz, Sven
              and Corbin, Gregor
              and Reuter, Jan André
              and Grund, Alexander
              and Sander, Maximilian
              and Frenzel, Jan},
  title    = {{Score-P}: {Scalable} performance measurement
              infrastructure for parallel codes (v9.4)},
  version  = {9.4},
  reportid = {FZJ-2025-05667},
  year     = {2025},
  abstract = {The instrumentation and measurement framework Score-P,
              together with analysis tools built on top of its output
              formats, provides insight into massively parallel HPC
              applications, their communication, synchronization, I/O,
              and scaling behavior to pinpoint performance bottlenecks
              and their causes. Score-P is a highly scalable and
              easy-to-use tool suite for profiling (summarizing program
              execution) and event tracing (capturing events in
              chronological order) of HPC applications. The scorep
              instrumentation command adds instrumentation hooks into a
              user's application by either prepending or replacing the
              compile and link commands. C, C++, Fortran, and Python
              codes as well as contemporary HPC programming models (MPI,
              threading, GPUs, I/O) are supported. When running an
              instrumented application, measurement event data is
              provided by the instrumentation hooks to the measurement
              core. There, the events are augmented with high-accuracy
              timestamps and potentially hardware counters (a plugin-API
              allows querying additional metric sources). The augmented
              events are then passed to one or both of the built-in
              event consumers, profiling and tracing (a plugin-API
              allows creation of additional event consumers) which
              finally provide output in the formats CUBE4 and OTF2,
              respectively. These open and backwards-compatible output
              formats can be consumed by established analysis tools,
              e.g., CubeGUI, the performance report explorer for
              Scalasca and Score-P, a generic tool for displaying a
              multidimensional performance space; Extra-P, an automatic
              performance-modelling tool that supports the user in the
              identification of scalability bugs; TAU's ParaProf, a
              portable, scalable performance analysis tool, and
              PerfExplorer, a framework for parallel performance data
              mining and knowledge discovery; Scalasca Trace Tools, a
              collection of trace-based performance analysis tools that
              have been specifically designed for use on large-scale
              systems featuring hundreds of thousands of CPU cores,
              automatically identifying potential communication and
              synchronization bottlenecks and offering guidance in
              exploring their causes; and Vampir, a trace-based
              framework that enables users to quickly display and
              analyze arbitrary program behavior. Score-P is available
              under the 3-clause BSD Open Source license. Version 9.4 is
              a bugfix release for version 9.3. For
              features/changes/improvements introduced in the latest
              version, please see the Changelog file:
              https://perftools.pages.jsc.fz-juelich.de/cicd/scorep/tags/scorep-9.4/ChangeLog.txt},
  keywords = {Performance measurement, Score-P, instrumentation,
              sampling, HPC, profiling, tracing},
  cin      = {JSC},
  cid      = {I:(DE-Juel1)JSC-20090406},
  pnm      = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
              and Research Groups (POF4-511) / ATMLPP - ATML Parallel
              Performance (ATMLPP) / ATMLAO - ATML Application
              Optimization and User Service Tools (ATMLAO)},
  pid      = {G:(DE-HGF)POF4-5112 / G:(DE-Juel-1)ATMLPP /
              G:(DE-Juel-1)ATMLAO},
  typ      = {PUB:(DE-HGF)33},
  doi      = {10.5281/ZENODO.17964650},
  url      = {https://juser.fz-juelich.de/record/1049907},
}