% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference paper on the PHMPP profiling interface and its Score-P integration,
% published in "Advances in Parallel Computing", vol. 25 (IOS Press, 2014).
%
% Maintenance notes:
%   - The citation key is kept exactly as exported (the umlaut was dropped by
%     the exporter) so that existing \cite commands keep resolving.
%   - "year" is the publication year of the proceedings volume. The conference
%     days (10-13 Sep 2013, as stated in "organization") live in "eventdate",
%     so date-aware backends do not mistake them for the publication date.
%   - NOTE(review): "month" was exported as Sep, which matches the conference
%     month; confirm against the publisher record that it is also the
%     publication month.
%   - Only the acronym HMPP and the tool name Score-P are brace-protected in
%     the title; bibliography styles may recase the remaining words.
%   - comment, reportid, cin, cid, pnm, pid, typ and UT are repository
%     bookkeeping fields carried over unchanged from the export.
@inproceedings{Schltter:152041,
  author       = {Schlütter, Marc and Philippen, Peter and Morin, Laurent
                  and Geimer, Markus and Mohr, Bernd},
  title        = {Profiling Hybrid {HMPP} Applications with {Score-P}
                  on Heterogeneous Hardware},
  booktitle    = {Parallel Computing: Accelerating Computational Science
                  and Engineering ({CSE})},
  series       = {Advances in Parallel Computing},
  volume       = {25},
  pages        = {773--782},
  publisher    = {IOS Press},
  organization = {International Conference on Parallel
                  Computing, Munich (Germany), 10 Sep
                  2013 - 13 Sep 2013},
  eventdate    = {2013-09-10/2013-09-13},
  year         = {2014},
  month        = sep,
  isbn         = {978-1-61499-380-3},
  doi          = {10.3233/978-1-61499-381-0-773},
  url          = {https://juser.fz-juelich.de/record/152041},
  abstract     = {In heterogeneous environments with multi-core systems and
                  accelerators, programming and optimizing large parallel
                  applications turns into a time-intensive and
                  hardware-dependent challenge. To assist application
                  developers in this process, a number of tools and high-level
                  compilers have been developed. Directive-based programming
                  models such as HMPP and OpenACC provide abstractions over
                  low-level GPU programming models, such as CUDA or OpenCL. The
                  compilers developed by CAPS automatically transform the
                  pragma-annotated application code into low-level code,
                  thereby allowing the parallelization and optimization for a
                  given accelerator hardware. To analyze the performance of
                  parallel applications, multiple partners in Germany and the
                  US jointly develop the community measurement infrastructure
                  Score-P. Score-P gathers performance execution profiles,
                  which can be presented and analyzed within the CUBE result
                  browser, and collects detailed event traces to be processed
                  by post-mortem analysis tools such as Scalasca and Vampir. In
                  this paper we present the integration and combined use of
                  Score-P and the CAPS compilers as one approach to
                  efficiently parallelize and optimize codes. Specifically, we
                  describe the PHMPP profiling interface, its implementation
                  in Score-P, and the presentation of preliminary results in
                  CUBE.},
  comment      = {Parallel Computing: Accelerating Computational Science and
                  Engineering ({CSE})},
  reportid     = {FZJ-2014-01861},
  cin          = {JSC},
  cid          = {I:(DE-Juel1)JSC-20090406},
  pnm          = {411 - Computational Science and Mathematical Methods
                  (POF2-411) / ATMLPP - ATML Parallel Performance (ATMLPP)},
  pid          = {G:(DE-HGF)POF2-411 / G:(DE-Juel-1)ATMLPP},
  typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
  UT           = {WOS:000452120400078},
}