% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference paper (PASC '20). Standard bibliographic fields come first; the
% trailing reportid/cin/cid/pnm/pid/typ fields are non-standard (presumably
% repository bookkeeping for the record linked in url -- confirm) and are
% ignored by standard styles. eventtitle/venue/eventdate are biblatex fields;
% classic BibTeX styles skip them silently.
@inproceedings{Haensel:889149,
  author     = {Haensel, David and Morgenstern, Laura and Beckmann, Andreas
                and Kabadshow, Ivo and Dachsel, Holger},
  title      = {{Eventify}: Event-Based Task Parallelism for Strong Scaling},
  booktitle  = {Proceedings of the Platform for Advanced Scientific
                Computing Conference},
  eventtitle = {PASC '20: Platform for Advanced Scientific Computing
                Conference},
  venue      = {Geneva, Switzerland},
  eventdate  = {2020-06-29/2020-07-01},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  pages      = {1--10},
  year       = {2020},
  month      = jun,
  date       = {2020-06-29},
  isbn       = {9781450379939},
  doi        = {10.1145/3394277.3401858},
  url        = {https://juser.fz-juelich.de/record/889149},
  abstract   = {Today's processors become fatter, not faster. However, the
                exploitation of these massively parallel compute resources
                remains a challenge for many traditional HPC applications
                regarding scalability, portability and programmability. To
                tackle this challenge, several parallel programming
                approaches such as loop parallelism and task parallelism are
                researched in form of languages, libraries and frameworks.
                Task parallelism as provided by OpenMP, HPX, StarPU, Charm++
                and Kokkos is the most promising approach to overcome the
                challenges of ever increasing parallelism. The
                aforementioned parallel programming technologies enable
                scalability for a broad range of algorithms with
                coarse-grained tasks, e. g. in linear algebra and classical
                N-body simulation. However, they do not fully address the
                performance bottlenecks of algorithms with fine-grained
                tasks and the resultant large task graphs. Additionally, we
                experienced the description of large task graphs to be
                cumbersome with the common approach of providing in-, out-
                and inout-dependencies. We introduce event-based task
                parallelism to solve the performance and programmability
                issues for algorithms that exhibit fine-grained task
                parallelism and contain repetitive task patterns. With
                user-defined event lists, the approach provides a more
                convenient and compact way to describe large task graphs.
                Furthermore, we show how these event lists are processed by
                a task engine that reuses user-defined, algorithmic data
                structures. As use case, we describe the implementation of a
                fast multipole method for molecular dynamics with
                event-based task parallelism. The performance analysis
                reveals that the event-based implementation is 52\%
                faster than a classical loop-parallel implementation with
                OpenMP.},
  comment    = {Proceedings of the Platform for Advanced Scientific
                Computing Conference - ACM New York, NY, USA, 2020. - ISBN
                9781450379939 - doi:10.1145/3394277.3401858},
  reportid   = {FZJ-2021-00074},
  cin        = {JSC / IAS-7},
  cid        = {I:(DE-Juel1)JSC-20090406 / I:(DE-Juel1)IAS-7-20180321},
  pnm        = {511 - Computational Science and Mathematical Methods
                (POF3-511) / PhD no Grant - Doktorand ohne besondere
                Förderung (PHD-NO-GRANT-20170405)},
  pid        = {G:(DE-HGF)POF3-511 / G:(DE-Juel1)PHD-NO-GRANT-20170405},
  typ        = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
}