% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference paper in the "Computational Science – ICCS 2020" proceedings
% (report id FZJ-2020-03241). The citation key is kept as exported so that
% existing citations continue to resolve.
% Title braces protect only the acronym NUMA and the framework name Eventify;
% all other words are left unbraced so the bibliography style decides casing.
% Fields such as cin, cid, pnm, pid, typ, UT and reportid are not standard
% BibTeX fields and are ignored by the standard styles.
@inproceedings{Morgenstern:884765,
  author       = {Morgenstern, Laura and Haensel, David and Beckmann, Andreas
                  and Kabadshow, Ivo},
  title        = {{NUMA}-Awareness as a Plug-In for an
                  {Eventify}-Based Fast Multipole Method},
  reportid     = {FZJ-2020-03241},
  isbn         = {978-3-030-50436-6},
  pages        = {428--441},
  year         = {2020},
  comment      = {Computational Science – ICCS 2020},
  booktitle    = {Computational Science – ICCS 2020},
  abstract     = {Following the trend towards Exascale, today’s
                  supercomputers consist of increasingly complex and
                  heterogeneous compute nodes. To exploit the performance of
                  these systems, research software in HPC needs to keep up
                  with the rapid development of hardware architectures. Since
                  manual tuning of software to each and every architecture is
                  neither sustainable nor viable, we aim to tackle this
                  challenge through appropriate software design. In this
                  article, we aim to improve the performance and
                  sustainability of FMSolvr, a parallel Fast Multipole Method
                  for Molecular Dynamics, by adapting it to Non-Uniform Memory
                  Access architectures in a portable and maintainable way. The
                  parallelization of FMSolvr is based on Eventify, an
                  event-based tasking framework we co-developed with FMSolvr.
                  We describe a layered software architecture that enables the
                  separation of the Fast Multipole Method from its
                  parallelization. The focus of this article is on the
                  development and analysis of a reusable NUMA module that
                  improves performance while keeping both layers separated to
                  preserve maintainability and extensibility. By means of the
                  NUMA module we introduce diverse NUMA-aware data
                  distribution, thread pinning and work stealing policies for
                  FMSolvr. During the performance analysis the modular design
                  of the NUMA module was advantageous since it facilitates
                  combination, interchange and redesign of the developed
                  policies. The performance analysis reveals that the runtime
                  of FMSolvr is reduced by $21\%$ from 1.48 ms to 1.16 ms
                  through these policies.},
  month        = jun,
  date         = {2020-06-03},
  organization = {International Conference on
                  Computational Science 2020, Amsterdam
                  (The Netherlands), 3 Jun 2020 - 5 Jun
                  2020},
  cin          = {JSC / IAS-7},
  cid          = {I:(DE-Juel1)JSC-20090406 / I:(DE-Juel1)IAS-7-20180321},
  pnm          = {511 - Computational Science and Mathematical Methods
                  (POF3-511) / PhD no Grant - Doktorand ohne besondere
                  Förderung (PHD-NO-GRANT-20170405)},
  pid          = {G:(DE-HGF)POF3-511 / G:(DE-Juel1)PHD-NO-GRANT-20170405},
  typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
  UT           = {WOS:000841686400031},
  doi          = {10.1007/978-3-030-50436-6_31},
  url          = {https://juser.fz-juelich.de/record/884765},
}