% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@ARTICLE{Winkel:17919,
author = {Winkel, M. and Speck, R. and Hübner, H. and Arnold, L. and
Krause, R. and Gibbon, P.},
title = {{A} massively parallel, multi-disciplinary {B}arnes-{H}ut
tree code for extreme-scale {N}-body simulations},
journal = {Computer Physics Communications},
volume = {183},
issn = {0010-4655},
address = {Amsterdam},
publisher = {North Holland Publ. Co.},
reportid = {PreJuSER-17919},
pages = {880--889},
year = {2012},
note = {The authors gratefully acknowledge the helpful support by
Jülich Supercomputing Centre and the JSC staff, especially
M. Stephan and J. Docter. This work was supported in part by
the Alliance Program of the Helmholtz Association
(HA216/EMMI), the BMBF project ScaFaCoS and the EU TEXT
project, as well as additional computing time via the VSR
project JZAM04. R.S. and R.K. would like to thank the Swiss
Platform for High-Performance and High-Productivity
Computing (HP2C) for funding and support.},
abstract = {The efficient parallelization of fast multipole-based
algorithms for the N-body problem is one of the most
challenging topics in high performance scientific computing.
The emergence of non-local, irregular communication patterns
generated by these algorithms can easily create an
insurmountable bottleneck on supercomputers with hundreds of
thousands of cores. To overcome this obstacle we have
developed an innovative parallelization strategy for
Barnes-Hut tree codes on present and upcoming HPC multicore
architectures. This scheme, based on a combined MPI-Pthreads
approach, permits an efficient overlap of computation and
data exchange. We highlight the capabilities of this method
on the full IBM Blue Gene/P system JUGENE at Jülich
Supercomputing Centre and demonstrate scaling across 294,912
cores with up to 2,048,000,000 particles. Applying our
implementation PEPC to laser-plasma interaction and vortex
particle methods close to the continuum limit, we
demonstrate its potential for ground-breaking advances in
large-scale particle simulations.},
keywords = {J (WoSType)},
cin = {JSC},
ddc = {004},
cid = {I:(DE-Juel1)JSC-20090406},
pnm = {Scientific Computing (FUEK411) / 411 - Computational
Science and Mathematical Methods (POF2-411)},
pid = {G:(DE-Juel1)FUEK411 / G:(DE-HGF)POF2-411},
shelfmark = {Computer Science, Interdisciplinary Applications / Physics,
Mathematical},
typ = {PUB:(DE-HGF)16},
UT = {WOS:000301028700004},
doi = {10.1016/j.cpc.2011.12.013},
url = {https://juser.fz-juelich.de/record/17919},
}