% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference contribution (described as a poster in its own abstract) on a
% GPU-accelerated version of the JuSPIC particle-in-cell code.
%
% Notes for maintainers:
%  - booktitle carries the conference name; venue and eventdate hold the place
%    and dates of the event (biblatex fields, ignored by classic styles).
%  - year/month are kept next to date so the entry works with both classic
%    BibTeX-style tools (bibtex8) and biber.
%  - reportid, subtyp, cin, cid, pnm, pid and typ look like repository export
%    metadata; standard styles ignore unknown fields. TODO confirm whether
%    school is still wanted here, since it is a thesis field.
@inproceedings{Herten:824109,
  author    = {Herten, Andreas and Pleiter, Dirk and Brömmel, Dirk},
  title     = {Accelerating Plasma Physics with {GPUs}},
  booktitle = {Perspectives of {GPU} Computing in Science},
  venue     = {Rome, Italy},
  eventdate = {2016-09-26/2016-09-28},
  school    = {Sapienza Università di Roma},
  reportid  = {FZJ-2016-06733},
  year      = {2016},
  month     = sep,
  date      = {2016-09-26},
  abstract  = {JuSPIC is a particle-in-cell (PIC) code, developed in the
               Simulation Lab for Plasma Physics of the Jülich
               Supercomputing Centre. The open source code is based on PSC
               by H. Ruhl, slimmed-down and rewritten in modern Fortran.
               JuSPIC simulates particles under the influence of
               electromagnetic fields, using the relativistic Vlasov
               equation and Maxwell's equations (integrated using the
               Finite Difference Time Domain scheme). The program uses a
               regular mesh for the Maxwell fields and the particle
               charge/current densities. Inside the mesh, quasi-particles
               with continuous coordinates are modeled via distribution
               functions. JuSPIC is part of the High-Q club, attesting that
               it can efficiently scale to the full JUQUEEN supercomputer
               (currently the \#13 on the Top 500 list): 1.8 million threads
               running on 458 thousand cores can collaboratively compute
               plasma simulations. Local node-level parallelism is achieved
               by means of OpenMP, communication between nodes relies on
               MPI. To leverage the latest generation of supercomputers
               coming equipped with dedicated accelerator technologies
               (GPUs and other many-core architectures), JuSPIC is
               currently being extended. In this poster we present a
               GPU-accelerated version of the program, making use of
               different programming models. We show first results of
               performance studies, comparing OpenACC and CUDA. While
               OpenACC aims to offer portability and flexibility by means
               of few changes to the code, the performance of the generated
               program might suffer in practice. To measure the deficit,
               the compute-intensive parts of the program are in addition
               also implemented in CUDA Fortran. To explore scalability
               properties of the application for static particle
               distributions on a heterogeneous architecture, we make use
               of semi-empirical performance models.},
  subtyp    = {After Call},
  cin       = {JSC},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {511 - Computational Science and Mathematical Methods
               (POF3-511)},
  pid       = {G:(DE-HGF)POF3-511},
  typ       = {PUB:(DE-HGF)24},
  url       = {https://juser.fz-juelich.de/record/824109},
}