% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article introducing KKRnano (Phys. Rev. B 85, 235103, 2012).
% Besides the standard article fields, this entry carries repository
% metadata (reportid, cin, ddc, cid, pnm, pid, shelfmark, typ, UT); unknown
% fields like these are ignored by bibliography styles.
% NOTE(review): the $...$ wrappers in cid/pid/pnm presumably exist only to
% keep the underscores in those identifiers LaTeX-safe -- confirm before
% stripping them.
@article{Thiess:21269,
  author    = {Thiess, A. and Zeller, R. and Bolten, M. and Dederichs, P.
               H. and Blügel, S.},
  title     = {Massively parallel density functional calculations for
               thousands of atoms: {KKRnano}},
  journal   = {Physical Review B},
  volume    = {85},
  number    = {23},
  issn      = {1098-0121},
  address   = {College Park, Md.},
  publisher = {APS},
  reportid  = {PreJuSER-21269},
  pages     = {235103},
  year      = {2012},
  note      = {We like to thank W. Lambrecht and P. Mavropoulos for
               fruitful discussions. Financial support of the DAAD and both
               computational resources as well as technical support of the
               Jülich Supercomputing Center are gratefully acknowledged.
               This work benefited from discussions within the SFB 917
               Nanoswitches.},
  abstract  = {Applications of existing precise electronic-structure
               methods based on density functional theory are typically
               limited to the treatment of about 1000 inequivalent atoms,
               which leaves unresolved many open questions in material
               science, e.g., on complex defects, interfaces,
               dislocations, and nanostructures. KKRnano is a new massively
               parallel linear scaling all-electron density functional
               algorithm in the framework of the Korringa-Kohn-Rostoker
               (KKR) Green's-function method. We conceptualized, developed,
               and optimized KKRnano for large-scale applications of many
               thousands of atoms without compromising on the precision of
               a full-potential all-electron method, i.e., it is a method
               without any shape approximation of the charge density or
               potential. A key element of the new method is the iterative
               solution of the sparse linear Dyson equation, which we
               parallelized atom by atom, across energy points in the
               complex plane and for each spin degree of freedom using the
               message passing interface standard, followed by a
               lower-level OpenMP parallelization. This hybrid four-level
               parallelization allows for an efficient use of up to 100 000
               processors on the latest generation of supercomputers. The
               iterative solution of the Dyson equation is significantly
               accelerated, employing preconditioning techniques making use
               of coarse-graining principles expressed in a block-circulant
               preconditioner. In this paper, we will describe the
               important elements of this new algorithm, focusing on the
               parallelization and preconditioning and showing scaling
               results for NiPd alloys up to 8192 atoms and 65 536
               processors. At the end, we present an order-N algorithm for
               large-scale simulations of metallic systems, making use of
               the nearsighted principle of the KKR Green's-function
               approach by introducing a truncation of the electron
               scattering to a local cluster of atoms, the size of which is
               determined by the requested accuracy. By exploiting this
               algorithm, we show linear scaling calculations of more than
               16 000 NiPd atoms.},
  keywords  = {J (WoSType)},
  cin       = {PGI-1 / IAS-1 / JARA-FIT / JARA-SIM / PGI-2 / JARA-HPC},
  ddc       = {530},
  cid       = {I:(DE-Juel1)PGI-1-20110106 / I:(DE-Juel1)IAS-1-20090406 /
               $I:(DE-82)080009_20140620$ / I:(DE-Juel1)VDB1045 /
               I:(DE-Juel1)PGI-2-20110106 / $I:(DE-82)080012_20140620$},
  pnm       = {Grundlagen für zukünftige Informationstechnologien /
               Quantensimulation für realistische
               Grenzflächen in Nanosystemen $(jiff02_20090701)$},
  pid       = {G:(DE-Juel1)FUEK412 / $G:(DE-Juel1)jiff02_20090701$},
  shelfmark = {Physics, Condensed Matter},
  typ       = {PUB:(DE-HGF)16},
  UT        = {WOS:000304748900001},
  doi       = {10.1103/PhysRevB.85.235103},
  url       = {https://juser.fz-juelich.de/record/21269},
}