% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article: Thiess et al., Physical Review B 85, 235103 (2012).
% "pages" holds the article number (the journal uses no page ranges here).
% The fields reportid, cin, ddc, cid, pnm, pid, shelfmark, typ and UT are
% non-standard; they appear to be repository export metadata and are ignored
% by the standard bibliography styles.  Identifier values wrapped in $...$
% (those containing "_") are kept exactly as exported.
@ARTICLE{Thiess:21269,
      author       = {Thiess, A. and Zeller, R. and Bolten, M. and Dederichs, P.
                      H. and Blügel, S.},
      title        = {Massively parallel density functional calculations for
                      thousands of atoms: {KKRnano}},
      journal      = {Physical Review B},
      volume       = {85},
      number       = {23},
      issn         = {1098-0121},
      address      = {College Park, Md.},
      publisher    = {APS},
      reportid     = {PreJuSER-21269},
      pages        = {235103},
      year         = {2012},
      note         = {We like to thank W. Lambrecht and P. Mavropoulos for
                      fruitful discussions. Financial support of the DAAD and both
                      computational resources as well as technical support of the
                      Jülich Supercomputing Center are gratefully acknowledged.
                      This work benefited from discussions within the SFB 917
                      Nanoswitches.},
      abstract     = {Applications of existing precise electronic-structure
                      methods based on density functional theory are typically
                      limited to the treatment of about 1000 inequivalent atoms,
                      which leaves unresolved many open questions in material
                      science, e. g., on complex defects, interfaces,
                      dislocations, and nanostructures. KKRnano is a new massively
                      parallel linear scaling all-electron density functional
                      algorithm in the framework of the Korringa-Kohn-Rostoker
                      (KKR) Green's-function method. We conceptualized, developed,
                      and optimized KKRnano for large-scale applications of many
                      thousands of atoms without compromising on the precision of
                      a full-potential all-electron method, i.e., it is a method
                      without any shape approximation of the charge density or
                      potential. A key element of the new method is the iterative
                      solution of the sparse linear Dyson equation, which we
                      parallelized atom by atom, across energy points in the
                      complex plane and for each spin degree of freedom using the
                      message passing interface standard, followed by a
                      lower-level OpenMP parallelization. This hybrid four-level
                      parallelization allows for an efficient use of up to 100 000
                      processors on the latest generation of supercomputers. The
                      iterative solution of the Dyson equation is significantly
                      accelerated, employing preconditioning techniques making use
                      of coarse-graining principles expressed in a block-circulant
                      preconditioner. In this paper, we will describe the
                      important elements of this new algorithm, focusing on the
                      parallelization and preconditioning and showing scaling
                      results for NiPd alloys up to 8192 atoms and 65 536
                      processors. At the end, we present an order-N algorithm for
                      large-scale simulations of metallic systems, making use of
                      the nearsighted principle of the KKR Green's-function
                      approach by introducing a truncation of the electron
                      scattering to a local cluster of atoms, the size of which is
                      determined by the requested accuracy. By exploiting this
                      algorithm, we show linear scaling calculations of more than
                      16 000 NiPd atoms.},
      keywords     = {J (WoSType)},
      cin          = {PGI-1 / IAS-1 / JARA-FIT / JARA-SIM / PGI-2 / JARA-HPC},
      ddc          = {530},
      cid          = {I:(DE-Juel1)PGI-1-20110106 / I:(DE-Juel1)IAS-1-20090406 /
                      $I:(DE-82)080009_20140620$ / I:(DE-Juel1)VDB1045 /
                      I:(DE-Juel1)PGI-2-20110106 / $I:(DE-82)080012_20140620$},
      pnm          = {Grundlagen für zukünftige Informationstechnologien /
                      Quantensimulation für realistische Grenzflächen in
                      Nanosystemen $(jiff02_20090701)$},
      pid          = {G:(DE-Juel1)FUEK412 / $G:(DE-Juel1)jiff02_20090701$},
      shelfmark    = {Physics, Condensed Matter},
      typ          = {PUB:(DE-HGF)16},
      UT           = {WOS:000304748900001},
      doi          = {10.1103/PhysRevB.85.235103},
      url          = {https://juser.fz-juelich.de/record/21269},
}