% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

@ARTICLE{Teijeiro:860718,
      author       = {Teijeiro, Carlos and Hammerschmidt, Thomas and Drautz, Ralf
                      and Sutmann, Godehard},
      title        = {{O}ptimized parallel simulations of analytic bond-order
                      potentials on hybrid shared/distributed memory with {MPI}
                      and {O}pen{MP}},
      journal      = {The International Journal of High Performance Computing
                      Applications},
      volume       = {32},
      number       = {2},
      issn         = {1741-2846},
      address      = {Thousand Oaks, Calif.},
      publisher    = {Sage Science Press},
      reportid     = {FZJ-2019-01383},
      pages        = {227--241},
      year         = {2019},
      abstract     = {Analytic bond-order potentials (BOPs) make it possible
                      to obtain a highly accurate description of interatomic
                      interactions at a reasonable computational cost. However,
                      for simulations with very large systems, the high memory
                      demands require the use of a parallel implementation,
                      which at the same time also optimizes the use of
                      computational resources. The calculations of analytic
                      BOPs are performed for a restricted volume around every
                      atom and have therefore been shown to be well suited for
                      a message passing interface (MPI)-based parallelization
                      based on a domain decomposition scheme, in which one
                      process manages one big domain using the entire memory
                      of a compute node. On the basis of this approach, the
                      present work focuses on the analysis and enhancement of
                      its performance on shared memory by using OpenMP threads
                      on each MPI process, in order to use many cores per node
                      to speed up computations and minimize memory
                      bottlenecks. Different algorithms are described and
                      their corresponding performance results are presented,
                      showing significant performance gains for highly
                      parallel systems with hybrid MPI/OpenMP simulations with
                      up to several thousand threads.},
      cin          = {JSC},
      ddc          = {004},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {511 - Computational Science and Mathematical Methods
                      (POF3-511)},
      pid          = {G:(DE-HGF)POF3-511},
      typ          = {PUB:(DE-HGF)16},
      UT           = {WOS:000458830600001},
      doi          = {10.1177/1094342017727060},
      url          = {https://juser.fz-juelich.de/record/860718},
}
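
% Illustrative note (kept as a comment so the file remains valid BibTeX; this
% is not part of the cited work's source code): the abstract above describes a
% hybrid scheme in which each MPI rank owns one spatial domain and OpenMP
% threads share that rank's per-node work. A minimal C sketch of the general
% pattern, assuming a hypothetical per-atom energy routine
% compute_atom_energy() and an arbitrary local atom count n_local:
%
%   #include <mpi.h>
%   #include <omp.h>
%   #include <stdio.h>
%
%   /* Hypothetical stand-in for the analytic BOP evaluation of one atom. */
%   static double compute_atom_energy(int i) { return 1.0 / (i + 1); }
%
%   int main(int argc, char **argv) {
%       /* Request thread support so OpenMP regions coexist with MPI calls. */
%       int provided;
%       MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
%
%       int rank, size;
%       MPI_Comm_rank(MPI_COMM_WORLD, &rank);
%       MPI_Comm_size(MPI_COMM_WORLD, &size);
%
%       /* Each rank owns one domain of atoms (domain decomposition). */
%       const int n_local = 100000;
%       double local_energy = 0.0;
%
%       /* OpenMP threads divide the rank's atoms among the node's cores. */
%       #pragma omp parallel for reduction(+:local_energy)
%       for (int i = 0; i < n_local; i++)
%           local_energy += compute_atom_energy(i);
%
%       /* Combine per-domain results across ranks. */
%       double total_energy = 0.0;
%       MPI_Reduce(&local_energy, &total_energy, 1, MPI_DOUBLE, MPI_SUM,
%                  0, MPI_COMM_WORLD);
%
%       if (rank == 0)
%           printf("total energy: %f (%d ranks)\n", total_energy, size);
%
%       MPI_Finalize();
%       return 0;
%   }
%
% Build with, e.g., "mpicc -fopenmp" and launch one rank per node with
% OMP_NUM_THREADS set to the node's core count; the paper's actual algorithms
% and data structures are more involved than this sketch.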