% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
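%
% As a minimal sketch of how an entry from this file can be cited with a
% Unicode-aware backend, the LaTeX preamble below uses biblatex with biber;
% the file name “references.bib” and the sentence in the document body are
% illustrative assumptions:
%
%   \documentclass{article}
%   \usepackage[backend=biber]{biblatex}
%   \addbibresource{references.bib}  % assumed name of the file holding this entry
%   \begin{document}
%   OpenFOAM scaling results are reported in \cite{Ghasemi:889314}.
%   \printbibliography
%   \end{document}
%
% Compile with pdflatex, then biber, then pdflatex again to resolve the citation.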

@INPROCEEDINGS{Ghasemi:889314,
      author       = {Ghasemi, Abouzar and Gibbon, Paul and Beale, Steven},
      title        = {{P}erformance {E}valuation of {O}pen{FOAM} on {J}uelich
                      {S}upercomputing {F}acilities ({JURECA}, {JUWELS} and
                      {JUSUF})},
      reportid     = {FZJ-2021-00206},
      year         = {2020},
      abstract     = {The Forschungszentrum Juelich is utilizing OpenFOAM to
                      perform calculations for electrochemical devices such as
                      electrolyzers and fuel cells. For some time now, improving
                      the performance of the OpenFOAM software suite on HPC
                      facilities has posed a challenge for users wanting to scale
                      up problem sizes to several tens of millions of grid cells. To
                      keep time-to-solution within reasonable limits, this
                      requires scalability to many thousands of compute cores.
                      Herein, we investigate the performance and scalability of
                      OpenFOAM-v1912 for the well-known motorBike benchmark
                      test case, including a strong scaling study of the
                      simpleFoam solver at the Juelich supercomputing facilities
                      (JUWELS, JURECA, JUSUF). Later we will consider in-house
                      electrochemical models, with specialized solvers and domain
                      decomposition techniques. JURECA was built in 2015 and
                      uses Haswell processors with 24 cores per node; JUWELS
                      was built in 2018 and is based on Skylake processors
                      with 48 cores per node; and JUSUF was built in 2020 and
                      is based on the AMD EPYC 7742, with 128 cores per node.
                      Scalability limits of up to 3000 and 1000 compute cores,
                      respectively, are found for cases with and without
                      writing data output to disk. It is
                      shown that the impact of the output can be quite severe when
                      using the default output generation option, but that this
                      can be substantially mitigated by making use of the ADIOS
                      library. By analysing the most time-consuming MPI function
                      calls (collective and point-to-point), as well as the
                      performance of the different MPI implementations,
                      OpenMPI 4.0.2, IntelMPI 2018/2019 and ParaStation MPI 5.4,
                      further potential optimizations are identified. Finally, we
                      discuss the newly added “PETSc4FOAM” library in OpenFOAM,
                      which makes it possible to use external sparse linear
                      solvers such as PETSc/Hypre.},
      month         = {Oct},
      date          = {2020-10-13},
      organization  = {8th OpenFOAM Conference 2020, online,
                       13 Oct 2020 - 15 Oct 2020},
      subtyp        = {After Call},
      cin          = {JSC / IEK-14},
      cid          = {I:(DE-Juel1)JSC-20090406 / I:(DE-Juel1)IEK-14-20191129},
      pnm          = {135 - Fuel Cells (POF3-135) / 511 - Computational Science
                      and Mathematical Methods (POF3-511)},
      pid          = {G:(DE-HGF)POF3-135 / G:(DE-HGF)POF3-511},
      typ          = {PUB:(DE-HGF)6},
      url          = {https://juser.fz-juelich.de/record/889314},
}
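
% The strong scaling study summarized in the abstract above follows OpenFOAM's
% standard workflow of decomposing the case and running the solver in parallel.
% A minimal sketch is given below; the motorBike case, the core count and the
% decomposition method are illustrative assumptions, not the exact settings
% used in the study:
%
%   // system/decomposeParDict (excerpt)
%   numberOfSubdomains  1024;    // assumed number of MPI ranks
%   method              scotch;  // assumed decomposition method
%
%   # shell commands, run from the motorBike case directory
%   decomposePar
%   mpirun -np 1024 simpleFoam -parallel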