% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

@article{Karp:1033721,
      author       = {Karp, Martin and Suarez, Estela and Meinke, Jan H. and
                      Andersson, Måns I. and Schlatter, Philipp and Markidis,
                      Stefano and Jansson, Niclas},
      title        = {Experience and analysis of scalable high-fidelity
                      computational fluid dynamics on modular supercomputing
                      architectures},
      journal      = {The International Journal of High Performance Computing
                      Applications},
      volume       = {39},
      number       = {3},
      issn         = {1741-2846},
      address      = {Thousand Oaks, Calif.},
      publisher    = {Sage Science Press},
      reportid     = {FZJ-2024-06575},
      pages        = {329--344},
      year         = {2025},
      abstract     = {The never-ending computational demand from simulations of
                      turbulence makes computational fluid dynamics (CFD) a prime
                      application use case for current and future exascale
                      systems. High-order finite element methods, such as the
                      spectral element method, have been gaining traction as they
                      offer high performance on both multicore CPUs and modern
                      GPU-based accelerators. In this work, we assess how
                      high-fidelity CFD using the spectral element method can
                      exploit the modular supercomputing architecture at scale
                      through domain partitioning, where the computational domain
                      is split between a Booster module powered by GPUs and a
                      Cluster module with conventional CPU nodes. We investigate
                      several different flow cases and computer systems based on
                      the Modular Supercomputing Architecture (MSA). We observe
                      that for our simulations, the communication overhead and
                      load balancing issues incurred by incorporating different
                      computing architectures are seldom worthwhile, especially
                      when I/O is also considered, but when the simulation at hand
                      requires more than the combined global memory on the GPUs,
                      utilizing additional CPUs to increase the available memory
                      can be fruitful. We support our results with a simple
                      performance model to assess when running across modules
                      might be beneficial. As MSA is becoming more widespread and
                      efforts to increase system utilization are growing more
                      important our results give insight into when and how a
                      monolithic application can utilize and spread out to more
                      than one module and obtain a faster time to solution.},
      cin          = {JSC},
      ddc          = {004},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {5122 - Future Computing \& Big Data Systems (POF4-512) /
                      5111 - Domain-Specific Simulation \& Data Life Cycle Labs
                      (SDLs) and Research Groups (POF4-511)},
      pid          = {G:(DE-HGF)POF4-5122 / G:(DE-HGF)POF4-5111},
      typ          = {PUB:(DE-HGF)16},
      UT           = {WOS:001366656300001},
      doi          = {10.1177/10943420241303163},
      url          = {https://juser.fz-juelich.de/record/1033721},
}