% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference contribution (the abstract describes it as a poster) on porting
% ParFlow to accelerator architectures via its eDSL and Kokkos, submitted to
% the deRSE Conference 2025.
% reportid, subtyp, cin, cid, pnm, pid and typ are not standard BibTeX fields
% (presumably repository metadata from the record export -- confirm before
% relying on them); standard styles ignore unknown fields.
@inproceedings{Benke:1048899,
      author       = {Benke, Jörg and Fahad, Muhammad and Caviedes Voullieme,
                      Daniel and Herten, Andreas and Kollet, Stefan},
      title        = {Porting the hydrologic model {ParFlow} to accelerator
                      architectures using {eDSL} and {Kokkos}},
      booktitle    = {{deRSE} Conference 2025, Karlsruhe (Germany), 25 Feb 2025
                      -- 27 Feb 2025},
      reportid     = {FZJ-2025-05000},
      year         = {2025},
      month        = feb,
      date         = {2025-02-25},
      abstract     = {The ParFlow hydrologic model is an integrated variably
                      saturated groundwater, surface water flow simulator that
                      incorporates subsurface energy transport and land
                      surface processes through the integration of the Common Land
                      Model (CLM) as a module. In addition ParFlow has been
                      coupled to atmospheric models, such as WRF, COSMO and ICON.
                      ParFlow is also integrated in the German climate and weather
                      prediction ICON(-Land) software ecosystem as part of the
                      WarmWorld project and it is an important component of the
                      Terrestrial Systems Modeling Platform (TSMP), which enables
                      integrated simulations from the bedrock, across the land
                      surface to the top of the atmosphere with the coupled
                      ICON/COSMO-CLM-ParFlow modeling system. ParFlow is written
                      in C (with additional Fortran 90 parts (especially if CLM is
                      enabled)) and uses the parallelization methods MPI, OpenMP,
                      native CUDA support and the programming model Kokkos (with
                      the backend CUDA, HIP or OpenMP). As a matter of fact, the
                      parallelism in ParFlow has been abstracted early on in what
                      is called an embedded Domain Specific Language (eDSL) which
                      is leading to a best-practice separation-of-concerns, which
                      means the domain scientist/developer does not see e.g. a
                      single MPI call when programming in ParFlow. Since future
                      hardware will be characterized by varying architectures
                      there was a demand to also enable HIP for AMD architectures.
                      For example, the pre-exascale HPC system LUMI of EuroHPC is
                      based on an accelerator consisting of AMD GPUs. To implement
                      HIP for ParFlow via the eDSL the porting was done using
                      Kokkos because Cuda was already incorporated in Parflow via
                      Kokkos and the Kokkos ecosystem also includes an
                      implementation of the HIP programming model for AMD GPUs and
                      which resulted for ParFlow in a high degree of performance
                      portability. Performance and scalability have been
                      demonstrated on JUWELS Booster (Jülich Supercomputing
                      Centre) for Nvidia GPUs (Nvidia A100) and also on the LUMI
                      supercomputer at CSC (Finland) with AMD MI250X
                      accelerators. In this poster we will present the eDSL of
                      ParFlow and also how Kokkos (with the Cuda and HIP backend)
                      is included in its eDSL and to allow ParFlow to reach
                      performance portability on the basis of the eDSL and Kokkos,
                      especially for Nvidia (Cuda) and on AMD platforms (HIP) with
                      the change of only a limited amount of lines. We will also
                      present scaling plots for different machines and
                      accelerators and how to enable HIP as a Kokkos backend of
                      ParFlow using the eDSL of ParFlow.},
      subtyp       = {After Call},
      cin          = {JSC / IBG-3},
      cid          = {I:(DE-Juel1)JSC-20090406 / I:(DE-Juel1)IBG-3-20101118},
      pnm          = {2A5 - Exascale Earth System Modeling (CARF - CCA)
                      (POF4-2A5) / 5112 - Cross-Domain Algorithms, Tools, Methods
                      Labs (ATMLs) and Research Groups (POF4-511) / 5122 - Future
                      Computing \& Big Data Systems (POF4-512) / EoCoE-III -
                      Energy oriented Center of Excellence
                      (101143931\_16HPC102K)},
      pid          = {G:(DE-HGF)POF4-2A5 / G:(DE-HGF)POF4-5112 /
                      G:(DE-HGF)POF4-5122 / G:(EU-Grant)101143931\_16HPC102K},
      typ          = {PUB:(DE-HGF)24},
      url          = {https://juser.fz-juelich.de/record/1048899},
}