% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

@ARTICLE{Friedemann:1037349,
      author       = {Friedemann, Sebastian and Keller, Kai and Lu, Yen-Sen and
                      Raffin, Bruno and Bautista-Gomez, Leonardo},
      title        = {{D}ynamic load/propagate/store for data assimilation with
                      particle filters on supercomputers},
      journal      = {Journal of Computational Science},
      volume       = {76},
      issn         = {1877-7503},
      address      = {Amsterdam [et al.]},
      publisher    = {Elsevier},
      reportid     = {FZJ-2025-00662},
      pages        = {102229},
      year         = {2024},
      abstract     = {Several ensemble-based Data Assimilation (DA) methods rely
                      on a propagate/update cycle, where a potentially
                      compute-intensive simulation code propagates multiple
                      states over several consecutive time steps; these states
                      are then analyzed to update the states to be propagated in
                      the next cycle. In this paper, we focus on DA methods
                      where the update can be computed by gathering only
                      lightweight data obtained independently from each of the
                      propagated states. This encompasses particle filters,
                      where one weight is computed from each state, but also
                      methods such as Approximate Bayesian Computation (ABC) and
                      Markov Chain Monte Carlo (MCMC). Such methods can be very
                      compute-intensive, and running them efficiently at scale
                      on supercomputers is challenging. This paper proposes a
                      framework based on an elastic and fault-tolerant
                      runner/server architecture that minimizes data movement
                      while enabling dynamic load balancing. Our approach relies
                      on runners that load, propagate, and store particles from
                      an asynchronously managed distributed particle cache,
                      permitting particles to move from one runner to another in
                      the background while propagation proceeds. The framework
                      is validated with a bootstrap particle filter using the
                      WRF simulation code. We handle up to 2,555 particles on
                      20,442 compute cores. Compared to a file-based
                      implementation, our solution spends up to 2.84× fewer
                      resources (core-seconds) per particle.},
      cin          = {JSC},
      ddc          = {004},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
                      (SDLs) and Research Groups (POF4-511) / EoCoE-II - Energy
                      Oriented Center of Excellence: toward exascale for energy
                      (824158)},
      pid          = {G:(DE-HGF)POF4-5111 / G:(EU-Grant)824158},
      typ          = {PUB:(DE-HGF)16},
      UT           = {WOS:001185868400001},
      doi          = {10.1016/j.jocs.2024.102229},
      url          = {https://juser.fz-juelich.de/record/1037349},
}