% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article in "Data Intelligence" 4(2), 2022.
% Fields such as reportid, cin, ddc, cid, pnm, pid, typ and UT are not part of
% the standard BibTeX data model; standard styles silently ignore them.
% (Presumably repository bookkeeping from the record given in `url` -- confirm
% before relying on their meaning.)
% Only the acronym "HPC" is brace-protected in the title; the remaining words
% are stored in Title Case so that the bibliography style decides the casing.
@article{Mozaffari:906810,
      author       = {Mozaffari, Amirpasha and Langguth, Michael and Gong, Bing
                      and Ahring, Jessica and Campos, Adrian Rojas and Nieters,
                      Pascal and Escobar, Otoniel José Campos and Wittenbrink,
                      Martin and Baumann, Peter and Schultz, Martin G.},
      title        = {{HPC}-oriented Canonical Workflows for Machine
                      Learning Applications in Climate and Weather
                      Prediction},
      journal      = {Data Intelligence},
      volume       = {4},
      number       = {2},
      issn         = {2096-7004},
      address      = {Cambridge, MA},
      publisher    = {MIT Press},
      reportid     = {FZJ-2022-01707},
      pages        = {271--285},
      year         = {2022},
      abstract     = {Machine learning (ML) applications in weather and climate
                      are gaining momentum as big data and the immense increase in
                      High-performance computing (HPC) power are paving the way.
                      Ensuring FAIR data and reproducible ML practices are
                      significant challenges for Earth system researchers. Even
                      though the FAIR principle is well known to many scientists,
                      research communities are slow to adopt them. Canonical
                      Workflow Framework for Research (CWFR) provides a platform
                      to ensure the FAIRness and reproducibility of these
                      practices without overwhelming researchers. This conceptual
                      paper envisions a holistic CWFR approach towards ML
                      applications in weather and climate, focusing on HPC and big
                      data. Specifically, we discuss Fair Digital Object (FDO) and
                      Research Object (RO) in the DeepRain project to achieve
                      granular reproducibility. DeepRain is a project that aims to
                      improve precipitation forecast in Germany by using ML. Our
                      concept envisages the raster datacube to provide data
                      harmonization and fast and scalable data access. We suggest
                      the Jupyter notebook as a single reproducible experiment. In
                      addition, we envision JupyterHub as a scalable and
                      distributed central platform that connects all these
                      elements and the HPC resources to the researchers via an
                      easy-to-use graphical interface.},
      cin          = {JSC},
      ddc          = {020},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
                      (SDLs) and Research Groups (POF4-511) / Verbundprojekt
                      DeepRain: Effiziente Lokale Niederschlagsvorhersage durch
                      Maschinelles Lernen (01IS18047A) / Earth System Data
                      Exploration (ESDE) / Earth System Data Exploration (ESDE)},
      pid          = {G:(DE-HGF)POF4-5111 / G:(BMBF)01IS18047A /
                      G:(DE-Juel-1)ESDE / G:(DE-Juel-1)ESDE},
      typ          = {PUB:(DE-HGF)16},
      UT           = {WOS:000850893200010},
      doi          = {10.1162/dint_a_00131},
      url          = {https://juser.fz-juelich.de/record/906810},
}