% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Villamar et al. (2024), "Metadata practices for simulation workflows".
% This is an arXiv preprint with no journal, so it is typed as misc and the
% arXiv identifier is carried in eprint / archiveprefix / primaryclass.
% The fields reportid, cin, cid, pnm, pid and typ are not standard BibTeX
% fields (presumably bookkeeping data from the exporting repository; confirm
% before relying on them) and are ignored by standard bibliography styles.
% Underscores in pnm and pid are escaped as \_ so they stay plain text if a
% custom style ever typesets them.
@misc{Villamar:1030590,
      author        = {Villamar, Jose and Kelbling, Matthias and More, Heather and
                       Denker, Michael and Tetzlaff, Tom and Senk, Johanna and
                       Thober, Stephan},
      title         = {Metadata practices for simulation workflows},
      publisher     = {arXiv},
      eprint        = {2408.17309},
      archiveprefix = {arXiv},
      primaryclass  = {cs.IR},
      reportid      = {FZJ-2024-05343, 2408.17309},
      year          = {2024},
      abstract      = {Computer simulations are an essential pillar of knowledge
                       generation in science. Understanding, reproducing, and
                       exploring the results of simulations relies on tracking and
                       organizing metadata describing numerical experiments.
                       However, the models used to understand real-world systems,
                       and the computational machinery required to simulate them,
                       are typically complex, and produce large amounts of
                       heterogeneous metadata. Here, we present general practices
                       for acquiring and handling metadata that are agnostic to
                       software and hardware, and highly flexible for the user.
                       These consist of two steps: 1) recording and storing raw
                       metadata, and 2) selecting and structuring metadata. As a
                       proof of concept, we develop the Archivist, a Python tool
                       to help with the second step, and use it to apply our
                       practices to distinct high-performance computing use cases
                       from neuroscience and hydrology. Our practices and the
                       Archivist can readily be applied to existing workflows
                       without the need for substantial restructuring. They
                       support sustainable numerical workflows, facilitating
                       reproducibility and data reuse in generic simulation-based
                       research.},
      keywords      = {Information Retrieval (cs.IR) (Other) / FOS: Computer and
                       information sciences (Other)},
      cin           = {IAS-6 / IAS-9},
      cid           = {I:(DE-Juel1)IAS-6-20130828 / I:(DE-Juel1)IAS-9-20201008},
      pnm           = {5232 - Computational Principles (POF4-523) / 1121 -
                       Digitalization and Systems Technology for Flexibility
                       Solutions (POF4-112) / MetaMoSim - Generic metadata
                       management for reproducible high-performance-computing
                       simulation workflows - MetaMoSim (ZT-I-PF-3-026) / EBRAINS
                       2.0 - EBRAINS 2.0: A Research Infrastructure to Advance
                       Neuroscience and Brain Health (101147319) / Helmholtz
                       Platform for Research Software Engineering - Preparatory
                       Study (HiRSE\_PS-20220812) / ACA - Advanced Computing
                       Architectures (SO-092) / JL SMHB - Joint Lab Supercomputing
                       and Modeling for the Human Brain (JL SMHB-2021-2027) /
                       Brain-Scale Simulations (jinb33\_20220812) / ICEI -
                       Interactive Computing E-Infrastructure for the Human Brain
                       Project (800858)},
      pid           = {G:(DE-HGF)POF4-5232 / G:(DE-HGF)POF4-1121 /
                       G:(DE-Juel-1)ZT-I-PF-3-026 / G:(EU-Grant)101147319 /
                       G:(DE-Juel-1)HiRSE\_PS-20220812 / G:(DE-HGF)SO-092 /
                       G:(DE-Juel1)JL SMHB-2021-2027 /
                       G:(DE-Juel1)jinb33\_20220812 / G:(EU-Grant)800858},
      typ           = {PUB:(DE-HGF)25},
      doi           = {10.48550/arXiv.2408.17309},
      url           = {https://juser.fz-juelich.de/record/1030590},
}