% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like "bibtex8" or
% "biber".
% Preprint record 1030590 from juser.fz-juelich.de (see the url field).
% Typed as misc rather than article because the record names no journal;
% the arXiv identifier is carried by eprint, archiveprefix and primaryclass
% (primaryclass is taken from the arXiv class listed in the keywords field).
% reportid, cin, cid, pnm, pid and typ are repository bookkeeping fields;
% they are not standard bibliography fields and are kept for provenance.
@misc{Villamar:1030590,
  author        = {Villamar, Jose and Kelbling, Matthias and More, Heather and
                   Denker, Michael and Tetzlaff, Tom and Senk, Johanna and
                   Thober, Stephan},
  title         = {Metadata practices for simulation workflows},
  publisher     = {arXiv},
  reportid      = {FZJ-2024-05343, 2408.17309},
  year          = {2024},
  eprint        = {2408.17309},
  archiveprefix = {arXiv},
  primaryclass  = {cs.IR},
  abstract      = {Computer simulations are an essential pillar of knowledge
                   generation in science. Understanding, reproducing, and
                   exploring the results of simulations relies on tracking and
                   organizing metadata describing numerical experiments.
                   However, the models used to understand real-world systems,
                   and the computational machinery required to simulate them,
                   are typically complex, and produce large amounts of
                   heterogeneous metadata. Here, we present general practices
                   for acquiring and handling metadata that are agnostic to
                   software and hardware, and highly flexible for the user.
                   These consist of two steps: 1) recording and storing raw
                   metadata, and 2) selecting and structuring metadata. As a
                   proof of concept, we develop the Archivist, a Python tool to
                   help with the second step, and use it to apply our practices
                   to distinct high-performance computing use cases from
                   neuroscience and hydrology. Our practices and the Archivist
                   can readily be applied to existing workflows without the
                   need for substantial restructuring. They support sustainable
                   numerical workflows, facilitating reproducibility and data
                   reuse in generic simulation-based research.},
  keywords      = {Information Retrieval (cs.IR) (Other) / FOS: Computer and
                   information sciences (Other)},
  cin           = {IAS-6 / IAS-9},
  cid           = {I:(DE-Juel1)IAS-6-20130828 / I:(DE-Juel1)IAS-9-20201008},
  pnm           = {5232 - Computational Principles (POF4-523) / 1121 -
                   Digitalization and Systems Technology for Flexibility
                   Solutions (POF4-112) / MetaMoSim - Generic metadata
                   management for reproducible high-performance-computing
                   simulation workflows - MetaMoSim (ZT-I-PF-3-026) / EBRAINS
                   2.0 - EBRAINS 2.0: A Research Infrastructure to Advance
                   Neuroscience and Brain Health (101147319) / Helmholtz
                   Platform for Research Software Engineering - Preparatory
                   Study (HiRSE\_PS-20220812) / ACA - Advanced Computing
                   Architectures (SO-092) / JL SMHB - Joint Lab Supercomputing
                   and Modeling for the Human Brain (JL SMHB-2021-2027) /
                   Brain-Scale Simulations (jinb33\_20220812) / ICEI -
                   Interactive Computing E-Infrastructure for the Human Brain
                   Project (800858)},
  pid           = {G:(DE-HGF)POF4-5232 / G:(DE-HGF)POF4-1121 /
                   G:(DE-Juel-1)ZT-I-PF-3-026 / G:(EU-Grant)101147319 /
                   G:(DE-Juel-1)HiRSE\_PS-20220812 / G:(DE-HGF)SO-092 /
                   G:(DE-Juel1)JL SMHB-2021-2027 /
                   G:(DE-Juel1)jinb33\_20220812 / G:(EU-Grant)800858},
  typ           = {PUB:(DE-HGF)25},
  doi           = {10.48550/arXiv.2408.17309},
  url           = {https://juser.fz-juelich.de/record/1030590},
}