% IMPORTANT: This file is UTF-8 encoded. If it contains non-ASCII characters,
% it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date bibliography processor such as
% “bibtex8” or “biber”.
% Journal article record: "Metadata practices for simulation workflows",
% Scientific Data, volume 12 (2025), article number 942 (stored in `pages`).
% The fields reportid, cin, ddc, cid, pnm, pid, typ, pubmed and UT are not
% standard BibTeX fields, so standard styles ignore them. They look like
% bookkeeping identifiers of the repository record given in `url`
% (TODO confirm before relying on their exact format).
% Underscores inside identifiers are written as \_ so they stay literal text.
@article{Villamar:1043152,
  author    = {Villamar, Jose and Kelbling, Matthias and More, Heather and
               Denker, Michael and Tetzlaff, Tom and Senk, Johanna and
               Thober, Stephan},
  title     = {Metadata practices for simulation workflows},
  journal   = {Scientific Data},
  volume    = {12},
  pages     = {942},
  year      = {2025},
  issn      = {2052-4436},
  address   = {London},
  publisher = {Nature Publishing Group},
  doi       = {10.1038/s41597-025-05126-1},
  url       = {https://juser.fz-juelich.de/record/1043152},
  pubmed    = {40473681},
  UT        = {WOS:001503948500006},
  reportid  = {FZJ-2025-02769},
  abstract  = {Computer simulations are an essential pillar of knowledge
               generation in science. Exploring, understanding,
               reproducing, and sharing the results of simulations relies
               on tracking and organizing the metadata describing the
               numerical experiments. The models used to understand
               real-world systems, and the computational machinery required
               to simulate them, are typically complex, and produce large
               amounts of heterogeneous metadata. Here, we present general
               practices for acquiring and handling metadata that are
               agnostic to software and hardware, and highly flexible for
               the user. These consist of two steps: 1) recording and
               storing raw metadata, and 2) selecting and structuring
               metadata. As a proof of concept, we develop the Archivist, a
               Python tool to help with the second step, and use it to
               apply our practices to distinct high-performance computing
               use cases from neuroscience and hydrology. Our practices and
               the Archivist can readily be applied to existing workflows
               without the need for substantial restructuring. They support
               sustainable numerical workflows, fostering replicability,
               reproducibility, data exploration, and data sharing in
               simulation-based research.},
  keywords  = {Information Retrieval (cs.IR) (Other) / FOS: Computer and
               information sciences (Other)},
  cin       = {IAS-6 / IAS-9},
  ddc       = {500},
  cid       = {I:(DE-Juel1)IAS-6-20130828 / I:(DE-Juel1)IAS-9-20201008},
  pnm       = {5232 - Computational Principles (POF4-523) / 5235 -
               Digitization of Neuroscience and User-Community Building
               (POF4-523) / MetaMoSim - Generic metadata management for
               reproducible high-performance-computing simulation workflows
               - MetaMoSim (ZT-I-PF-3-026) / HiRSE - Helmholtz Platform for
               Research Software Engineering (HiRSE-20250220) / Advanced
               Computing Architectures (aca\_20190115) / EBRAINS 2.0 -
               EBRAINS 2.0: A Research Infrastructure to Advance
               Neuroscience and Brain Health (101147319) / Brain-Scale
               Simulations (jinb33\_20220812) / ICEI - Interactive
               Computing E-Infrastructure for the Human Brain Project
               (800858) / JL SMHB - Joint Lab Supercomputing and Modeling
               for the Human Brain (JL SMHB-2021-2027) / DFG project
               G:(GEPRIS)491111487 - Open-Access-Publikationskosten / 2025
               - 2027 / Forschungszentrum Jülich (OAPKFZJ) (491111487)},
  pid       = {G:(DE-HGF)POF4-5232 / G:(DE-HGF)POF4-5235 /
               G:(DE-Juel-1)ZT-I-PF-3-026 / G:(DE-Juel-1)HiRSE-20250220 /
               G:(DE-Juel1)aca\_20190115 / G:(EU-Grant)101147319 /
               G:(DE-Juel1)jinb33\_20220812 / G:(EU-Grant)800858 /
               G:(DE-Juel1)JL SMHB-2021-2027 / G:(GEPRIS)491111487},
  typ       = {PUB:(DE-HGF)16},
}