% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference contribution: provenance tracking for NEST simulations with
% Alpaca, presented at the Bernstein Conference 2024.
%
% Notes for maintainers:
%  * The citation key is kept exactly as exported so that existing \cite
%    commands continue to resolve.
%  * The conference name, venue and dates are stored in booktitle, the field
%    that inproceedings entries require.
%  * year and month are kept next to date so that styles which do not read
%    date still get a publication year.
%  * reportid, subtyp, cin, cid, pnm, pid and typ are not standard fields and
%    are ignored by standard styles; they look like repository metadata for
%    the record given in url (confirm against that record before editing).
@inproceedings{Khler:1031656,
  author    = {Köhler, Cristiano A. and Kern, Moritz and Grün, Sonja and
               Denker, Michael},
  title     = {Tracking the provenance of data generation and analysis
               in {NEST} simulations},
  booktitle = {Bernstein Conference 2024, Frankfurt am Main (Germany),
               29 Sep 2024 -- 2 Oct 2024},
  reportid  = {FZJ-2024-05772},
  year      = {2024},
  month     = sep,
  date      = {2024-09-29},
  abstract  = {Neural simulations using NEST are typically executed by a
               Python script that configures the simulator kernel, builds
               the network, and runs the simulation. The result is a series
               of files containing the simulated network activity, which
               can then be analyzed to provide insights into the neural
               activity. Despite the availability of file headers to
               identify the origin of the outputs, a user analyzing the
               data must still interpret the findings with respect to the
               simulation setup, network connectivity, and parameters of
               the neuronal and synaptic models. This information is not
               immediately available, as the exact details of the
               simulation configuration are understandable only by
               referring to the original script, which makes it challenging
               to share simulation results, especially in collaborative
               contexts. In addition, the researcher may change simulation
               parameters over time, and tracking those changes becomes
               increasingly difficult among collaborators with access to
               shared files with the simulation output. Therefore, the
               final results of a NEST simulation lack detailed provenance
               to link each output to the detailed description of how the
               network was instantiated and run. Here we showcase how Alpaca
               (doi:10.5281/zenodo.10276510; RRID:SCR\_023739) [1] helps
               to capture provenance in a typical NEST simulation
               experiment and subsequent data analysis with the Elephant
               (doi:10.5281/zenodo.1186602; RRID:SCR\_003833) toolbox [2].
               Alpaca is a toolbox that captures provenance during the
               execution of Python scripts. It uses decorators to record
               the details of each function executed and associated data
               objects. First, we demonstrate that Alpaca can capture
               end-to-end provenance in a workflow that executes multiple
               simulations with distinct parameters and performs a combined
               analysis of all generated data. Second, we highlight how
               data objects are annotated with simulation details using the
               Neo library [3] to identify the data source in the
               simulation. Third, we show how the details of the network
               creation using the PyNEST interface are captured and related
               to each data output and analysis result. In the end, this
               approach contributes to representing the simulated data and
               analysis results according to the FAIR principles [4]. The
               results findability is improved with the detailed
               provenance, the interoperability is supported by a
               standardized data model, and data may be reused due to the
               enhanced description of the data generation and analysis
               processes. REFERENCES [1] Köhler, C.A., Ulianych, D.,
               Grün, S., Decker, S., Denker, M., 2024. Facilitating the
               sharing of electrophysiology data analysis results through
               in-depth provenance capture. eNeuro 11, ENEURO.0476-23.2024,
               10.1523/ENEURO.0476-23.2024 [2] Denker, M., Yegenoglu, A.,
               Grün, S., 2018. Collaborative HPC-enabled workflows on the
               HBP Collaboratory using the Elephant framework.
               Neuroinformatics 2018, P19, 10.12751/incf.ni2018.0019 [3]
               Garcia, S., Guarino, D., Jaillet, F., Jennings, T.,
               Pröpper, R., Rautenberg, P.L., Rodgers, C.C., Sobolev, A.,
               Wachtler, T., Yger, P., Davison, A.P., 2014. Neo: an object
               model for handling electrophysiology data in multiple
               formats. Frontiers in Neuroinformatics 8, 10,
               10.3389/fninf.2014.00010 [4] Wilkinson, M.D., Dumontier, M.,
               Aalbersberg, Ij.J., Appleton, G., Axton, M., Baak, A.,
               Blomberg, N. et al., 2016. The FAIR Guiding Principles for
               scientific data management and stewardship. Scientific Data
               3, 160018, 10.1038/sdata.2016.18},
  subtyp    = {After Call},
  keywords  = {Computational Neuroscience (Other) / Data analysis, machine
               learning and neuroinformatics (Other)},
  cin       = {IAS-6 / INM-10},
  cid       = {I:(DE-Juel1)IAS-6-20130828 / I:(DE-Juel1)INM-10-20170113},
  pnm       = {5235 - Digitization of Neuroscience and User-Community
               Building (POF4-523) / 5231 - Neuroscientific Foundations
               (POF4-523) / HDS LEE - Helmholtz School for Data Science in
               Life, Earth and Energy (HDS LEE) (HDS-LEE-20190612) /
               EBRAINS 2.0 - EBRAINS 2.0: A Research Infrastructure to
               Advance Neuroscience and Brain Health (101147319) / HBP SGA2
               - Human Brain Project Specific Grant Agreement 2 (785907) /
               HBP SGA3 - Human Brain Project Specific Grant Agreement 3
               (945539) / Algorithms of Adaptive Behavior and their
               Neuronal Implementation in Health and Disease
               (iBehave-20220812) / JL SMHB - Joint Lab Supercomputing and
               Modeling for the Human Brain (JL SMHB-2021-2027)},
  pid       = {G:(DE-HGF)POF4-5235 / G:(DE-HGF)POF4-5231 /
               G:(DE-Juel1)HDS-LEE-20190612 / G:(EU-Grant)101147319 /
               G:(EU-Grant)785907 / G:(EU-Grant)945539 /
               G:(DE-Juel-1)iBehave-20220812 / G:(DE-Juel1)JL
               SMHB-2021-2027},
  typ       = {PUB:(DE-HGF)24},
  doi       = {10.12751/NNCN.BC2024.031},
  url       = {https://juser.fz-juelich.de/record/1031656},
}