% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference contribution by Köhler, Grün and Denker (CoRDI, Aachen, 2025).
% The fields reportid, subtyp, cin, cid, pnm, pid and typ are not standard
% BibTeX/biblatex fields (presumably bookkeeping data of the exporting
% repository -- confirm before relying on them); unknown fields are ignored
% by bibliography styles, so they are kept verbatim.
% venue and eventdate are biblatex fields; classic BibTeX styles ignore them.
@inproceedings{Khler:1046208,
  author    = {Köhler, Cristiano and Grün, Sonja and Denker, Michael},
  title     = {Supporting {FAIR} Principles in Data Analysis Through
               Semantically-Enriched Provenance},
  booktitle = {2nd Conference on Research Data Infrastructure ({CoRDI})},
  venue     = {Aachen, Germany},
  eventdate = {2025-08-26/2025-08-28},
  reportid  = {FZJ-2025-03744},
  year      = {2025},
  month     = aug,
  date      = {2025-08-26},
  abstract  = {Scripts that read input datasets and generate result files
               are frequently used to construct computational workflows for
               the analysis of neural activity data obtained by
               electrophysiology recordings [1]. The increased complexity
               of datasets due to recent advances in recording techniques
               is also associated with increased computational costs for
               executing those workflows and generating analysis results.
               Increasing the FAIR-ness [2] of electrophysiology data
               analysis results will promote the efficient sharing of
               results among collaborators or the research community. With
               increased findability, a collaborator can easily access
               specific results produced by complex analysis without
               rerunning costly computations. If results are more
               accessible, they are transparent and can be reused across
               platforms and organizations. The increased interoperability
               facilitates understanding specific analysis results despite
               their generation by heterogeneous workflows, improving
               collaboration and allowing the comparison of different
               analysis results. Finally, reusable results allow
               researchers to build on previous analyses, conserving
               resources and speeding scientific discovery without
               repeating complex computations. In this work, we investigate
               an approach for describing the results generated by
               electrophysiology data analysis workflows in order to
               increase the FAIR-ness of results. We aim to go beyond
               providing source codes and free-text descriptions to
               facilitate querying, introspection and reuse of the results
               by capturing and evaluating run-time provenance information.
               We highlight several challenges within the workflows that
               hinder the creation of such descriptions, including the
               iterative characteristics of conventional analysis
               scenarios, the absence of technical and semantic
               standardization, and the presence of distinct software
               implementations for existing analysis methods [3]. To
               address those challenges, we first implemented Alpaca
               (Automatic Lightweight Provenance Capture) as a framework to
               generate machine-readable descriptions of the workflow
               execution with minimal user intervention [4]. Alpaca
               produces a detailed provenance record of the atomic analysis
               steps represented by Python functions within workflow
               scripts, that are serialized together with analysis results
               using the W3C PROV standard [5]. Complementing the approach,
               the provenance information can be enriched with semantic
               information provided by ontologies. For workflows analyzing
               electrophysiology datasets with recorded neural activity, we
               implemented the Neuroelectrophysiology Analysis Ontology
               (NEAO) to provide a unified vocabulary to standardize the
               descriptions of the methods involved in the analysis of
               extracellular electrophysiology data [6]. We demonstrate how
               using NEAO to enrich the provenance captured by Alpaca helps
               in describing analysis results produced by complex
               real-world workflows for analyzing and comparing
               heterogeneous data based on Elephant [7] and Cobrawap [8].
               We highlight how the approach facilitates obtaining insights
               on the results (e.g., using knowledge graphs), thereby
               promoting the FAIR principles and facilitating sharing. We
               also discuss extensions to other computational workflows
               (e.g., neural simulation) and how the proposed approach may
               help to also improve representing their results according to
               the FAIR principles. REFERENCES [1] M. Denker et al.,
               “Reproducibility and efficiency in handling complex
               neurophysiological data,” Neuroforum, vol. 27, no. 1, pp.
               27–34, Feb, 2021, doi:
               https://doi.org/10.1515/nf-2020-0041 [2] M. D. Wilkinson et
               al., “The FAIR Guiding Principles for scientific data
               management and stewardship,” Sci Data, vol. 3, p. 160018,
               Mar, 2016, doi: https://doi.org/10.1038/sdata.2016.18 [3] V.
               A. Unakafova and A. Gail, “Comparing Open-Source Toolboxes
               for Processing and Analysis of Spike and Local Field
               Potentials Data,” Front Neuroinform, vol. 13, p. 57, Jul,
               2019, doi: https://doi.org/10.3389/fninf.2019.00057 [4] C.
               A. Köhler et al., “Facilitating the Sharing of
               Electrophysiology Data Analysis Results Through In-Depth
               Provenance Capture,” eNeuro, vol. 11, no. 6, p.
               ENEURO.0476-23.2024, May, 2024, doi:
               https://doi.org/10.1523/ENEURO.0476-23.2024 [5] P. Groth and
               L. Moreau. “An Overview of the PROV Family of
               Documents.” PROV-Overview.
               https://www.w3.org/TR/prov-overview (accessed on 28 April
               2025) [6] C. A. Köhler, S. Grün, and M. Denker.
               “Improving data sharing and knowledge transfer via the
               Neuroelectrophysiology Analysis Ontology (NEAO),”
               arXiv:2412.05021, Dec, 2024, doi:
               https://doi.org/10.48550/arXiv.2412.05021 [7] R. Gutzen et
               al., “A modular and adaptable analysis pipeline to compare
               slow cerebral rhythms across heterogeneous datasets,” Cell
               Rep Methods, vol. 4, no. 1, p. 100681, Jan, 2024, doi:
               https://doi.org/10.1016/j.crmeth.2023.100681},
  subtyp    = {After Call},
  keywords  = {FAIR (Other) / electrophysiology (Other) / data analysis
               (Other) / computational workflow (Other) / Python (Other) /
               provenance (Other) / ontology (Other)},
  cin       = {IAS-6 / INM-10},
  cid       = {I:(DE-Juel1)IAS-6-20130828 / I:(DE-Juel1)INM-10-20170113},
  pnm       = {5235 - Digitization of Neuroscience and User-Community
               Building (POF4-523) / 5231 - Neuroscientific Foundations
               (POF4-523) / HDS LEE - Helmholtz School for Data Science in
               Life, Earth and Energy (HDS LEE) (HDS-LEE-20190612) / HBP
               SGA3 - Human Brain Project Specific Grant Agreement 3
               (945539) / EBRAINS 2.0 - EBRAINS 2.0: A Research
               Infrastructure to Advance Neuroscience and Brain Health
               (101147319) / JL SMHB - Joint Lab Supercomputing and
               Modeling for the Human Brain (JL SMHB-2021-2027) /
               Algorithms of Adaptive Behavior and their Neuronal
               Implementation in Health and Disease (iBehave-20220812)},
  pid       = {G:(DE-HGF)POF4-5235 / G:(DE-HGF)POF4-5231 /
               G:(DE-Juel1)HDS-LEE-20190612 / G:(EU-Grant)945539 /
               G:(EU-Grant)101147319 / G:(DE-Juel1)JL SMHB-2021-2027 /
               G:(DE-Juel-1)iBehave-20220812},
  typ       = {PUB:(DE-HGF)24},
  doi       = {10.5281/ZENODO.16736244},
  url       = {https://juser.fz-juelich.de/record/1046208},
}