% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference contribution; repository record 892689 at juser.fz-juelich.de
% (see the url field).
%
% Notes for maintainers:
%  * The citation key is kept exactly as exported (presumably derived from the
%    first author's surname with the non-ASCII letter dropped - TODO confirm)
%    so that existing citations of this key keep resolving.
%  * The conference name, venue and dates live in booktitle, which is the
%    field the inproceedings type requires; they were previously stored in
%    organization.
%  * month uses the predefined three-letter macro so that styles can localise
%    and format it.
%  * In the abstract the RRIDs are plain text with an escaped underscore; math
%    mode would typeset the underscore as a subscript.
%  * reportid, subtyp, cin, cid, pnm, pid and typ are not standard fields;
%    unknown fields are ignored by styles and are preserved verbatim here.
@INPROCEEDINGS{Khler:892689,
  author       = {Köhler, Cristiano and Ulianych, Danylo and Gerkin, Richard C.
                  and Davison, Andrew P. and Grün, Sonja and Denker, Michael},
  title        = {Capturing detailed provenance information in the analysis of
                  electrophysiology data},
  booktitle    = {14th Göttingen Meeting of the German Neuroscience Society
                  2021, online (Germany), 22 Mar 2021 - 30 Mar 2021},
  year         = {2021},
  month        = mar,
  date         = {2021-03-22},
  reportid     = {FZJ-2021-02267},
  abstract     = {The analysis of electrophysiology data typically comprises
                  multiple steps. These often consist of several scripts
                  executed in a specific sequence that take different
                  parameter sets and use distinct data files. As the
                  researcher adjusts the individual analysis steps to
                  accommodate new hypotheses or additional data, the resulting
                  workflows may become increasingly complex, and undergo
                  frequent changes. Therefore, robust tools forming the
                  workflows are necessary to fully document the workflow and
                  improve the reproducibility of the results. Provenance
                  refers to the characterization of data manipulations and
                  corresponding parameters throughout the analysis [1]. It is
                  possible to use workflow management systems to orchestrate
                  the execution of the scripts and capture provenance
                  information at the level of the script (i.e., which script
                  file was executed, and in which environment?) and data file
                  (i.e., which input and output files were supplied to that
                  script). However, the resulting provenance track does not
                  automatically provide details about the actual analysis
                  carried out inside each script. Thus, analysis results can
                  only be understood by source code inspection or trust in the
                  correctness of any accompanying documentation. Here, we aim
                  to improve existing tools by implementing a data model that
                  captures detailed provenance information and by accurately
                  representing the analysis results in a systematic and
                  formalized manner. We focus on two open-source tools for the
                  analysis of electrophysiology data. The Neo
                  (RRID:SCR\_000634) framework provides an object model to
                  standardize neural activity data acquired from distinct
                  sources [2]. Elephant (RRID:SCR\_003833) is a Python
                  toolbox that provides several functions for the analysis of
                  electrophysiology data [3]. We implemented prototypes of two
                  complementary solutions to extend the functionality of Neo
                  and Elephant to (i) automatically capture provenance
                  information at the function-execution level inside a Python
                  script, and to (ii) support the standardization of the
                  analysis results together with the storage of relevant
                  information describing their generation. The first solution
                  is a set of data analysis objects that standardize the
                  output of Elephant functions. They encapsulate all relevant
                  parameters used by the function to generate the output, such
                  that they can be easily re-used or shared. The second
                  solution maps function inputs, outputs, and parameters
                  throughout the execution of the Python analysis script, and
                  builds a representation of the relationships between the
                  different steps of the analysis within the script (i.e., the
                  provenance trace). The captured information can be used to
                  build a graph to visualize the steps followed in the script,
                  and that can be stored together with the results as
                  metadata. We compare the results obtained with or without
                  the use of the two solutions on the basis of a realistic
                  analysis scenario of electrophysiology data, showing the
                  potential benefits for reproducibility, interoperability,
                  discoverability, and re-use of analysis results. References:
                  [1] Ragan et al. (2016) IEEE Trans Visual Comput Graphics
                  22:31. [2] Garcia et al. (2014) Front Neuroinform 8:10. [3]
                  http://python-elephant.org.},
  subtyp       = {Other},
  cin          = {INM-6 / INM-10 / IAS-6},
  cid          = {I:(DE-Juel1)INM-6-20090406 / I:(DE-Juel1)INM-10-20170113 /
                  I:(DE-Juel1)IAS-6-20130828},
  pnm          = {5235 - Digitization of Neuroscience and User-Community
                  Building (POF4-523) / 5231 - Neuroscientific Foundations
                  (POF4-523) / 571 - Connectivity and Activity (POF3-571) /
                  574 - Theory, modelling and simulation (POF3-574) / HDS LEE
                  - Helmholtz School for Data Science in Life, Earth and
                  Energy (HDS LEE) (HDS-LEE-20190612) / HBP SGA2 - Human Brain
                  Project Specific Grant Agreement 2 (785907) / HBP SGA3 -
                  Human Brain Project Specific Grant Agreement 3 (945539) /
                  HAF - Helmholtz Analytics Framework (ZT-I-0003)},
  pid          = {G:(DE-HGF)POF4-5235 / G:(DE-HGF)POF4-5231 /
                  G:(DE-HGF)POF3-571 / G:(DE-HGF)POF3-574 /
                  G:(DE-Juel1)HDS-LEE-20190612 / G:(EU-Grant)785907 /
                  G:(EU-Grant)945539 / G:(DE-HGF)ZT-I-0003},
  typ          = {PUB:(DE-HGF)24},
  url          = {https://juser.fz-juelich.de/record/892689},
}