% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference abstract presented at the Online Bernstein Conference 2020.
% Notes for maintainers (kept outside the entry, where BibTeX ignores text):
% - The citation key is kept exactly as it was so existing citations still
%   resolve.
% - booktitle carries the conference name (required for this entry type);
%   venue and eventdate hold the location and dates for biblatex and are
%   ignored by classic BibTeX styles.
% - year and month serve classic BibTeX styles; date serves biblatex.
% - reportid, subtyp, cin, cid, pnm, pid and typ are nonstandard fields
%   (presumably repository metadata -- confirm before removing); standard
%   styles ignore them.
% - RRIDs in the abstract are plain text with an escaped underscore, not
%   math mode, so they print upright without a subscript.
@inproceedings{Khler:885612,
  author    = {Köhler, Cristiano and Ulianych, Danylo and Gerkin, Richard
               C. and Davison, Andrew P. and Grün, Sonja and Denker,
               Michael},
  title     = {Provenance capture in the analysis of electrophysiology
               data: an example based on the {Elephant} package},
  booktitle = {Online Bernstein Conference 2020},
  venue     = {online (Germany)},
  eventdate = {2020-09-29/2020-10-01},
  reportid  = {FZJ-2020-03964},
  year      = {2020},
  month     = sep,
  date      = {2020-09-29},
  abstract  = {Workflows for the analysis of electrophysiology data
               typically comprise multiple steps, which should be fully
               documented when aiming at the reproducibility of the
               results. Considering the complexity, modularity and often
               iterative nature of such workflows, robust tools forming the
               basis of the workflow are necessary [1]. We focus here on
               two open-source tools used for the analysis of
               electrophysiology data. The Neo (RRID:SCR\_000634)
               framework provides a data object model to standardize data
               of different origins [2]. Elephant (RRID:SCR\_003833) is a
               toolbox for both standard and highly sophisticated analyses
               of simulated and experimental data [3]. The characterization
               of all data manipulations and the parameters throughout the
               workflow provides provenance information [4] that improves
               reproducibility of the results. This requires complete and
               self-explanatory descriptions of the data objects in the
               workflow and a method to minimize the need for manually
               tracking its execution. While the Neo framework provides a
               model to structure the neuronal data and associated
               metadata, a similar representation for the outputs of the
               analysis part of the workflow is still missing. Moreover,
               automated provenance capture is not available at the
               function level for a single Python script. Thus, existing
               tools must be improved to implement a data model that
               captures analysis outputs and workflow provenance and,
               ultimately, represents the analysis and its results in
               accordance with the FAIR principles [5]. Here we present a
               conceptual solution to capture provenance during the
               analysis of electrophysiology data. First, we introduce a
               standardization of the outputs of the Elephant functions,
               which is inspired by the Neo model. Thus, the information
               about the generation of an analysis output will be
               encapsulated in a new set of Python objects that can be
               easily re-used or shared. These objects will be integrated
               into the existing code bases with minimal disruption. This
               will free the scientist from the need to manually annotate
               the output of the analysis. Second, we will show how to
               capture provenance information throughout the Python
               analysis script by using function decorators. These track
               the Elephant and user-defined functions in the script while
               mapping the inputs to the outputs, thereby also yielding a
               provenance trace in the form of a graph. We present a
               prototype implementation and demonstrate its use in a
               scenario where spike and LFP data are analyzed by standard
               methods. References: [1] Denker, M. and Grün, S. (2016).
               Designing Workflows for the Reproducible Analysis of
               Electrophysiological Data. In Brain-Inspired Computing,
               Amunts, K. et al., eds. (Cham: Springer International
               Publishing), pp. 58--72. [2] Garcia, S. et al. (2014) Neo: an
               object model for handling electrophysiology data in multiple
               formats. Frontiers in Neuroinformatics 8:10. [3]
               http://python-elephant.org [4] Ragan, E.D. et al. (2016).
               Characterizing Provenance in Visualization and Data
               Analysis: An Organizational Framework of Provenance Types
               and Purposes. IEEE Transactions on Visualization and
               Computer Graphics. 22(1):31–40. [5] Wilkinson, M.D. et al.
               (2016). The FAIR Guiding Principles for scientific data
               management and stewardship. Scientific Data 3, 160018.},
  subtyp    = {Other},
  cin       = {INM-6 / INM-10 / IAS-6},
  cid       = {I:(DE-Juel1)INM-6-20090406 / I:(DE-Juel1)INM-10-20170113 /
               I:(DE-Juel1)IAS-6-20130828},
  pnm       = {571 - Connectivity and Activity (POF3-571) / 574 - Theory,
               modelling and simulation (POF3-574) / HDS LEE - Helmholtz
               School for Data Science in Life, Earth and Energy (HDS LEE)
               (HDS-LEE-20190612) / HBP SGA2 - Human Brain Project Specific
               Grant Agreement 2 (785907) / HBP SGA3 - Human Brain Project
               Specific Grant Agreement 3 (945539) / HAF - Helmholtz
               Analytics Framework (ZT-I-0003) / PhD no Grant - Doktorand
               ohne besondere Förderung (PHD-NO-GRANT-20170405)},
  pid       = {G:(DE-HGF)POF3-571 / G:(DE-HGF)POF3-574 /
               G:(DE-Juel1)HDS-LEE-20190612 / G:(EU-Grant)785907 /
               G:(EU-Grant)945539 / G:(DE-HGF)ZT-I-0003 /
               G:(DE-Juel1)PHD-NO-GRANT-20170405},
  typ       = {PUB:(DE-HGF)24},
  doi       = {10.12751/NNCN.BC2020.0098},
  url       = {https://juser.fz-juelich.de/record/885612},
}