% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% PhD thesis record 877844 from juser.fz-juelich.de (see the url field).
% LaTeX markup in title and abstract is plain text-mode markup, with no math-mode wrappers.
% The fields reportid, cin, cid, pnm, pid, typ and urn are repository-specific
% identifiers, not standard BibTeX fields; standard styles ignore unknown fields.
@PHDTHESIS{Sprenger:877844,
  author    = {Sprenger, Julia},
  title     = {{Tools} and {Workflows} for {Data} \& {Metadata}
               {Management} of {Complex} {Experiments} - {Building} a
               {Foundation} for {Reproducible} \& {Collaborative}
               {Analysis} in the {Neurosciences}},
  volume    = {222},
  school    = {RWTH Aachen},
  type      = {Dissertation},
  address   = {Jülich},
  publisher = {Forschungszentrum Jülich GmbH Zentralbibliothek, Verlag},
  reportid  = {FZJ-2020-02468},
  isbn      = {978-3-95806-478-2},
  series    = {Schriften des Forschungszentrums Jülich. Reihe
               Schlüsseltechnologien / Key Technologies},
  pages     = {X, 168 S.},
  year      = {2020},
  note      = {Dissertation, RWTH Aachen, 2020},
  abstract  = {The scientific knowledge of mankind is based on the
               verification of hypotheses by carrying out experiments. As
               the construction and conduct of an experiment becomes
               increasingly complex more and more scientists are involved
               in a single project. In order to make the generated data
               easily accessible to all scientists and, at best, to the
               entire scientific community, it is essential to
               comprehensively document the circumstances of the data
               generation, as these contain essential information for later
               analysis and interpretation. In this thesis, I present two
               complex neuroscience projects and the strategies, tools, and
               concepts that were used to comprehensively track, process,
               organize, and prepare the collected data for joint analysis.
               First, I describe the older of the two experiments and
               explain in detail the generation of data and metadata and
               the pipeline used for aggregating metadata. A hierarchical
               approach based on the open source software \textit{odML}
               for metadata organization was implemented to capture the
               complex meta information of this project. I evaluate the
               design concepts and tools used and derive a general
               catalogue of requirements for scientific collaboration in
               complex projects. Also, I identify issues and requirements
               that were not yet addressed by this pipeline. There were, in
               particular, the difficulties in i) entering manual metadata
               and structuring the metadata collection, ii) combining
               metadata with the actual data, and iii) setting up the
               pipeline in a modular generic and transparent manner. Guided
               by this analysis, I describe concept and tool
               implementations to address these identified issues. I
               developed a complementary tool (\textit{odMLtables}) to i)
               facilitate the capture of metadata in a structured way and
               to ii) convert these easily into the hierarchical,
               standardized metadata format \textit{odML}. \textit{odMLtables}
               provides an interface between the easy-to-read tabular
               metadata representation in the formats commonly used in
               laboratory environments (csv/xls) and the hierarchically
               organized \textit{odML} format based on xml, which is
               designed for a comprehensive collection of complex metadata
               records in an easily machine-readable manner. Supplementing
               the coordinated capture of metadata, I contributed to and
               shaped the \textit{Neo} toolbox for the standardized
               representation of electrophysiological data. This toolbox is
               a key component for electrophysiological data analysis as it
               integrates different proprietary and non-proprietary file
               formats and serves as a bridge between different file
               formats. I emphasize new features that simplify the process
               of data and metadata handling in the data acquisition
               workflow. I introduce the concept of workflow management
               into the field of scientific data processing, based on the
               common Python-based snakemake package. For the second, more
               recent electrophysiological experiment, I designed and
               implemented the workflow for capturing and packaging
               metadata and data in a comprehensive form. Here I used the
               generic neuroscience information exchange format
               (\textit{Nix}) for the user-friendly packaging of data
               sets including data and metadata in combined form. Finally,
               I evaluate the improved workflow against the requirements of
               collaborative scientific work in complex projects. I
               establish general guidelines for conducting such experiments
               and workflows in a scientific environment. In conclusion, I
               present the next development steps for the presented
               workflow and potential avenues for deploying this prototype
               as a production prototype to a wider scientific community.},
  cin       = {INM-6 / IAS-6 / INM-10},
  cid       = {I:(DE-Juel1)INM-6-20090406 / I:(DE-Juel1)IAS-6-20130828 /
               I:(DE-Juel1)INM-10-20170113},
  pnm       = {899 - ohne Topic (POF3-899)},
  pid       = {G:(DE-HGF)POF3-899},
  typ       = {PUB:(DE-HGF)3 / PUB:(DE-HGF)11},
  urn       = {urn:nbn:de:0001-2020072301},
  url       = {https://juser.fz-juelich.de/record/877844},
}