% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% BioHackEU23 report published in BioHackrXiv (DOI 10.37044/osf.io/7y2jh).
% The fields reportid, cin, cid, pnm, pid and typ are not standard BibTeX
% fields; standard styles ignore them. They are kept verbatim as repository
% metadata (presumably from the JuSER record given in `url` -- confirm before
% relying on their meaning).
% Title casing is protected with whole-word braces so that sentence-casing
% styles keep the proper nouns and acronyms intact.
@article{Beier:1038382,
  author   = {Beier, Sebastian and Mühlhaus, Timo and Pommier, Cyril and
              Owen, Stuart and Brilhaus, Dominik and Weil, Heinrich Lukas
              and Wetzels, Florian and Chait, Gavin and Arend, Daniel and
              Feser, Manuel and Doniparthi, Gajendra and Bauer, Jonathan
              and Gundersen, Sveinung and Vázquez, Pável},
  title    = {{BioHackEU23} report: {Enabling} continuous {RDM} using
              {Annotated} {Research} {Contexts} with {RO-Crate} profiles
              for {ISA}},
  journal  = {BioHackrXiv},
  reportid = {FZJ-2025-01379},
  year     = {2024},
  abstract = {A prevailing paradigm in Research Data Management (RDM) is
              to publish research datasets in designated archives upon
              conclusion of a research process. However, it is beneficial
              to abandon the notion of final or static data artifacts and
              instead adopt a continuous approach towards working with
              research data, where data is constantly shared, versioned,
              and updated. This immutable yet evolving perspective allows
              for the application of existing technologies and processes
              from software engineering, such as continuous integration,
              release practices, and version management backed by decades
              of experience, and adaptable to RDM. To facilitate this, we
              propose the Annotated Research Context (ARC), a data and
              metadata layout convention based on the well-established ISA
              model for metadata annotation and implemented using Git
              repositories. ARCs are amenable towards frequent,
              lightweight data management operations, such as (meta)data
              validation and transformation. The Omnipy Python library is
              designed to help develop stepwise validated (meta)data
              transformations as scalable data flows that can be
              incrementally designed, updated, and rerun as requirements
              or data evolve. To demonstrate the concept of continuous RDM
              we will use Omnipy to define and orchestrate Git-backed
              CI/CD (Continuous Integration/Continuous Delivery) data
              flows to convert ISA metadata present in ARCs into validated
              RO-Crate representations adhering to the Bioschemas
              convention. A RO-Crate package combines the actual research
              data with its metadata description. Downstream, this allows
              semantic interpretation by Galaxy for e.g. workflow
              execution as well as machine-readable data access and data
              harvesting for search engines such as FAIDARE.},
  cin      = {IBG-4},
  cid      = {I:(DE-Juel1)IBG-4-20200403},
  pnm      = {2171 - Biological and environmental resources for
              sustainable use (POF4-217) / DFG project G:(GEPRIS)442077441
              - DataPLANT – Daten in Pflanzen-Grundlagenforschung
              (442077441)},
  pid      = {G:(DE-HGF)POF4-2171 / G:(GEPRIS)442077441},
  typ      = {PUB:(DE-HGF)25},
  doi      = {10.37044/osf.io/7y2jh},
  url      = {https://juser.fz-juelich.de/record/1038382},
}