% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% BioHackEU23 report (BioHackrXiv), cited by DOI with a repository record URL.
% The fields reportid, cin, cid, pnm, pid and typ are not standard BibTeX
% fields; they look like repository bookkeeping metadata (TODO confirm) and
% are ignored by styles that do not know them.
% Case-sensitive words in the title are protected by bracing whole words.
@ARTICLE{Beier:1038382,
      author       = {Beier, Sebastian and Mühlhaus, Timo and Pommier, Cyril and
                      Owen, Stuart and Brilhaus, Dominik and Weil, Heinrich Lukas
                      and Wetzels, Florian and Chait, Gavin and Arend, Daniel and
                      Feser, Manuel and Doniparthi, Gajendra and Bauer, Jonathan
                      and Gundersen, Sveinung and Vázquez, Pável},
      title        = {{BioHackEU23} report: {Enabling} continuous {RDM} using
                      {Annotated} {Research} {Contexts} with {RO-Crate} profiles
                      for {ISA}},
      journal      = {BioHackrXiv},
      reportid     = {FZJ-2025-01379},
      year         = {2024},
      abstract     = {A prevailing paradigm in Research Data Management (RDM) is
                      to publish research datasets in designated archives upon
                      conclusion of a research process. However, it is beneficial
                      to abandon the notion of final or static data artifacts and
                      instead adopt a continuous approach towards working with
                      research data, where data is constantly shared, versioned,
                      and updated. This immutable yet evolving perspective allows
                      for the application of existing technologies and processes
                      from software engineering, such as continuous integration,
                      release practices, and version management backed by decades
                      of experience, and adaptable to RDM. To facilitate this, we
                      propose the Annotated Research Context (ARC), a data and
                      metadata layout convention based on the well-established ISA
                      model for metadata annotation and implemented using Git
                      repositories. ARCs are amenable towards frequent,
                      lightweight data management operations, such as (meta)data
                      validation and transformation. The Omnipy Python library is
                      designed to help develop stepwise validated (meta)data
                      transformations as scalable data flows that can be
                      incrementally designed, updated, and rerun as requirements
                      or data evolve. To demonstrate the concept of continuous RDM
                      we will use Omnipy to define and orchestrate Git-backed
                      CI/CD (Continuous Integration/Continuous Delivery) data
                      flows to convert ISA metadata present in ARCs into validated
                      RO-Crate representations adhering to the Bioschemas
                      convention. A RO-Crate package combines the actual research
                      data with its metadata description. Downstream, this allows
                      semantic interpretation by Galaxy for e.g. workflow
                      execution as well as machine-readable data access and data
                      harvesting for search engines such as FAIDARE.},
      cin          = {IBG-4},
      cid          = {I:(DE-Juel1)IBG-4-20200403},
      pnm          = {2171 - Biological and environmental resources for
                      sustainable use (POF4-217) / DFG project G:(GEPRIS)442077441
                      - DataPLANT – Daten in Pflanzen-Grundlagenforschung
                      (442077441)},
      pid          = {G:(DE-HGF)POF4-2171 / G:(GEPRIS)442077441},
      typ          = {PUB:(DE-HGF)25},
      doi          = {10.37044/osf.io/7y2jh},
      url          = {https://juser.fz-juelich.de/record/1038382},
}