% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Beier et al. (2022), F1000Research vol. 11: opinion article on formatting
% VCF metadata so that plant genotyping data is FAIR.
% Catalogue record: https://juser.fz-juelich.de/record/908264
% (report id FZJ-2022-02498).
% NOTE(review): cin, ddc, cid, pnm, pid, typ and reportid are not standard
% BibTeX fields; standard styles ignore them. They are presumably
% repository-internal metadata and are kept verbatim -- confirm before pruning.
% NOTE(review): pages holds a single number because the record gave no range
% end; presumably it is the article number -- verify against the DOI.
@article{Beier:908264,
  author    = {Beier, Sebastian and Fiebig, Anne and Pommier, Cyril and
               Liyanage, Isuru and Lange, Matthias and Kersey, Paul J. and
               Weise, Stephan and Finkers, Richard and Koylass, Baron and
               Cezard, Timothee and Courtot, Mélanie and
               Contreras-Moreira, Bruno and Naamati, Guy and Dyer, Sarah
               and Scholz, Uwe},
  title     = {Recommendations for the formatting of {Variant} {Call}
               {Format} ({VCF}) files to make plant genotyping data {FAIR}},
  journal   = {F1000Research},
  volume    = {11},
  issn      = {2046-1402},
  address   = {London},
  publisher = {F1000 Research Ltd},
  reportid  = {FZJ-2022-02498},
  pages     = {231},
  year      = {2022},
  abstract  = {In this opinion article, we discuss the formatting of files
               from (plant) genotyping studies, in particular the
               formatting of metadata in Variant Call Format (VCF) files.
               The flexibility of the VCF format specification facilitates
               its use as a generic interchange format across domains but
               can lead to inconsistency between files in the presentation
               of metadata. To enable fully autonomous machine actionable
               data flow, generic elements need to be further specified. We
               strongly support the merits of the FAIR principles and see
               the need to facilitate them also through technical
               implementation specifications. They form a basis for the
               proposed VCF extensions here. We have learned from the
               existing application of VCF that the definition of relevant
               metadata using controlled standards, vocabulary and the
               consistent use of cross-references via resolvable
               identifiers (machine-readable) are particularly necessary
               and propose their encoding. VCF is an established standard
               for the exchange and publication of genotyping data. Other
               data formats are also used to capture variant data (for
               example, the HapMap and the gVCF formats), but none
               currently have the reach of VCF. For the sake of simplicity,
               we will only discuss VCF and our recommendations for its
               use, but these recommendations could also be applied to
               gVCF. However, the part of the VCF standard relating to
               metadata (as opposed to the actual variant calls) defines a
               syntactic format but no vocabulary, unique identifier or
               recommended content. In practice, often only sparse
               descriptive metadata is included. When descriptive metadata
               is provided, proprietary metadata fields are frequently
               added that have not been agreed upon within the community
               which may limit long-term and comprehensive
               interoperability. To address this, we propose
               recommendations for supplying and encoding metadata,
               focusing on use cases from plant sciences. We expect there
               to be overlap, but also divergence, with the needs of other
               domains.},
  cin       = {IBG-4},
  ddc       = {610},
  cid       = {I:(DE-Juel1)IBG-4-20200403},
  pnm       = {2171 - Biological and environmental resources for
               sustainable use (POF4-217) / AGENT - Activated GEnebank
               NeTwork (862613) / de.NBI - Etablierungsphase -
               Leistungszentrum - GCBN - German Crop BioGreenformatics
               Network (031A536C)},
  pid       = {G:(DE-HGF)POF4-2171 / G:(EU-Grant)862613 /
               G:(BMBF)031A536C},
  typ       = {PUB:(DE-HGF)16},
  doi       = {10.12688/f1000research.109080.2},
  url       = {https://juser.fz-juelich.de/record/908264},
}