% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Opinion article in F1000Research (vol. 11, 2022) on formatting VCF metadata
% so that plant genotyping data is FAIR.  "pages" holds the article number
% (the journal has no page ranges).  The fields reportid, cin, ddc, cid, pnm,
% pid and typ are non-standard; they are presumably bookkeeping metadata of
% the repository named in "url" and are ignored by standard styles.
@ARTICLE{Beier:908264,
      author       = {Beier, Sebastian and Fiebig, Anne and Pommier, Cyril and
                      Liyanage, Isuru and Lange, Matthias and Kersey, Paul J. and
                      Weise, Stephan and Finkers, Richard and Koylass, Baron and
                      Cezard, Timothee and Courtot, Mélanie and
                      Contreras-Moreira, Bruno and Naamati, Guy and Dyer, Sarah
                      and Scholz, Uwe},
      title        = {Recommendations for the formatting of {Variant} {Call}
                      {Format} ({VCF}) files to make plant genotyping data {FAIR}},
      journal      = {F1000Research},
      volume       = {11},
      issn         = {2046-1402},
      address      = {London},
      publisher    = {F1000 Research Ltd},
      reportid     = {FZJ-2022-02498},
      pages        = {231},
      year         = {2022},
      abstract     = {In this opinion article, we discuss the formatting of files
                      from (plant) genotyping studies, in particular the
                      formatting of metadata in Variant Call Format (VCF) files.
                      The flexibility of the VCF format specification facilitates
                      its use as a generic interchange format across domains but
                      can lead to inconsistency between files in the presentation
                      of metadata. To enable fully autonomous machine actionable
                      data flow, generic elements need to be further specified.
                      We strongly support the merits of the FAIR principles and
                      see the need to facilitate them also through technical
                      implementation specifications. They form a basis for the
                      proposed VCF extensions here. We have learned from the
                      existing application of VCF that the definition of relevant
                      metadata using controlled standards, vocabulary and the
                      consistent use of cross-references via resolvable
                      identifiers (machine-readable) are particularly necessary
                      and propose their encoding. VCF is an established standard
                      for the exchange and publication of genotyping data. Other
                      data formats are also used to capture variant data (for
                      example, the HapMap and the gVCF formats), but none
                      currently have the reach of VCF. For the sake of simplicity,
                      we will only discuss VCF and our recommendations for its
                      use, but these recommendations could also be applied to
                      gVCF. However, the part of the VCF standard relating to
                      metadata (as opposed to the actual variant calls) defines a
                      syntactic format but no vocabulary, unique identifier or
                      recommended content. In practice, often only sparse
                      descriptive metadata is included. When descriptive metadata
                      is provided, proprietary metadata fields are frequently
                      added that have not been agreed upon within the community
                      which may limit long-term and comprehensive
                      interoperability. To address this, we propose
                      recommendations for supplying and encoding metadata,
                      focusing on use cases from plant sciences. We expect there
                      to be overlap, but also divergence, with the needs of other
                      domains.},
      cin          = {IBG-4},
      ddc          = {610},
      cid          = {I:(DE-Juel1)IBG-4-20200403},
      pnm          = {2171 - Biological and environmental resources for
                      sustainable use (POF4-217) / AGENT - Activated GEnebank
                      NeTwork (862613) / de.NBI - Etablierungsphase -
                      Leistungszentrum - GCBN - German Crop BioGreenformatics
                      Network (031A536C)},
      pid          = {G:(DE-HGF)POF4-2171 / G:(EU-Grant)862613 /
                      G:(BMBF)031A536C},
      typ          = {PUB:(DE-HGF)16},
      doi          = {10.12688/f1000research.109080.2},
      url          = {https://juser.fz-juelich.de/record/908264},
}