% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference talk record (plenary/keynote, per the `subtyp` field).
% Notes for maintainers:
%  - `booktitle` carries the event name, venue and dates; it is the field
%    @inproceedings requires and the one styles print after "In".
%  - `month` uses the predefined BibTeX macro (unbraced) so that styles can
%    localise or abbreviate it.
%  - `year`/`month` serve classic styles and `date` serves biblatex; both are
%    kept so the entry works with either toolchain.
%  - `othercontributors`, `reportid`, `subtyp`, `cin`, `cid`, `pnm`, `pid` and
%    `typ` are not standard BibTeX fields; standard styles ignore them.
%    They look like metadata from the exporting repository (TODO confirm).
@inproceedings{Loup:1050382,
      author       = {Loup, Ulrich and Brinckmann, Nils},
      othercontributors = {Faber, Claas and Ingenbeek, Martin and Koppe, Roland and
                          Lorenz, Christof and Schäfer, David and Sorg, Jürgen and
                          Rambhia, Mihir},
      title        = {The {Helmholtz} {Earth} and {Environment} {DataHub} -
                      Highly Distributed Data That Thrives on Metadata},
      booktitle    = {Distribits 2025, Düsseldorf
                      (Germany), 23 Oct 2025 - 24 Oct 2025},
      reportid     = {FZJ-2026-00155},
      year         = {2025},
      month        = oct,
      date         = {2025-10-23},
      abstract     = {In Environmental Sciences, Time-series data is key to, for
                      example, monitoring environmental processes, validating
                      earth system models and remote sensing products, training of
                      data driven methods and better understanding of climate
                      processes. A major issue is the lack of a consistent data
                      availability standard aligned with the FAIR (findable
                      accessible interoperable reusable) principles. The DataHub
                      initiative, which is part of the Helmholtz Research Field
                      Earth and Environment, addresses these shortcomings by
                      establishing a large-scale infrastructure around common data
                      standards and interfaces, for example, the Open Geospatial
                      Consortium’s SensorThings API (STA). Closely related to
                      the DataHub is the STAMPLATE project, whose challenging task
                      was to harmonize the extremely heterogeneous metadata
                      formats stemming from the different observation domains such
                      as the earth, atmosphere and ocean. Moreover, within the
                      domains different metadata formats developed historically
                      due to diverging system architectures and missing
                      guidelines. In DataHub, the research data, whether it is
                      collected by measurement devices or acquired through manual
                      processes, is distributed among the seven participating
                      research centers. Each of these centers is responsible for
                      operating its own time series management system, which
                      ingests the observational data. In addition to these data
                      ingest systems, sensor and device management systems provide
                      easy-to-use self-services for entering metadata, such as the
                      Helmholtz Sensor Management System
                      (https://helmholtz.software/software/sensor-management-system)
                      or the O2A Registry (https://registry.o2a-data.de/). Each
                      center operates a data/metadata synchronization service that
                      ultimately makes the data available through STA, which
                      integrates both data and metadata. Quality checking tools
                      such as SaQC (https://helmholtz.software/software/saqc)
                      facilitate data quality control. The powerful and modern
                      Earth Data Portal (www.earth-data.de) with highly
                      customizable thematic viewers is the central portal for data
                      exploration. In order to ensure that metadata entered in any
                      user self-service is also displayed in the Earth Data Portal
                      along with the ingested data, custom, semantic metadata
                      profiles developed in STAMPLATE augment STA’s core data
                      model with domain-specific information. In summary, the data
                      that is accessible on the Earth Data Portal and available
                      from the STA endpoints is distributed in two distinct
                      categories. Firstly, observation data and its metadata are
                      acquired by separate systems. And secondly, each center
                      operates its own data and metadata infrastructure, with all
                      centers ultimately connecting to STA endpoints. The
                      operationalization of the framework and its subsequent
                      integration into research data workflows is imminent,
                      presenting us with a number of challenges as our research
                      data management processes undergo a transformative shift
                      from manual, human-based workflows to self-organized,
                      digitally-enabled workflows. For example, new ways of
                      downloading data need to be found that meet the needs of
                      researchers, while addressing issues such as copyright and
                      avoiding infrastructure overload. This talk addresses the
                      fundamental elements of our initiative and the associated
                      challenges.},
      subtyp       = {Plenary/Keynote},
      cin          = {IBG-3},
      cid          = {I:(DE-Juel1)IBG-3-20101118},
      pnm          = {2173 - Agro-biogeosystems: controls, feedbacks and impact
                      (POF4-217)},
      pid          = {G:(DE-HGF)POF4-2173},
      typ          = {PUB:(DE-HGF)6},
      doi          = {10.5281/ZENODO.17419899},
      url          = {https://juser.fz-juelich.de/record/1050382},
}