% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Entry Loup:1050382 -- plenary/keynote talk given at Distribits 2025.
%
% Review notes:
%  * booktitle carries the event description (name, place, dates). The
%    inproceedings type requires booktitle; the text previously sat in the
%    organization field, which is meant for a sponsoring body.
%  * month uses the predefined macro (bare oct) so that styles can localise
%    or abbreviate it. year/month are kept for classic BibTeX styles, date
%    for biblatex.
%  * Only proper nouns in the title are brace-protected, as whole words, so
%    that the bibliography style decides the casing of the remaining words.
%  * othercontributors, reportid, subtyp, cin, cid, pnm, pid and typ are not
%    standard BibTeX fields; standard styles silently ignore them. They look
%    like repository export metadata (presumably JuSER) -- TODO confirm
%    before relying on their meaning.
@inproceedings{Loup:1050382,
  author            = {Loup, Ulrich and Brinckmann, Nils},
  othercontributors = {Faber, Claas and Ingenbeek, Martin and Koppe, Roland and
                       Lorenz, Christof and Schäfer, David and Sorg, Jürgen and
                       Rambhia, Mihir},
  title             = {The {Helmholtz} {Earth} and {Environment} {DataHub} -
                       Highly Distributed Data That Thrives on Metadata},
  booktitle         = {Distribits 2025, Düsseldorf (Germany), 23 Oct 2025 -
                       24 Oct 2025},
  reportid          = {FZJ-2026-00155},
  year              = {2025},
  month             = oct,
  date              = {2025-10-23},
  abstract          = {In Environmental Sciences, Time-series data is key to, for
                       example, monitoring environmental processes, validating
                       earth system models and remote sensing products, training of
                       data driven methods and better understanding of climate
                       processes. A major issue is the lack of a consistent data
                       availability standard aligned with the FAIR (findable
                       accessible interoperable reusable) principles. The DataHub
                       initiative, which is part of the Helmholtz Research Field
                       Earth and Environment, addresses these shortcomings by
                       establishing a large-scale infrastructure around common data
                       standards and interfaces, for example, the Open Geospatial
                       Consortium’s SensorThings API (STA). Closely related to
                       the DataHub is the STAMPLATE project, whose challenging task
                       was to harmonize the extremely heterogeneous metadata
                       formats stemming from the different observation domains such
                       as the earth, atmosphere and ocean. Moreover, within the
                       domains different metadata formats developed historically
                       due to diverging system architectures and missing
                       guidelines. In DataHub, the research data, whether it is
                       collected by measurement devices or acquired through manual
                       processes, is distributed among the seven participating
                       research centers. Each of these centers is responsible for
                       operating its own time series management system, which
                       ingests the observational data. In addition to these data
                       ingest systems, sensor and device management systems provide
                       easy-to-use self-services for entering metadata, such as the
                       Helmholtz Sensor Management System
                       (https://helmholtz.software/software/sensor-management-system)
                       or the O2A Registry (https://registry.o2a-data.de/). Each
                       center operates a data/metadata synchronization service that
                       ultimately makes the data available through STA, which
                       integrates both data and metadata. Quality checking tools
                       such as SaQC (https://helmholtz.software/software/saqc)
                       facilitate data quality control. The powerful and modern
                       Earth Data Portal (www.earth-data.de) with highly
                       customizable thematic viewers is the central portal for data
                       exploration. In order to ensure that metadata entered in any
                       user self-service is also displayed in the Earth Data Portal
                       along with the ingested data, custom, semantic metadata
                       profiles developed in STAMPLATE augment STA’s core data
                       model with domain-specific information. In summary, the data
                       that is accessible on the Earth Data Portal and available
                       from the STA endpoints is distributed in two distinct
                       categories. Firstly, observation data and its metadata are
                       acquired by separate systems. And secondly, each center
                       operates its own data and metadata infrastructure, with all
                       centers ultimately connecting to STA endpoints. The
                       operationalization of the framework and its subsequent
                       integration into research data workflows is imminent,
                       presenting us with a number of challenges as our research
                       data management processes undergo a transformative shift
                       from manual, human-based workflows to self-organized,
                       digitally-enabled workflows. For example, new ways of
                       downloading data need to be found that meet the needs of
                       researchers, while addressing issues such as copyright and
                       avoiding infrastructure overload. This talk addresses the
                       fundamental elements of our initiative and the associated
                       challenges.},
  subtyp            = {Plenary/Keynote},
  cin               = {IBG-3},
  cid               = {I:(DE-Juel1)IBG-3-20101118},
  pnm               = {2173 - Agro-biogeosystems: controls, feedbacks and impact
                       (POF4-217)},
  pid               = {G:(DE-HGF)POF4-2173},
  typ               = {PUB:(DE-HGF)6},
  doi               = {10.5281/ZENODO.17419899},
  url               = {https://juser.fz-juelich.de/record/1050382},
}