% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Doctoral thesis by Clara Betancourt (Bonn, 2023); repository record:
% https://juser.fz-juelich.de/record/1019494
% The fields reportid, cin, cid, pnm, pid and typ are not standard BibTeX
% fields; unknown fields are ignored by standard styles.
% The total page count is stored as a bare number in pagetotal so that the
% bibliography style supplies the unit.
@phdthesis{Betancourt:1019494,
      author       = {Betancourt, Clara},
      title        = {Mapping and Interpolation of Tropospheric Ozone Data
                      with Machine Learning Methods},
      school       = {Universität Bonn},
      type         = {Dissertation},
      address      = {Bonn},
      publisher    = {Universitäts- und Landesbibliothek Bonn},
      reportid     = {FZJ-2023-05441},
      pagetotal    = {167},
      year         = {2023},
      abstract     = {Tropospheric ozone is a toxic trace gas in the atmosphere.
                      It threatens human health, damages crops and vegetation, and
                      it is a short-lived climate forcer. Ozone is a secondary air
                      pollutant that undergoes multiple physical and chemical
                      processes on a wide range of timescales. Therefore, as with
                      many environmental variables, it is difficult to quantify
                      ozone concentrations where measurements are not available.
                      To solve this problem, the goal of this work is to develop
                      spatio-temporal mapping and interpolation methods using
                      machine learning techniques with the example application of
                      ozone data. We train the machine learning models on a large
                      number of ozone measurements available in the Tropospheric
                      Ozone Assessment Report (TOAR) database. The most important
                      contributions of this work are: • Mapping and
                      interpolating ozone data, providing high-resolution,
                      high-accuracy, spatiotemporal data products. The data
                      products cover spatial domains from the regional to the
                      global level, and their temporal resolution ranges from
                      hourly data to multi-year statistics. We use large
                      quantities of ozone measurements, combined with model data
                      and geospatial data to generate the data products. •
                      Adapting, developing, and explaining new state-of-the-art
                      machine learning methods that we use to create these data
                      products. The most relevant algorithms of this work are
                      tree-based and graph-based methods. For example, we develop
                      a multi-scale evaluation technique for spatial machine
                      learning models and verify their physical consistency by
                      using Shapley additive explanations. • Utilizing
                      spatiotemporal patterns in geospatial data and ozone
                      measurements in machine learning models. We use aggregated
                      local to regional geospatial site conditions as input
                      features for machine learning models. Furthermore, we adopt
                      a graph machine learning algorithm to work on ozone
                      measurements at irregularly placed air quality monitoring
                      stations. With this work, we publish AQ-Bench, a benchmark
                      dataset for machine learning on global long-term ozone
                      metrics. We link explainable machine learning on AQ-Bench
                      with uncertainty assessments to point out limits in the
                      dataset and the applicability of the resulting machine
                      learning models. With the trained models, we also create the
                      first completely data-driven, global, high-resolution map of
                      long-term ozone metrics (resolution 0.1°×0.1°, years
                      2010--2014). Finally, we develop a high-performance
                      graph-based missing data interpolation method for ozone
                      measurements. It has an index of agreement of 0.96--0.99
                      for hourly missing data interpolation in Germany. The
                      synthesis of this work is
                      that an interplay of physically sound data selection,
                      uncertainty quantification, and explainability in machine
                      learning can produce trustworthy environmental data
                      products. We also found that the accuracy of the data
                      products in a specific region is mainly dependent on good
                      coverage with ozone measurements in that region. Therefore,
                      this work contributes not only to the gapless quantification
                      of ozone concentrations but also to trustworthy machine
                      learning in the environmental sciences.},
      keywords     = {air quality (Other) / tropospheric ozone (Other) / machine
                      learning (Other) / ddc:550 (Other)},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
                      (SDLs) and Research Groups (POF4-511) / IntelliAQ -
                      Artificial Intelligence for Air Quality (787576)},
      pid          = {G:(DE-HGF)POF4-5111 / G:(EU-Grant)787576},
      typ          = {PUB:(DE-HGF)11},
      doi          = {10.48565/BONNDOC-179},
      url          = {https://juser.fz-juelich.de/record/1019494},
}