% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Master's thesis record for Gramlich (University of Cologne, 2021).
% The fields reportid, cin, cid, pnm, pid and typ are not standard BibTeX
% fields; standard styles silently ignore unknown fields.
@MASTERSTHESIS{Gramlich:906244,
  author   = {Gramlich, Vincent},
  title    = {Deep learning methods for forecasting of extreme ambient
              ozone values},
  school   = {University of Cologne},
  type     = {Masterarbeit},
  reportid = {FZJ-2022-01320},
  pages    = {59 p.},
  year     = {2021},
  note     = {Masterarbeit, University of Cologne, 2021},
  abstract = {Exposure to high ozone concentrations can be harmful for
              humans and therefore many countries have declared a
              threshold for ozone concentrations that should not be
              exceeded. That is why the ability to predict high and
              extreme near surface ozone concentrations is important not
              only for human health but also for regulatory purposes. The
              problem with many existing ozone forecasting methods,
              especially deep learning approaches, is their inaccuracy and
              unreliability to forecast high ozone concentrations. The
              goal of this study is to discover the usage of oversampling
              and subsequent finetuning to increase the forecast precision
              for extreme near surface ozone concentration. Therefore the
              architecture and experiment setup of IntelliO3-ts, a
              convolutional neural network for the forecast of near
              surface ozone concentrations, is used as a foundation to
              which the methods are applied. At first, oversampling is
              applied to the data set, which is the process of multiplying
              samples from less frequent ozone concentration ranges and
              adding them to the data set. The thereby obtained new
              "oversampled" data set that has a flatter sample
              distribution is then used to train the neural network. In a
              second and additional step the finetuning takes place, which
              is a retraining of the network obtained in the first step,
              using the original data set before oversampling was applied.
              For both methods different parameters will be tested and
              evaluated on the basis of different scores calculated on 2x2
              contingency tables. The contingency tables are created by
              using a threshold and separating the test data in two
              groups, ozone concentrations below and above the threshold.
              The oversampling increases the ability to successfully
              forecast if a sample exceeds a certain threshold, thereby
              increasing the forecast precision for high ozone values.
              These advantages come at the cost of also increasing the
              percentage of samples that are falsely predicted to be above
              a certain threshold, also resulting in a systematic
              overestimation. The best model obtained, was able to
              increase the hit rate at 60 ppb from $43\%$ to $67\%$ and at
              80 ppb from $1.9\%$ to $15.2\%$. This means that the model is
              able to correctly predict that a sample is above 60 ppb and
              80 ppb for $67\%$ and $15.2\%$ of all samples above that
              threshold instead of only achieving this for $43\%$
              and $1.9\%$ of the samples above that threshold,
              respectively. Therefore the oversampling offers a valuable
              trade off, to sacrifice parts of the overall performance in
              order to increase the ability to forecast high ozone values,
              which might be useful especially for regulatory purposes.
              The finetuning did not add any new value to that, but only
              reverted some of the improvements that were achieved by the
              oversampling. For future research it might be interesting to
              investigate the usage of different oversampling methods or
              explore the application of oversampling and finetuning to
              the forecasting of multiple days, as this study only focused
              on the forecast for the next day.},
  cin      = {JSC},
  cid      = {I:(DE-Juel1)JSC-20090406},
  pnm      = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
              (SDLs) and Research Groups (POF4-511)},
  pid      = {G:(DE-HGF)POF4-5111},
  typ      = {PUB:(DE-HGF)19},
  url      = {https://juser.fz-juelich.de/record/906244},
}