% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference contribution (EGU2020). The event name, venue and dates are stored
% in `booktitle`, which @inproceedings requires.
% The fields reportid, subtyp, cin, cid, pnm, pid and typ are not standard
% BibTeX fields and are ignored by standard styles; they look like repository
% bookkeeping metadata (presumably from the record given in `url`) and are
% kept with their original values.
@inproceedings{Kaffashzadeh:875342,
  author    = {Kaffashzadeh, Najmeh and Chang, Kai-Lan and Schröder,
               Sabine and Schultz, Martin G.},
  title     = {A Statistical Model for Automated Quality
               Assessment of the {TOAR-II}},
  booktitle = {{EGU2020}: Sharing Geoscience Online,
               Vienna (Austria), 4--8 May 2020},
  year      = {2020},
  month     = may,
  date      = {2020-05-04},
  doi       = {10.5194/egusphere-egu2020-13357},
  url       = {https://juser.fz-juelich.de/record/875342},
  abstract  = {The Tropospheric Ozone Assessment Report, phase 2,
               (TOAR-II) database is a collection of global ground-level
               ozone in-situ measurements from various locations. It also
               holds data of selected ozone precursors and meteorological
               variables. TOAR-II assembles air quality data from many
               different sources and thus requires a common data quality
               assessment (QA) to ensure the data meet the quality required
               for globally consistent analyses. The large volume of this
               database (more than 100,000 data series) enforces the use of
               automated, data-driven QA procedures. Accordingly, we have
               developed a statistical model for automated QA. This model
               consists of several statistical tests that are classified
               into several sub-groups. In this model, a QA-score (an
               indicator ranging from 0 to 1) was assigned to each
               individual data point to estimate the value's
               plausibility. The foundation of this concept is statistical
               hypothesis testing and the probability theory. This model
               was implemented in a Python package and is called
               AutoQA4Env. One application of AutoQA4Env is the data
               ingestion workflow of TOAR-II. The tool generates a data
               quality report which is then sent back to the data provider
               for inspection. Since AutoQA4Env is easily configurable, it
               allows the users to set quality thresholds and thus filter
               data according to their use case. While we primarily develop
               AutoQA4Env for air quality data, the same concept and model
               might be applicable to other databases and the software
               framework is flexible enough to allow for other use cases.},
  reportid  = {FZJ-2020-01965},
  subtyp    = {Other},
  cin       = {JSC},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {512 - Data-Intensive Science and Federated Computing
               (POF3-512) / IntelliAQ - Artificial Intelligence for Air
               Quality (787576) / Earth System Data Exploration (ESDE)},
  pid       = {G:(DE-HGF)POF3-512 / G:(EU-Grant)787576 /
               G:(DE-Juel-1)ESDE},
  typ       = {PUB:(DE-HGF)6},
}