% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference contribution; see the url field for the repository record.
% Fields unknown to standard styles (reportid, subtyp, cin, cid, pnm, pid,
% typ) are silently ignored by them and are kept verbatim from the export.
@inproceedings{Kaffashzadeh:864041,
  author       = {Kaffashzadeh, Najmeh and Schröder, Sabine and Schultz,
                  Martin},
  title        = {A Novel Concept for Automated Quality Control of
                  Atmospheric Time Series},
  reportid     = {FZJ-2019-03957},
  year         = {2019},
  abstract     = {Measurements of atmospheric physical and chemical
                  parameters are essential for atmospheric model
                  evaluation, trend analysis, climate prediction, and other
                  applications. Particularly when the time series from
                  various measurement instruments or data providers are
                  merged together, assessing the quality of the data
                  presents a major challenge and often relies on subjective
                  screening. The quality of the time series can be affected
                  by several error types, such as random error, systematic
                  error due to calibration errors, and gross error from
                  malfunctioning instruments, or data processing errors,
                  such as mistyped values and improper date-time formats.
                  Some of these errors may have a considerable impact on the
                  statistical analysis of the time series. Thus, identifying
                  the quality of the data, i.e. quality control (QC), is an
                  essential step for any data analysis. Here, we present a
                  software package for the automated QC of the atmospheric
                  time series based on the use of several algorithms that
                  are in use at various environmental agencies and research
                  initiatives. The tool can either be embedded in automated
                  workflows to process real-time data or be applied to a
                  second-level analysis of archived multi-year data. Several
                  statistical tests are grouped in categories with
                  increasing complexity. Any number of tests can be defined
                  and run sequentially. The set of statistical tests and any
                  user arguments can easily be configured with
                  variable-specific control files in the JSON format. This
                  allows for easy integration into an automated workflow
                  software and distributed data processing services. For
                  expressing the quality of a measured data series, we
                  introduced a probability concept which assigns each value
                  a likelihood of being "good" data. Here, "good" is
                  interpreted in a statistical sense as belonging to an
                  expected probability distribution. Some of the tests
                  influence not only the probability of a single point but
                  may also impact on the probability of its neighboring
                  points. We tested the software with multi-annual hourly
                  ozone and temperature data from the database of the
                  Tropospheric Ozone Assessment Report (TOAR). Preliminary
                  results indicate that the concept works well and is able
                  to deal with a large and heterogeneous dataset such as the
                  global collection of ozone data in the TOAR database.},
  month        = apr,
  date         = {2019-04-07},
  organization = {European Geoscience Union (EGU),
                  Vienna (Austria), 7 Apr 2019 - 12 Apr
                  2019},
  subtyp       = {After Call},
  cin          = {JSC},
  cid          = {I:(DE-Juel1)JSC-20090406},
  pnm          = {512 - Data-Intensive Science and Federated Computing
                  (POF3-512) / IntelliAQ - Artificial Intelligence for Air
                  Quality (787576) / Earth System Data Exploration (ESDE)},
  pid          = {G:(DE-HGF)POF3-512 / G:(EU-Grant)787576 /
                  G:(DE-Juel-1)ESDE},
  typ          = {PUB:(DE-HGF)24},
  url          = {https://juser.fz-juelich.de/record/864041},
}