% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article (Current Directions in Biomedical Engineering 11(1), 2025).
% Notes for maintainers:
%  - Only the acronyms in the title are brace-protected; the bibliography
%    style decides the casing of the remaining words.
%  - The author list contains a non-ASCII character, so a UTF-8 capable
%    backend (bibtex8, biber) is needed.
%  - reportid, cin, ddc, cid, pnm, pid and typ are not standard BibTeX fields
%    and are ignored by standard styles; presumably repository metadata from
%    the exporting system (see url) -- confirm before relying on them.
@ARTICLE{Goldermann:1048425,
      author       = {Goldermann, Lavinia and Fonck, Simon and Olivier, Lena and
                      Fritsch, Sebastian and Stollenwerk, André},
      title        = {The Influence of Human Annotation on {CNN} Performance
                      for Anomaly Detection in {ICU} Data},
      journal      = {Current directions in biomedical engineering},
      volume       = {11},
      number       = {1},
      issn         = {2364-5504},
      address      = {Berlin},
      publisher    = {De Gruyter},
      reportid     = {FZJ-2025-04636},
      pages        = {362--365},
      year         = {2025},
      abstract     = {Deep learning methods are increasingly used in clinical
                      artificial intelligence (AI) research, including for
                      detecting anomalies in intensive care data. However, their
                      evaluation often depends on human annotations, which can
                      vary in quality and consistency. In this study, we analyse
                      the effect of annotation variability on the performance of
                      DeepAnT, an unsupervised convolutional neural network for
                      anomaly detection (AD). Using intensive care time-series
                      data from 38 patients for training and six patients
                      separately annotated for evaluation, we compare F1 scores
                      based on two independent physician annotations. Our results
                      show differences in model performance across different vital
                      parameters, between patients, and especially between
                      annotators evaluating the same data. These findings indicate
                      that human labelling has a measurable impact on the
                      perceived performance of the AD algorithm. Structured
                      labelling protocols may be beneficial for achieving more
                      consistent and reliable evaluations.},
      cin          = {JSC / CASA},
      ddc          = {570},
      cid          = {I:(DE-Juel1)JSC-20090406 / I:(DE-Juel1)CASA-20230315},
      pnm          = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
                      and Research Groups (POF4-511)},
      pid          = {G:(DE-HGF)POF4-5112},
      typ          = {PUB:(DE-HGF)16},
      doi          = {10.1515/cdbme-2025-0192},
      url          = {https://juser.fz-juelich.de/record/1048425},
}