% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@ARTICLE{Sharafutdinov:910683,
author = {Sharafutdinov, Konstantin and Bhat, Jayesh S. and Fritsch,
Sebastian Johannes and Nikulina, Kateryna and Samadi, Moein
E. and Polzin, Richard and Mayer, Hannah and Marx, Gernot
and Bickenbach, Johannes and Schuppert, Andreas},
title = {{A}pplication of convex hull analysis for the evaluation of
data heterogeneity between patient populations of different
origin and implications of hospital bias in downstream
machine-learning-based data processing: {A} comparison of 4
critical-care patient datasets},
journal = {Frontiers in Big Data},
volume = {5},
issn = {2624-909X},
address = {Lausanne},
publisher = {Frontiers Media},
reportid = {FZJ-2022-04055},
pages = {603429},
year = {2022},
abstract = {Machine learning (ML) models are developed on a learning
dataset covering only a small part of the data of interest.
If model predictions are accurate for the learning dataset
but fail for unseen data, the generalization error is
considered high. This problem manifests itself within all
major sub-fields of ML but is especially relevant in medical
applications. Clinical data structures, patient cohorts, and
clinical protocols may be highly biased among hospitals,
such that sampling representative learning datasets for
training ML models remains a challenge. As ML models exhibit poor
predictive performance over data ranges sparsely or not
covered by the learning dataset, in this study, we propose a
novel method to assess their generalization capability among
different hospitals based on the convex hull (CH) overlap
between multivariate datasets. To reduce dimensionality
effects, we used a two-step approach. First, CH analysis was
applied to find the mean CH coverage between each pair of
datasets, yielding an upper bound on the prediction
range. Second, 4 types of ML models were trained to classify
the origin of a dataset (i.e., from which hospital) and to
estimate differences in datasets with respect to underlying
distributions. To demonstrate the applicability of our
method, we used 4 critical-care patient datasets from
different hospitals in Germany and the USA. We estimated the
similarity of these populations and investigated whether ML
models developed on one dataset can be reliably applied to
another. We show that the strongest drop in performance
was associated with poor overlap between the convex hulls of
the corresponding hospitals' datasets and with high
performance of ML methods in dataset discrimination. Hence,
we suggest the application of our pipeline as a first tool
to assess the transferability of trained models. We
emphasize that datasets from different hospitals represent
heterogeneous data sources, and transfer from one
database to another should be performed with utmost care to
avoid adverse effects during real-world application of the
developed models. Further research is needed to develop
methods for the adaptation of ML models to new hospitals. In
addition, more work should be aimed at creating
gold-standard datasets that are large and diverse, with data
from varied application sites.},
cin = {JSC},
ddc = {004},
cid = {I:(DE-Juel1)JSC-20090406},
pnm = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
and Research Groups (POF4-511) / SMITH -
Medizininformatik-Konsortium - Beitrag Forschungszentrum
Jülich (01ZZ1803M)},
pid = {G:(DE-HGF)POF4-5112 / G:(BMBF)01ZZ1803M},
typ = {PUB:(DE-HGF)16},
pubmed = {36387013},
UT = {WOS:000885597500001},
doi = {10.3389/fdata.2022.603429},
url = {https://juser.fz-juelich.de/record/910683},
}
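% The abstract above outlines a two-step pipeline: (1) convex hull (CH)
% coverage between pairs of multivariate datasets, and (2) ML classifiers
% trained to discriminate dataset origin. The Python sketch below (kept as
% %-comment lines so this file remains a valid BibTeX source) illustrates
% the general technique only; it is not the authors' published code, and
% scipy, scikit-learn, the function names, and the placeholder data are
% all assumptions.
%
%   import numpy as np
%   from scipy.spatial import Delaunay
%   from sklearn.ensemble import RandomForestClassifier
%   from sklearn.model_selection import cross_val_score
%
%   def ch_coverage(a, b):
%       """Fraction of points of `b` lying inside the convex hull of `a`."""
%       tri = Delaunay(a)                        # triangulates the hull of `a`
%       return float(np.mean(tri.find_simplex(b) >= 0))  # -1 means "outside"
%
%   def origin_auc(a, b):
%       """Cross-validated ROC AUC of a classifier separating `a` from `b`.
%       ~0.5 suggests similar distributions; ~1.0 suggests strong site bias."""
%       X = np.vstack([a, b])
%       y = np.r_[np.zeros(len(a)), np.ones(len(b))]
%       clf = RandomForestClassifier(n_estimators=200, random_state=0)
%       return float(cross_val_score(clf, X, y, cv=5, scoring="roc_auc").mean())
%
%   # Hypothetical usage with two hospitals' feature matrices:
%   rng = np.random.default_rng(0)
%   hospital_a = rng.normal(size=(500, 5))           # placeholder for site A
%   hospital_b = rng.normal(loc=0.5, size=(500, 5))  # placeholder for site B
%   print(ch_coverage(hospital_a, hospital_b), origin_auc(hospital_a, hospital_b))
%
% Read together as in the paper's conclusion: low CH coverage flags
% extrapolation beyond the training data's range, and a high origin AUC
% flags distributional differences between sites.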