% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article in BMC Medical Informatics and Decision Making 25(1), article
% no. 373 (2025); repository record 1047199 (see the url field).
% The fields reportid, cin, ddc, cid, pnm, pid and typ are repository metadata;
% they are not standard BibTeX fields, and unknown fields are ignored by styles.
% The first author's surname is entered as "E. Samadi" (comma form), matching
% the citation key -- NOTE(review): confirm this split against the publisher.
@article{ESamadi:1047199,
  author    = {E. Samadi, Moein and Nikulina, Kateryna and
               Fritsch, Sebastian Johannes and Schuppert, Andreas},
  title     = {{GPT-4o} and the quest for machine learning
               interpretability in {ICU} risk of death prediction},
  journal   = {BMC Medical Informatics and Decision Making},
  volume    = {25},
  number    = {1},
  issn      = {1472-6947},
  address   = {London},
  publisher = {BioMed Central},
  reportid  = {FZJ-2025-04148},
  pages     = {373},
  year      = {2025},
  abstract  = {Background: Clinical utilization of machine learning is
               hampered by the lack of interpretability inherent in most
               non-linear black box modeling approaches, reducing trust
               among clinicians and regulators. Advanced large language
               models offer a potential framework for integrating medical
               knowledge into these models, potentially enhancing their
               interpretability. Methods: A hybrid mechanistic/data-driven
               modeling framework is presented for developing an ICU risk
               of death prediction model for mechanically ventilated
               patients. In the mechanistic modeling part, GPT-4o is used
               to generate detailed medical feature descriptions, which are
               then aggregated into a comprehensive corpus and processed
               with TF-IDF vectorization. Fuzzy C-means clustering is
               subsequently applied to these vectorized features to
               identify significant mortality cause-specific feature
               clusters, and a physician reviewed the resulting clusters to
               validate their relevance to actionable insights for clinical
               decision support. In the data-driven part, the identified
               clusters inform the creation of XGBoost-based weak
               classifiers, whose outcomes are combined into a single
               XGBoost-based strong classifier through a hierarchically
               structured feed-forward network. This process results in a
               novel GPT hybrid model for ICU risk of death prediction.
               Results: This study enrolled 16,018 mechanically ventilated
               ICU patients, divided into derivation (12,758) and
               validation (3,260) cohorts, to develop and evaluate a GPT
               hybrid model for predicting in-ICU death. Leveraging GPT-4o,
               we implemented an automated process for clustering mortality
               cause-specific features, resulting in six feature clusters:
               Liver Failure, Infection, Renal Failure, Hypoxia, Cardiac
               Failure, and Mechanical Ventilation. This approach
               significantly improved upon previous manual methods,
               automating the reconstruction of structured hybrid models.
               While the GPT hybrid model showed similar predictive
               accuracy to a Global XGBoost model, it demonstrated superior
               interpretability and clinical relevance by incorporating a
               wider array of features and providing a hierarchical
               structure of feature importance aligned with medical
               knowledge. Conclusion: We introduce a novel approach to
               predicting in-ICU risk of death for mechanically ventilated
               patients using a GPT hybrid model. Our methodology
               demonstrates the potential of integrating large language
               models with traditional machine learning techniques to
               create interpretable and clinically relevant predictive
               models.},
  cin       = {JSC / CASA},
  ddc       = {610},
  cid       = {I:(DE-Juel1)JSC-20090406 / I:(DE-Juel1)CASA-20230315},
  pnm       = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
               and Research Groups (POF4-511) / 5111 - Domain-Specific
               Simulation \& Data Life Cycle Labs (SDLs) and Research
               Groups (POF4-511) / SDI-S - SDI-S: Smart Data Innovation
               Services - Experimentelle Erprobung und Entwicklung von
               KI-Dienstverbünden für Innovationen auf industriellen
               Daten (01IS22095D)},
  pid       = {G:(DE-HGF)POF4-5112 / G:(DE-HGF)POF4-5111 /
               G:(BMBF)01IS22095D},
  typ       = {PUB:(DE-HGF)16},
  doi       = {10.1186/s12911-025-03224-z},
  url       = {https://juser.fz-juelich.de/record/1047199},
}