% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article record for the GPT-4o / ICU risk-of-death interpretability
% paper (BMC Medical Informatics and Decision Making, vol. 25, no. 1, 2025).
% Author names are stored in "Last, First" form so the surname is parsed
% unambiguously.
% NOTE(review): "pages" appears to hold the article number rather than a page
% range -- confirm against the publisher record.
% NOTE(review): reportid, cin, ddc, cid, pnm, pid and typ are not standard
% BibTeX fields (unknown fields are ignored by styles); presumably they are
% repository metadata from the exporting system -- confirm before editing.
@ARTICLE{ESamadi:1047199,
      author       = {Samadi, Moein E. and Nikulina, Kateryna and Fritsch,
                      Sebastian Johannes and Schuppert, Andreas},
      title        = {{GPT-4o} and the quest for machine learning
                      interpretability in {ICU} risk of death prediction},
      journal      = {BMC Medical Informatics and Decision Making},
      volume       = {25},
      number       = {1},
      issn         = {1472-6947},
      address      = {London},
      publisher    = {BioMed Central},
      reportid     = {FZJ-2025-04148},
      pages        = {373},
      year         = {2025},
      abstract     = {Background: Clinical utilization of machine learning is
                      hampered by the lack of interpretability inherent in most
                      non-linear black box modeling approaches, reducing trust
                      among clinicians and regulators. Advanced large language
                      models offer a potential framework for integrating medical
                      knowledge into these models, potentially enhancing their
                      interpretability. Methods: A hybrid mechanistic/data-driven
                      modeling framework is presented for developing an ICU risk
                      of death prediction model for mechanically ventilated
                      patients. In the mechanistic modeling part, GPT-4o is used
                      to generate detailed medical feature descriptions, which are
                      then aggregated into a comprehensive corpus and processed
                      with TF-IDF vectorization. Fuzzy C-means clustering is
                      subsequently applied to these vectorized features to
                      identify significant mortality cause-specific feature
                      clusters, and a physician reviewed the resulting clusters to
                      validate their relevance to actionable insights for clinical
                      decision support. In the data-driven part, the identified
                      clusters inform the creation of XGBoost-based weak
                      classifiers, whose outcomes are combined into a single
                      XGBoost-based strong classifier through a hierarchically
                      structured feed-forward network. This process results in a
                      novel GPT hybrid model for ICU risk of death
                      prediction. Results: This study enrolled 16,018 mechanically
                      ventilated ICU patients, divided into derivation (12,758)
                      and validation (3,260) cohorts, to develop and evaluate a
                      GPT hybrid model for predicting in-ICU death. Leveraging
                      GPT-4o, we implemented an automated process for clustering
                      mortality cause-specific features, resulting in six feature
                      clusters: Liver Failure, Infection, Renal Failure, Hypoxia,
                      Cardiac Failure, and Mechanical Ventilation. This approach
                      significantly improved upon previous manual methods,
                      automating the reconstruction of structured hybrid models.
                      While the GPT hybrid model showed similar predictive
                      accuracy to a Global XGBoost model, it demonstrated superior
                      interpretability and clinical relevance by incorporating a
                      wider array of features and providing a hierarchical
                      structure of feature importance aligned with medical
                      knowledge. Conclusion: We introduce a novel approach to
                      predicting in-ICU risk of death for mechanically ventilated
                      patients using a GPT hybrid model. Our methodology
                      demonstrates the potential of integrating large language
                      models with traditional machine learning techniques to
                      create interpretable and clinically relevant predictive
                      models.},
      cin          = {JSC / CASA},
      ddc          = {610},
      cid          = {I:(DE-Juel1)JSC-20090406 / I:(DE-Juel1)CASA-20230315},
      pnm          = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
                      and Research Groups (POF4-511) / 5111 - Domain-Specific
                      Simulation \& Data Life Cycle Labs (SDLs) and Research
                      Groups (POF4-511) / SDI-S - SDI-S: Smart Data Innovation
                      Services - Experimentelle Erprobung und Entwicklung von
                      KI-Dienstverbünden für Innovationen auf industriellen
                      Daten (01IS22095D)},
      pid          = {G:(DE-HGF)POF4-5112 / G:(DE-HGF)POF4-5111 /
                      G:(BMBF)01IS22095D},
      typ          = {PUB:(DE-HGF)16},
      doi          = {10.1186/s12911-025-03224-z},
      url          = {https://juser.fz-juelich.de/record/1047199},
}