% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article record (repository export; see the url field for the
% source record).
%
% Field notes:
%   * No volume or page range is recorded for this article. The value in
%     pii looks like the publisher item identifier (it embeds the ISSN
%     digits); it is kept in a non-standard field so that styles do not
%     print it as a page number. Cite via the doi field.
%   * keywords holds the author keywords that accompany the abstract.
%   * reportid, cin, ddc, cid, pnm, pid and typ are non-standard,
%     repository-specific fields; standard bibliography styles ignore
%     unknown fields, so they do not affect the printed reference.
%   * note carries the funding acknowledgement.
@article{Komeyer:1049623,
      author       = {Komeyer, Vera and Nieto, Nicolás and Eickhoff, Simon B.
                      and Raimondo, Federico and Patil, Kaustubh R.},
      title        = {Overview of Challenges in Brain-Based Predictive
                      Modeling: Toward Meaningful Predictive Insights},
      journal      = {Biological Psychiatry},
      issn         = {0006-3223},
      address      = {Amsterdam [u.a.]},
      publisher    = {Elsevier Science},
      reportid     = {FZJ-2025-05411},
      pii          = {S000632232501460X},
      year         = {2025},
      note         = {This work was supported by the Helmholtz Imagining grant
                      BrainShapes (Grant No. ZT-I-PF-4-062 [to KRP]); the
                      Multi-Omics Data Science project was funded from the program
                      Profilbildung 2020 (Grant No. PROFILNRW-2020-107-A [to
                      SBE]), an initiative of the Ministry of Culture and Science
                      of the State of North Rhine-Westphalia; the H2020 Research
                      Infrastructures (Grant No. EBRAIN-Health 101058516 [to
                      SBE]); the Deutsche Forschungsgemeinschaft Collaborative
                      Research Centre CRC1451 (Project No. 431549029 [to SBE]) on
                      motor performance project B05; and the Universitätsklinikum
                      Düsseldorf, Forschungskommission funded project VoxNorm [to
                      KRP].},
      abstract     = {Predictive analytics based on machine learning (ML) and
                      artificial intelligence is a powerful tool enabling
                      precision psychiatry and providing insights into
                      brain-behavior relationships. However, given the mixed
                      results observed in the field so far, making meaningful
                      progress requires careful consideration of several key
                      challenges to ensure the validity of models and findings,
                      including overfitting, confounding biases, site effect
                      harmonization, and interpretability, among others. First, we
                      highlight limitations of cross-validation, a ubiquitous ML
                      strategy used to prevent overfitting and obtain
                      generalization estimates, emphasizing the risk of
                      performance inflation and the need for independent
                      validation. Next, we introduce different types of so-called
                      third variables that can influence the examination of a
                      brain-behavioral relationship of interest in different ways,
                      using causal inference principles. We emphasize the biasing
                      impact of confounding variables on ML models and summarize
                      common mitigation strategies. We then discuss site-specific
                      effects in multisite datasets, reviewing different
                      harmonization strategies to reduce unwanted variability and
                      site-specific noise. Finally, we explore post hoc model
                      interpretation methods to enhance model transparency while
                      cautioning against misinterpretation. By integrating
                      rigorous result validation, confounder control, and
                      interpretability techniques, researchers can ensure that ML
                      models produce more reliable and generalizable findings and
                      avoid spurious associations.},
      keywords     = {Brain-behavior associations, Confounds, Cross-validation,
                      Harmonization, Machine learning, Model interpretability},
      cin          = {INM-7},
      ddc          = {610},
      cid          = {I:(DE-Juel1)INM-7-20090406},
      pnm          = {5251 - Multilevel Brain Organization and Variability
                      (POF4-525)},
      pid          = {G:(DE-HGF)POF4-5251},
      typ          = {PUB:(DE-HGF)16},
      doi          = {10.1016/j.biopsych.2025.09.003},
      url          = {https://juser.fz-juelich.de/record/1049623},
}