% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article record exported from the JuSER repository (see the url field).
% The fields reportid, cin, ddc, cid, pnm, pid and typ are repository bookkeeping;
% standard bibliography styles ignore unknown fields.
% NOTE(review): no volume is recorded, and "pages" holds what looks like a
% publisher item identifier, not a page range -- presumably an article in
% press; add volume/number/pages once they are assigned.
% NOTE(review): "Helmholtz Imagining" in the funding note may be a typo for
% "Helmholtz Imaging" -- confirm against the published acknowledgments.
@article{Komeyer:1049623,
  author    = {Komeyer, Vera and Nieto, Nicolás and Eickhoff, Simon B.
               and Raimondo, Federico and Patil, Kaustubh R.},
  title     = {Overview of Challenges in Brain-Based Predictive
               Modeling: Toward Meaningful Predictive Insights},
  journal   = {Biological Psychiatry},
  issn      = {0006-3223},
  address   = {Amsterdam [u.a.]},
  publisher = {Elsevier Science},
  reportid  = {FZJ-2025-05411},
  pages     = {S000632232501460X},
  year      = {2025},
  note      = {This work was supported by the Helmholtz Imagining grant
               BrainShapes (Grant No. ZT-I-PF-4-062 [to KRP]); the
               Multi-Omics Data Science project was funded from the program
               Profilbildung 2020 (Grant No. PROFILNRW-2020-107-A [to
               SBE]), an initiative of the Ministry of Culture and Science
               of the State of North Rhine-Westphalia; the H2020 Research
               Infrastructures (Grant No. EBRAIN-Health 101058516 [to
               SBE]); the Deutsche Forschungsgemeinschaft Collaborative
               Research Centre CRC1451 (Project No. 431549029 [to SBE]) on
               motor performance project B05; and the Universitätsklinikum
               Düsseldorf, Forschungskommission funded project VoxNorm [to
               KRP].},
  abstract  = {Predictive analytics based on machine learning (ML) and
               artificial intelligence is a powerful tool enabling
               precision psychiatry and providing insights into
               brain-behavior relationships. However, given the mixed
               results observed in the field so far, making meaningful
               progress requires careful consideration of several key
               challenges to ensure the validity of models and findings,
               including overfitting, confounding biases, site effect
               harmonization, and interpretability, among others. First, we
               highlight limitations of cross-validation, a ubiquitous ML
               strategy used to prevent overfitting and obtain
               generalization estimates, emphasizing the risk of
               performance inflation and the need for independent
               validation. Next, we introduce different types of so-called
               third variables that can influence the examination of a
               brain-behavioral relationship of interest in different ways,
               using causal inference principles. We emphasize the biasing
               impact of confounding variables on ML models and summarize
               common mitigation strategies. We then discuss site-specific
               effects in multisite datasets, reviewing different
               harmonization strategies to reduce unwanted variability and
               site-specific noise. Finally, we explore post hoc model
               interpretation methods to enhance model transparency while
               cautioning against misinterpretation. By integrating
               rigorous result validation, confounder control, and
               interpretability techniques, researchers can ensure that ML
               models produce more reliable and generalizable findings and
               avoid spurious associations.},
  keywords  = {Brain-behavior associations, Confounds, Cross-validation,
               Harmonization, Machine learning, Model interpretability},
  cin       = {INM-7},
  ddc       = {610},
  cid       = {I:(DE-Juel1)INM-7-20090406},
  pnm       = {5251 - Multilevel Brain Organization and Variability
               (POF4-525)},
  pid       = {G:(DE-HGF)POF4-5251},
  typ       = {PUB:(DE-HGF)16},
  doi       = {10.1016/j.biopsych.2025.09.003},
  url       = {https://juser.fz-juelich.de/record/1049623},
}