% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article record (NeuroImage, volume 274, 2023). The url field points
% to record 1006999 on juser.fz-juelich.de; the same number is used in the
% citation key.
% The fields reportid, cin, ddc, cid, pnm, pid, typ, pubmed and UT are not
% standard BibTeX fields; they are kept verbatim as repository metadata.
% NOTE(review): pages holds the bare article number 120115 (the export had an
% open-ended range "120115 -") -- confirm against the publisher record.
@article{Chen:1006999,
  author    = {Chen, Jianzhong and Ooi, Leon Qi Rong and Tan, Trevor Wei
               Kiat and Zhang, Shaoshi and Li, Jingwei and Asplund,
               Christopher L. and Eickhoff, Simon B. and Bzdok, Danilo and
               Holmes, Avram J. and Yeo, B. T. Thomas},
  title     = {Relationship Between Prediction Accuracy and
               Feature Importance Reliability: An Empirical and
               Theoretical Study},
  journal   = {NeuroImage},
  volume    = {274},
  issn      = {1053-8119},
  address   = {Orlando, Fla.},
  publisher = {Academic Press},
  reportid  = {FZJ-2023-01938},
  pages     = {120115},
  year      = {2023},
  abstract  = {There is significant interest in using neuroimaging data to
               predict behavior. The predictive models are often
               interpreted by the computation of feature importance, which
               quantifies the predictive relevance of an imaging feature.
               Tian and Zalesky (2021) suggest that feature importance
               estimates exhibit low split-half reliability, as well as a
               trade-off between prediction accuracy and feature importance
               reliability across parcellation resolutions. However, it is
               unclear whether the trade-off between prediction accuracy
               and feature importance reliability is universal. Here, we
               demonstrate that, with a sufficient sample size, feature
               importance (operationalized as Haufe-transformed weights)
               can achieve fair to excellent split-half reliability. With a
               sample size of 2600 participants, Haufe-transformed weights
               achieve average intra-class correlation coefficients of
               0.75, 0.57 and 0.53 for cognitive, personality and mental
               health measures respectively. Haufe-transformed weights are
               much more reliable than original regression weights and
               univariate FC-behavior correlations. Original regression
               weights are not reliable even with 2600 participants.
               Intriguingly, feature importance reliability is strongly
               positively correlated with prediction accuracy across
               phenotypes. Within a particular behavioral domain, there is
               no clear relationship between prediction performance and
               feature importance reliability across regression models.
               Furthermore, we show mathematically that feature importance
               reliability is necessary, but not sufficient, for low
               feature importance error. In the case of linear models,
               lower feature importance error is mathematically related to
               lower prediction error. Therefore, higher feature importance
               reliability might yield lower feature importance error and
               higher prediction accuracy. Finally, we discuss how our
               theoretical results relate with the reliability of imaging
               features and behavioral measures. Overall, the current study
               provides empirical and theoretical insights into the
               relationship between prediction accuracy and feature
               importance reliability.},
  cin       = {INM-7},
  ddc       = {610},
  cid       = {I:(DE-Juel1)INM-7-20090406},
  pnm       = {5251 - Multilevel Brain Organization and Variability
               (POF4-525)},
  pid       = {G:(DE-HGF)POF4-5251},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {37088322},
  UT        = {WOS:001005138000001},
  doi       = {10.1016/j.neuroimage.2023.120115},
  url       = {https://juser.fz-juelich.de/record/1006999},
}