% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Preprint record for Chen et al. (2022); citation key unchanged.
% Typed as misc rather than article because the record has no journal,
% volume or pages; an article entry without a journal triggers a
% missing-field warning under classic BibTeX.
% NOTE(review): howpublished is inferred from the DOI prefix 10.1101
% (bioRxiv-style identifier) -- confirm against the record at the url field.
% The fields reportid, cin, cid, pnm, pid and typ are non-standard; standard
% styles silently ignore unknown fields. Presumably they are bookkeeping
% fields from the exporting repository -- verify before relying on them.
@misc{Chen:915909,
  author       = {Chen, Jianzhong and Ooi, Leon Qi Rong and Li, Jingwei and
                  Asplund, Christopher L. and Eickhoff, Simon B. and Bzdok,
                  Danilo and Holmes, Avram J. and Yeo, B. T. Thomas},
  title        = {There is no fundamental trade-off between prediction
                  accuracy and feature importance reliability},
  howpublished = {bioRxiv preprint},
  year         = {2022},
  doi          = {10.1101/2022.08.08.503167},
  url          = {https://juser.fz-juelich.de/record/915909},
  reportid     = {FZJ-2022-05778},
  cin          = {INM-7},
  cid          = {I:(DE-Juel1)INM-7-20090406},
  pnm          = {5251 - Multilevel Brain Organization and Variability
                  (POF4-525)},
  pid          = {G:(DE-HGF)POF4-5251},
  typ          = {PUB:(DE-HGF)25},
  abstract     = {There is significant interest in using neuroimaging data to
                  predict behavior. The predictive models are often
                  interpreted by the computation of feature importance, which
                  quantifies the predictive relevance of an imaging feature.
                  Tian and Zalesky (2021) suggest that feature importance
                  estimates exhibit low test-retest reliability, pointing to a
                  potential trade-off between prediction accuracy and feature
                  importance reliability. This trade-off is counter-intuitive
                  because both prediction accuracy and test-retest reliability
                  reflect the reliability of brain-behavior relationships
                  across independent samples. Here, we revisit the
                  relationship between prediction accuracy and feature
                  importance reliability in a large well-powered dataset
                  across a wide range of behavioral measures. We demonstrate
                  that, with a sufficient sample size, feature importance
                  (operationalized as Haufe-transformed weights) can achieve
                  fair to excellent test-retest reliability. More
                  specifically, with a sample size of about 2600 participants,
                  Haufe-transformed weights achieve average intra-class
                  correlation coefficients of 0.75, 0.57 and 0.53 for
                  cognitive, personality and mental health measures
                  respectively. Haufe-transformed weights are much more
                  reliable than original regression weights and univariate
                  FC-behavior correlations. Intriguingly, feature importance
                  reliability is strongly positively correlated with
                  prediction accuracy across phenotypes. Within a particular
                  behavioral domain, there was no clear relationship between
                  prediction performance and feature importance reliability
                  across regression algorithms. Finally, we show
                  mathematically that feature importance reliability is
                  necessary, but not sufficient, for low feature importance
                  error. In the case of linear models, lower feature
                  importance error leads to lower prediction error (up to a
                  scaling by the feature covariance matrix). Overall, we find
                  no fundamental trade-off between feature importance
                  reliability and prediction accuracy.},
}