% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference contribution on a stacking ensemble model for brain-age
% prediction (repository record 1022029, report id FZJ-2024-01166).
% The fields reportid, subtyp, cin, cid, pnm, pid and typ are not standard
% BibTeX fields; they appear to be repository-export metadata and are kept
% verbatim (standard styles silently ignore unknown fields).
% NOTE(review): booktitle was taken from the event name given in the
% organization field, since this entry type requires one -- confirm that it
% matches the official event/proceedings title.
@INPROCEEDINGS{Antonopoulos:1022029,
  author       = {Antonopoulos, Georgios and More, Shammi and Raimondo,
                  Federico and Patil, Kaustubh},
  title        = {Stacking ensemble for age-prediction improves performance
                  and privacy},
  booktitle    = {{Helmholtz} {AI}},
  reportid     = {FZJ-2024-01166},
  year         = {2023},
  abstract     = {Brain-age prediction (BAP) using structural MRI has shown
                  great potential for studying healthy aging and disease. Two
                  major desirable properties for BAP are high accuracy and
                  data privacy. We propose a stacking ensemble model (SEM)
                  which improves both compared to current implementations.
                  Our SEM consists of two levels (L0 and L1). At L0, we used
                  an 873-parcel atlas to group gray-matter volume voxels, and
                  trained one GLMnet model for each parcel. The out-of-sample
                  (OOS, using 3-fold cross-validation) predictions from all L0
                  models were used as features to train a GLMnet model at L1
                  which provides the final age-prediction. To make predictions
                  on an independent test-set, L0 models were trained on the
                  whole dataset (Figure 1). We explored two different ways to
                  train models at L0 and L1, i.e., i) using pooled data from
                  different sites, and ii) treating each site separately and
                  then averaging their outcomes. To compare with current
                  standards we also tested models using average GMV in each
                  parcel as inputs of L1. Additionally, to test the case
                  where enough data is available at the test site, we
                  estimated L0-level OOS predictions on the test data. These
                  were then used to obtain predictions using L1 models. The
                  former schemes provide different levels and types of privacy
                  advantage. The latter provides an advantage for clinical
                  applications, as only L0-level predictions need to be shared
                  and not the raw data. We used T1w MRI scans of healthy
                  subjects from 4 open datasets (IXI, eNKI, CamCAN and
                  1000Gehirne) with n>500 each (total N=3103, 18-90 age
                  range). We performed leave-one-site-out analysis and tested
                  the impact of using one or more datasets for training. The
                  highest test performance was observed for the set-ups with
                  L0-level predictions coming from the test data, with the
                  best set up using pooled predictions of L0 from three sites
                  to train the L1 model (MAE=4.7) followed by the L1 models
                  trained on 3 sites separately (MAE=4.8). This set-up
                  provides improved data privacy as L0 analysis can be
                  performed at the application site and only predictions need
                  to be shared. Set-ups based on mean GMV performed the worst
                  (MAE=6.5-7.3). We also found that L0 models provide robust
                  interpretation of regional aging effects, i.e. the Pearson
                  correlation of real age with predicted-age was higher than
                  with GMV.},
  month        = jun,
  date         = {2023-06-12},
  organization = {Helmholtz AI, Hamburg (Germany), 12
                  Jun 2023 - 14 Jun 2023},
  subtyp       = {After Call},
  cin          = {INM-7},
  cid          = {I:(DE-Juel1)INM-7-20090406},
  pnm          = {5253 - Neuroimaging (POF4-525) / 5251 - Multilevel Brain
                  Organization and Variability (POF4-525)},
  pid          = {G:(DE-HGF)POF4-5253 / G:(DE-HGF)POF4-5251},
  typ          = {PUB:(DE-HGF)24},
  doi          = {10.34734/FZJ-2024-01166},
  url          = {https://juser.fz-juelich.de/record/1022029},
}