% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference contribution presented at the 2017 Annual Meeting of the
% Organization for Human Brain Mapping (OHBM), Vancouver.
% Notes for maintainers:
%   - booktitle carries the conference name, place and dates (it is the
%     required venue field for @inproceedings); these were previously stored
%     in the "organization" field.
%   - reportid, subtyp, cin, cid, pnm, pid and typ are non-standard fields;
%     they look like repository bookkeeping exported with the record
%     (presumably from the site given in "url" -- confirm before relying on
%     them). Unknown fields are ignored by standard styles.
%   - year/month are kept alongside date so that both classic BibTeX styles
%     and biblatex can read the publication date.
@inproceedings{Varikuti:840630,
  author       = {Varikuti, Deepthi and Genon, Sarah and Sotiras, Aristeidis
                  and Schwender, Holger and Hoffstaedter, Felix and Jockwitz,
                  Christiane and Caspers, Svenja and Moebus, Susanne and
                  Amunts, Katrin and Davatzikos, Christos and Eickhoff, Simon},
  title        = {Evaluation of Non-Negative Matrix Factorization of
                  Grey Matter in Age Prediction},
  booktitle    = {Annual Meeting of the Organization for Human Brain Mapping
                  ({OHBM}), Vancouver, Canada, 25--29 June 2017},
  year         = {2017},
  month        = jun,
  date         = {2017-06-25},
  reportid     = {FZJ-2017-08134},
  abstract     = {It has been shown that machine-learning methods applied to
                  voxel-based morphometry (VBM) data allows the prediction of
                  brain age [1]. Dimensionality reduction is a critical aspect
                  of such brain-based prediction of phenotypical
                  characteristics to counter the curse of dimensionality
                  associated with voxel-wise analysis. While previous
                  age-predictions have employed PCA based compression,
                  non-negative matrix factorization (NNMF) has recently been
                  suggested as a plausible factorization of high-dimensional
                  VBM data [4]. Non-negativity and sparsity of the components
                  obtained from NNMF facilitate relatively more optimal
                  solution than the PCA based compression [4]. Here, we
                  evaluate, i) whether NNMF compression allows predictions of
                  biological age that reproduce those from previously reported
                  analyses [2], ii) the impact of the NNMF’s granularity on
                  the prediction accuracy, iii) the possible effect of the
                  factorizations derived from different datasets on the
                  prediction, and iv) whether explicit adjustment can address
                  the model bias inherent to many brain-based
                  predictions. Methods: VBM8 preprocessing (using only
                  non-linear modulation and 8 mm FWHM smoothing [3]) was used
                  to compute voxel-wise GM volumes for two datasets, 1) 693
                  healthy older adults (age: 55-75 years) scanned at a single
                  site (“1000BRAINS”) [1], 2) 1084 healthy adults (age: 18-81
                  years), scanned at multiple sites (“Mixed”) (Fig 1A).
                  NNMF solutions for both groups were derived at different
                  levels of granularity. Age prediction was performed by
                  fitting LASSO regression models either on the coefficient
                  matrix from the respective NNMF or by those that were
                  derived from projecting a group’s data on the respective
                  other groups components. Model generalization was evaluated
                  by 10-fold cross-validation replicated 25 times. To address
                  the known bias towards the mean, i.e., overestimation of
                  young and underestimation of older subjects, we additionally
                  tested models that explicitly fitted the regression-slope
                  between the real and predicted training set and used this to
                  adjust the expected slope of the test set to 45
                  degrees. Results: In both datasets, NNMF components resembled
                  neurobiologically reasonable patterning of the brain (Fig
                  1B). Prediction accuracy based on the projection of data on
                  the components from either group was virtually identical
                  (Fig 2A). For both datasets, mean absolute errors (MAE)
                  declined with higher granularity of the components and
                  reached values well comparable to previous approaches even
                  when using components derived from an independent sample
                  (MAE: 3.6 years for 1000BRAINS; 6.4 years for Mixed).
                  Plotting the prediction error relative to the biological age
                  of the subjects revealed the bias towards the mean across
                  both datasets (Fig 2B). Adjusting for the slope estimated in
                  the training set allows removing this bias, though it needs
                  to be noted that this comes at the cost of reduced
                  precision, i.e., unbiased estimates yield a slightly higher
                  MAE. Conclusion: NNMF allows the definition of co-variation
                  patterns in VBM data. Due to the non-negativity and
                  sparseness, NNMF enable substantially easier and higher
                  biological interpretation than other methods for data
                  compression such as PCA [4]. We showed that NNMF compression
                  of VBM data over the lifespan allows predicting previously
                  unseen subjects’ age with a precision that is comparable
                  to earlier reports using PCA for data compression [2], while
                  offering the potential for neurobiological interpretation.
                  Importantly, accuracy seems to be independent of whether the
                  components were derived from the same dataset or from a
                  dataset that is not only independent but also different in
                  age distribution. We note that accuracies tend to
                  continuously decrease with higher granularity, although
                  performance tends to plateau at about 300 components.
                  Finally, adjusting the inherent bias of sparse regression
                  models yields unbiased out-of-sample predictions but comes
                  at the expense of slightly higher mean errors.},
  subtyp       = {Other},
  cin          = {INM-7},
  cid          = {I:(DE-Juel1)INM-7-20090406},
  pnm          = {571 - Connectivity and Activity (POF3-571) / SMHB -
                  Supercomputing and Modelling for the Human Brain
                  (HGF-SMHB-2013-2017) / HBP SGA1 - Human Brain Project
                  Specific Grant Agreement 1 (720270)},
  pid          = {G:(DE-HGF)POF3-571 / G:(DE-Juel1)HGF-SMHB-2013-2017 /
                  G:(EU-Grant)720270},
  typ          = {PUB:(DE-HGF)24},
  url          = {https://juser.fz-juelich.de/record/840630},
}