% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference abstract record exported from the JuSER repository (see url).
% The fields reportid, subtyp, cin, cid, pnm, pid and typ are not standard
% BibTeX fields; presumably repository bookkeeping, ignored by standard styles.
% NOTE(review): the abstract text names Tobias Kalenscher and Rainer Goebel as
% authors, but they are absent from the author field -- confirm against the
% repository record before citing.
@inproceedings{Reuter:850083,
  author       = {Reuter, Niels and Genon, Sarah and Kharabian, Shahrzad and
                  Eickhoff, Simon and Hoffstaedter, Felix and Patil, Kaustubh},
  title        = {The effect of outliers and their exclusion on
                  resting-state connectivity-based parcellation},
  booktitle    = {2018 Annual Meeting of the Organization of Human Brain
                  Mapping ({OHBM}), Singapore (Singapore), 18 Jun 2018 -- 22
                  Jun 2018},
  reportid     = {FZJ-2018-04164},
  year         = {2018},
  month        = jun,
  date         = {2018-06-18},
  note         = {This study was supported by the Deutsche
                  Forschungsgemeinschaft (DFG, EI 816/11-1), the National
                  Institute of Mental Health (R01-MH074457), the Helmholtz
                  Portfolio Theme ``Supercomputing and Modeling for the Human
                  Brain'' and the European Union’s Horizon 2020 Research
                  and Innovation Programme under Grant Agreement 785907 (HBP
                  SGA2)},
  abstract     = {Title: The effect of outliers and their exclusion on
                  resting-state connectivity-based parcellation. Authors:
                  Niels Reuter, Sarah Genon, Shahrzad Kharabian, Tobias
                  Kalenscher, Felix Hoffstaedter, Rainer Goebel, Simon
                  Eickhoff, Kaustubh Patil. Content [3957 characters inc.
                  spaces]. Introduction: Regional connectivity-based
                  parcellation (CBP) aims to find biologically meaningful
                  parcels or subregions. This is achieved by clustering the
                  voxels in a region of interest (ROI) based on their
                  connectivity profiles. Using a large resting-state fMRI
                  (rs-fMRI) sample, we show that deviant connectivity
                  profiles substantially influence group-based clustering
                  results. Such outliers can arise due to various reasons and
                  we investigated one possible reason for high dimensional
                  data: difference in intrinsic dimensionality. Methods: The
                  Right (R) insula ROI (Fig. 2C), subject to repeated CBP
                  analyses [1], was defined using the Harvard Oxford Atlas
                  [2]. rs-fMRI data from 408 healthy unrelated subjects (2mm
                  isotropic, TR=0.72s, age 22-37, 205 males) from the Human
                  Connectome Project [3] were included. FIX-denoised data was
                  preprocessed with SPM8 [4] using unified segmentation [5],
                  5mm FWHM smoothed, WM-CSF signal regressed, and
                  frequency-filtered (0.01-0.08 Hz). Correlations between
                  time-series of each ROI voxel and all brain gray-matter
                  voxels were computed and Fisher Z-transformed, yielding an
                  ROI-to-whole-brain connectivity matrix per subject. k-means
                  (with k from 2 to 5) was performed on each connectivity
                  matrix. To identify outliers, for each subject a
                  nearest-neighbor subject was identified using Euclidean
                  distance between connectivity matrices. The resulting
                  vector d was Z-scored (Fig. 1A). k-means (k=2) clustering
                  of d revealed a separation around 0, providing a
                  conservative threshold (Fig. 1B). Two further thresholds
                  were chosen: 1.69 (.95 left tail area on a standard normal
                  distribution) and a liberal 2.5. Group parcellations for
                  each k using hierarchical clustering with average linkage
                  and Hamming distance were calculated after excluding
                  outliers based on these thresholds. The adjusted rand index
                  (ARI) between k-means cluster results of all subjects was
                  computed, retaining the highest values per subject as a
                  similarity vector a (Fig. 1C). Lastly, principal component
                  analysis was performed on the connectivity matrices, noting
                  the number of components retaining $95\%$ of variance.
                  Correlating the component numbers to d uncovers whether
                  there is a relationship between intrinsic dimensionality of
                  the connectivity matrices and their distances to one
                  another. Results: Applying the thresholds of 0, 1.69, and
                  2.5 removed 134, 32, and 14 subjects, respectively. When
                  correlating distances d (Fig. 1A) to the similarity vector
                  a (Fig. 1C), we found correlations of -.38, -.41, -.49, and
                  -.53, for k=2, 3, 4, and 5, respectively. This result
                  suggests outliers cluster differently, thus including them
                  into a group-level consensus might be detrimental.
                  Accordingly, differences were found between group-level
                  parcellations (Fig. 2A). For instance, when comparing the
                  liberal 2.5 threshold-removed group parcellation (Fig. 2D,
                  column two) with a group parcellation without outlier
                  removal (Fig. 2D, column one), there was only an $81\%$
                  overlap, ARI=.55 for k=3 (ARI=.67 and .71 for k=4, 5,
                  resp.). Further comparisons are illustrated in Figure 2D.
                  The distances d were related to the number of principal
                  components retaining $95\%$ of variance with correlation of
                  -.79 (Fig. 2B). That is, if intrinsic dimensionality was
                  low for a subject, the associated connectivity matrix would
                  be more distant to the rest of the sample (Fig. 2D).
                  Conclusion: The differences in clusterings highlight the
                  influence of outliers. While assessment of the group-level
                  parcellations reveals that clustering results were
                  relatively stable across thresholds for k=2 (Fig. 2D),
                  ample evidence suggests more than 2 clusters in the
                  R-insula [6,7,8]. As linkage algorithms in hierarchical
                  clustering as well as k-means clustering are sensitive to
                  outliers [9], it is important to remove them by using a
                  proper identification threshold. In the future we will
                  focus on automatic identification of parameters that lead
                  to biologically meaningful parcellations.},
  subtyp       = {Other},
  cin          = {INM-7},
  cid          = {I:(DE-Juel1)INM-7-20090406},
  pnm          = {571 - Connectivity and Activity (POF3-571) / HBP SGA2 -
                  Human Brain Project Specific Grant Agreement 2 (785907) /
                  SMHB - Supercomputing and Modelling for the Human Brain
                  (HGF-SMHB-2013-2017)},
  pid          = {G:(DE-HGF)POF3-571 / G:(EU-Grant)785907 /
                  G:(DE-Juel1)HGF-SMHB-2013-2017},
  typ          = {PUB:(DE-HGF)24},
  url          = {https://juser.fz-juelich.de/record/850083},
}