% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference contribution; repository record 850083 at juser.fz-juelich.de
% (report id FZJ-2018-04164).
% The fields reportid, subtyp, cin, cid, pnm, pid and typ are not standard
% BibTeX fields; standard styles ignore unknown fields, so they are kept
% verbatim.  The meeting name, venue and dates are stored in booktitle, which
% is the field inproceedings entries require.
@inproceedings{Reuter:850083,
      author       = {Reuter, Niels and Genon, Sarah and Kharabian, Shahrzad and
                      Eickhoff, Simon and Hoffstaedter, Felix and Patil, Kaustubh},
      title        = {The effect of outliers and their exclusion on
                      resting-state connectivity-based parcellation},
      booktitle    = {2018 Annual Meeting of the Organization of Human Brain
                      Mapping (OHBM), Singapore (Singapore), 18 Jun 2018 - 22
                      Jun 2018},
      reportid     = {FZJ-2018-04164},
      year         = {2018},
      month        = jun,
      date         = {2018-06-18},
      note         = {This study was supported by the Deutsche
                      Forschungsgemeinschaft (DFG, EI 816/11-1), the National
                      Institute of Mental Health (R01-MH074457), the Helmholtz
                      Portfolio Theme ``Supercomputing and Modeling for the Human
                      Brain'' and the European Union’s Horizon 2020 Research
                      and Innovation Programme under Grant Agreement 785907 (HBP
                      SGA2)},
      abstract     = {Title: The effect of outliers and their exclusion on
                      resting-state connectivity-based parcellation. Authors:
                      Niels Reuter, Sarah Genon, Shahrzad Kharabian, Tobias
                      Kalenscher, Felix Hoffstaedter, Rainer Goebel, Simon
                      Eickhoff, Kaustubh Patil. Content [3957 characters inc.
                      spaces]. Introduction: Regional connectivity-based
                      parcellation (CBP) aims to find biologically meaningful
                      parcels or subregions. This is achieved by clustering the
                      voxels in a region of interest (ROI) based on their
                      connectivity profiles. Using a large resting-state fMRI
                      (rs-fMRI) sample, we show that deviant connectivity
                      profiles substantially influence group-based clustering
                      results. Such outliers can arise due to various reasons
                      and we investigated one possible reason for high
                      dimensional data: difference in intrinsic dimensionality.
                      Methods: The Right (R) insula ROI (Fig. 2C), subject to
                      repeated CBP analyses [1], was defined using the Harvard
                      Oxford Atlas [2]. rs-fMRI data from 408 healthy unrelated
                      subjects (2mm isotropic, TR=0.72s, age 22-37, 205 males)
                      from the Human Connectome Project [3] were included.
                      FIX-denoised data was preprocessed with SPM8 [4] using
                      unified segmentation [5], 5mm FWHM smoothed, WM-CSF signal
                      regressed, and frequency-filtered (0.01-0.08 Hz).
                      Correlations between time-series of each ROI voxel and all
                      brain gray-matter voxels were computed and Fisher
                      Z-transformed, yielding an ROI-to-whole-brain connectivity
                      matrix per subject. k-means (with k from 2 to 5) was
                      performed on each connectivity matrix. To identify
                      outliers, for each subject a nearest-neighbor subject was
                      identified using Euclidean distance between connectivity
                      matrices. The resulting vector d was Z-scored (Fig. 1A).
                      k-means (k=2) clustering of d revealed a separation around
                      0, providing a conservative threshold (Fig. 1B). Two
                      further thresholds were chosen: 1.69 (.95 left tail area
                      on a standard normal distribution) and a liberal 2.5.
                      Group parcellations for each k using hierarchical
                      clustering with average linkage and Hamming distance were
                      calculated after excluding outliers based on these
                      thresholds. The adjusted rand index (ARI) between k-means
                      cluster results of all subjects was computed, retaining
                      the highest values per subject as a similarity vector a
                      (Fig. 1C). Lastly, principal component analysis was
                      performed on the connectivity matrices, noting the number
                      of components retaining $95\%$ of variance. Correlating
                      the component numbers to d uncovers whether there is a
                      relationship between intrinsic dimensionality of the
                      connectivity matrices and their distances to one another.
                      Results: Applying the thresholds of 0, 1.69, and 2.5
                      removed 134, 32, and 14 subjects, respectively. When
                      correlating distances d (Fig. 1A) to the similarity vector
                      a (Fig. 1C), we found correlations of -.38, -.41, -.49,
                      and -.53, for k=2, 3, 4, and 5, respectively. This result
                      suggests outliers cluster differently, thus including them
                      into a group-level consensus might be detrimental.
                      Accordingly, differences were found between group-level
                      parcellations (Fig. 2A). For instance, when comparing the
                      liberal 2.5 threshold-removed group parcellation (Fig. 2D,
                      column two) with a group parcellation without outlier
                      removal (Fig. 2D, column one), there was only an $81\%$
                      overlap, ARI=.55 for k=3 (ARI=.67 and .71 for k=4, 5,
                      resp.). Further comparisons are illustrated in Figure 2D.
                      The distances d were related to the number of principal
                      components retaining $95\%$ of variance with correlation
                      of -.79 (Fig. 2B). That is, if intrinsic dimensionality
                      was low for a subject, the associated connectivity matrix
                      would be more distant to the rest of the sample (Fig. 2D).
                      Conclusion: The differences in clusterings highlights the
                      influence of outliers. While assessment of the group-level
                      parcellations reveals that clustering results were
                      relatively stable across thresholds for k=2 (Fig. 2D),
                      ample evidence suggests more than 2 clusters in the
                      R-insula [6,7,8]. As linkage algorithms in hierarchical
                      clustering as well as k-means clustering are sensitive to
                      outliers [9], it is important to remove them by using a
                      proper identification threshold. In the future we will
                      focus on automatic identification of parameters that lead
                      to biologically meaningful parcellations.},
      subtyp       = {Other},
      cin          = {INM-7},
      cid          = {I:(DE-Juel1)INM-7-20090406},
      pnm          = {571 - Connectivity and Activity (POF3-571) / HBP SGA2 -
                      Human Brain Project Specific Grant Agreement 2 (785907) /
                      SMHB - Supercomputing and Modelling for the Human Brain
                      (HGF-SMHB-2013-2017)},
      pid          = {G:(DE-HGF)POF3-571 / G:(EU-Grant)785907 /
                      G:(DE-Juel1)HGF-SMHB-2013-2017},
      typ          = {PUB:(DE-HGF)24},
      url          = {https://juser.fz-juelich.de/record/850083},
}