% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

@article{Chen:1014812,
      author       = {Chen, Zhiyi and Hu, Bowen and Liu, Xuerong and Becker,
                      Benjamin and Eickhoff, Simon B. and Miao, Kuan and Gu,
                      Xingmei and Tang, Yancheng and Dai, Xin and Li, Chao and
                      Leonov, Artemiy and Xiao, Zhibing and Feng, Zhengzhi and
                      Chen, Ji and Chuan-Peng, Hu},
      title        = {Sampling inequalities affect generalization of
                      neuroimaging-based diagnostic classifiers in psychiatry},
      journal      = {BMC Medicine},
      volume       = {21},
      number       = {1},
      issn         = {1741-7015},
      address      = {Heidelberg [u.a.]},
      publisher    = {Springer},
      reportid     = {FZJ-2023-03482},
      pages        = {241},
      year         = {2023},
      note         = {This work was supported by the PLA Key Research Foundation
                      (CWS20J007), PLA Talent Program Foundation (2022160258), the
                      STI2030-Major Projects (No. 2022ZD0214000), the National Key
                      R\&D Program of China (No. 2021YFC2502200) and the
                      National Natural Science Foundation of China (No.
                      82201658).},
      abstract     = {Background: The development of machine learning models for
                      aiding in the diagnosis of mental disorder is recognized as
                      a significant breakthrough in the field of psychiatry.
                      However, clinical practice of such models remains a
                      challenge, with poor generalizability being a major
                      limitation. Methods: Here, we conducted a pre-registered
                      meta-research assessment on neuroimaging-based models in
                      the psychiatric literature, quantitatively examining global
                      and regional sampling issues over recent decades, from a
                      view that has been relatively underexplored. A total of 476
                      studies ($n = 118{,}137$) were included in the current
                      assessment. Based on these findings, we built a
                      comprehensive 5-star rating system to quantitatively
                      evaluate the quality of existing machine learning models
                      for psychiatric diagnoses. Results: A global sampling
                      inequality in these models was revealed quantitatively
                      (sampling Gini coefficient $G = 0.81$, $p < .01$), varying
                      across different countries (regions) (e.g., China, $G =
                      0.47$; the USA, $G = 0.58$; Germany, $G = 0.78$; the UK,
                      $G = 0.87$). Furthermore, the severity of this sampling
                      inequality was significantly predicted by national economic
                      levels ($\beta = -2.75$, $p < .001$, $R^2_{\mathrm{adj}} =
                      0.40$; $r = -.84$, 95\% CI: $-.41$ to $-.97$), and was
                      plausibly predictable for model performance, with higher
                      sampling inequality for reporting higher classification
                      accuracy. Further analyses showed that lack of independent
                      testing (84.24\% of models, 95\% CI: 81.0--87.5\%),
                      improper cross-validation (51.68\% of models, 95\% CI:
                      47.2--56.2\%), and poor technical transparency (87.8\% of
                      models, 95\% CI: 84.9--90.8\%) / availability (80.88\% of
                      models, 95\% CI: 77.3--84.4\%) are prevailing in current
                      diagnostic classifiers despite improvements over time.
                      Relating to these observations, model performances were
                      found decreased in studies with independent cross-country
                      sampling validations (all $p < .001$, $\mathrm{BF}_{10} >
                      15$). In light of this, we proposed a purpose-built
                      quantitative assessment checklist, which demonstrated that
                      the overall ratings of these models increased by
                      publication year but were negatively associated with model
                      performance. Conclusions: Together, improving sampling
                      economic equality and hence the quality of machine learning
                      models may be a crucial facet to plausibly translating
                      neuroimaging-based diagnostic classifiers into clinical
                      practice. Keywords: Psychiatric machine learning,
                      Diagnostic classification, Meta-analysis, Neuroimaging,
                      Sampling inequalities},
      cin          = {INM-7},
      ddc          = {610},
      cid          = {I:(DE-Juel1)INM-7-20090406},
      pnm          = {5252 - Brain Dysfunction and Plasticity (POF4-525)},
      pid          = {G:(DE-HGF)POF4-5252},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {37400814},
      UT           = {WOS:001022895400003},
      doi          = {10.1186/s12916-023-02941-4},
      url          = {https://juser.fz-juelich.de/record/1014812},
      internal-note = {abstract de-garbled from PDF extraction (soft hyphens,
                      fused words, math-mode percent abuse); journal name
                      capitalised; single-letter title brace removed},
}