% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article: Kambeitz et al., Nature Mental Health 3(12), 2025.
% The fields reportid, cin, ddc, cid, pnm, pid and typ are not standard BibTeX
% fields; they appear to be repository bookkeeping metadata and are silently
% ignored by standard bibliography styles.
@article{Kambeitz:1049995,
      author       = {Kambeitz, Joseph and Schiffman, Jason and
                      Kambeitz-Ilankovic, Lana and Mittal, Vijay A. and Ettinger,
                      Ulrich and Vogeley, Kai},
      title        = {The empirical structure of psychopathology is represented
                      in large language models},
      journal      = {Nature Mental Health},
      volume       = {3},
      number       = {12},
      issn         = {2731-6076},
      address      = {London},
      publisher    = {Nature Publishing Group UK},
      reportid     = {FZJ-2025-05709},
      pages        = {1482--1492},
      year         = {2025},
      note         = {The original studies analyzed in this work were supported
                      by the National Institute of Mental Health (Grant
                      R01MH112612) to J.S. and the Deutsche Forschungsgemeinschaft
                      (DFG) ET 31/7-1 to U.E. K.V. was supported within the
                      project SIMSUB (Grant 01GP2215) of the German Ministry of
                      Research, Technology and Space (BMFTR). The funders had no
                      role in study design, data collection and analysis, decision
                      to publish or preparation of the manuscript.},
      abstract     = {Clinical assessment and scientific research in psychiatry
                      are largely based on questionnaires that are used to assess
                      psychopathology. The development of large language models
                      (LLMs) offers a new perspective for analysis of the language
                      and terminology on which these questionnaires are based. We
                      used state-of-the-art LLMs to derive numerical
                      representations (‘text embeddings’) of the semantic and
                      sentiment content of items from established questionnaires
                      for the assessment of psychopathology. We compared the
                      pairwise associations between empirical data from
                      cross-sectional studies and text embeddings to test whether
                      the empirical structure of psychopathology can be
                      reconstructed by LLMs. Across four large-scale datasets
                      (n = 1,555, n = 1,099, n = 11,807 and
                      n = 39,755), we found a range of significant
                      correlations between empirical item-pair associations and
                      associations derived from text embeddings (r = 0.18 to
                      r = 0.57, all P < 0.05). Random forest regression
                      models based on semantic or sentiment embeddings predicted
                      empirical item-pair associations with moderate to high
                      accuracy (r = 0.33 to r = 0.81, all P < 0.05).
                      Similarly, empirical clustering of items and grouping to
                      established subdomain scores could be partly reconstructed
                      by text embeddings. Our results demonstrate that LLMs are
                      able to represent substantial components of the empirical
                      structure of psychopathology. Consequently, the integration
                      of LLMs into mental health research has the potential to
                      unlock numerous promising avenues. These may encompass
                      improving the process of developing questionnaires,
                      optimizing generalizability and reducing the redundancy of
                      existing questionnaires or facilitating the development of
                      new conceptualizations of mental disorders.},
      cin          = {INM-3},
      ddc          = {610},
      cid          = {I:(DE-Juel1)INM-3-20090406},
      pnm          = {5251 - Multilevel Brain Organization and Variability
                      (POF4-525)},
      pid          = {G:(DE-HGF)POF4-5251},
      typ          = {PUB:(DE-HGF)16},
      doi          = {10.1038/s44220-025-00527-y},
      url          = {https://juser.fz-juelich.de/record/1049995},
}