% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

@ARTICLE{Luo:1041321,
      author       = {Luo, Xiaoliang and Rechardt, Akilles and Sun, Guangzhi and
                      Nejad, Kevin K. and Yáñez, Felipe and Yilmaz, Bati and
                      Lee, Kangjoo and Cohen, Alexandra O. and Borghesani,
                      Valentina and Pashkov, Anton and Marinazzo, Daniele and
                      Nicholas, Jonathan and Salatiello, Alessandro and
                      Sucholutsky, Ilia and Minervini, Pasquale and Razavi, Sepehr
                      and Rocca, Roberta and Yusifov, Elkhan and Okalova, Tereza
                      and Gu, Nianlong and Ferianc, Martin and Khona, Mikail and
                      Patil, Kaustubh R. and Lee, Pui-Shee and Mata, Rui and
                      Myers, Nicholas E. and Bizley, Jennifer K. and Musslick,
                      Sebastian and Bilgin, Isil Poyraz and Niso, Guiomar and
                      Ales, Justin M. and Gaebler, Michael and Ratan Murty, N.
                      Apurva and Loued-Khenissi, Leyla and Behler, Anna and Hall,
                      Chloe M. and Dafflon, Jessica and Bao, Sherry Dongqi and
                      Love, Bradley C.},
      title        = {Large language models surpass human experts in predicting
                      neuroscience results},
      journal      = {Nature Human Behaviour},
      volume       = {9},
      number       = {2},
      issn         = {2397-3374},
      address      = {London},
      publisher    = {Nature Research},
      reportid     = {FZJ-2025-02220},
      pages        = {305--315},
      year         = {2025},
      abstract     = {Scientific discoveries often hinge on synthesizing decades
                      of research, a task that potentially outstrips human
                      information processing capacities. Large language models
                      (LLMs) offer a solution. LLMs trained on the vast scientific
                      literature could potentially integrate noisy yet
                      interrelated findings to forecast novel results better than
                      human experts. Here, to evaluate this possibility, we
                      created BrainBench, a forward-looking benchmark for
                      predicting neuroscience results. We find that LLMs surpass
                      experts in predicting experimental outcomes. BrainGPT, an
                      LLM we tuned on the neuroscience literature, performed
                      better yet. Like human experts, when LLMs indicated high
                      confidence in their predictions, their responses were more
                      likely to be correct, which presages a future where LLMs
                      assist humans in making discoveries. Our approach is not
                      neuroscience specific and is transferable to other
                      knowledge-intensive endeavours.},
      cin          = {INM-7},
      ddc          = {150},
      cid          = {I:(DE-Juel1)INM-7-20090406},
      pnm          = {5254 - Neuroscientific Data Analytics and AI (POF4-525)},
      pid          = {G:(DE-HGF)POF4-5254},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {39604572},
      UT           = {WOS:001365146700001},
      doi          = {10.1038/s41562-024-02046-9},
      url          = {https://juser.fz-juelich.de/record/1041321},
}