% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
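% A minimal usage sketch of the biber route mentioned above (the file name
% "references.bib" and the main document name "main" are assumptions for
% illustration, not part of this record):
%
%   \usepackage[backend=biber]{biblatex}
%   \addbibresource{references.bib}
%   ...
%   \cite{Luo:1041321}
%   \printbibliography
%
% Compile with: pdflatex main && biber main && pdflatex main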
@ARTICLE{Luo:1041321,
author = {Luo, Xiaoliang and Rechardt, Akilles and Sun, Guangzhi and
Nejad, Kevin K. and Yáñez, Felipe and Yilmaz, Bati and
Lee, Kangjoo and Cohen, Alexandra O. and Borghesani,
Valentina and Pashkov, Anton and Marinazzo, Daniele and
Nicholas, Jonathan and Salatiello, Alessandro and
Sucholutsky, Ilia and Minervini, Pasquale and Razavi, Sepehr
and Rocca, Roberta and Yusifov, Elkhan and Okalova, Tereza
and Gu, Nianlong and Ferianc, Martin and Khona, Mikail and
Patil, Kaustubh R. and Lee, Pui-Shee and Mata, Rui and
Myers, Nicholas E. and Bizley, Jennifer K. and Musslick,
Sebastian and Bilgin, Isil Poyraz and Niso, Guiomar and
Ales, Justin M. and Gaebler, Michael and Ratan Murty, N.
Apurva and Loued-Khenissi, Leyla and Behler, Anna and Hall,
Chloe M. and Dafflon, Jessica and Bao, Sherry Dongqi and
Love, Bradley C.},
title = {{L}arge language models surpass human experts in predicting
neuroscience results},
journal = {Nature Human Behaviour},
volume = {9},
number = {2},
issn = {2397-3374},
address = {London},
publisher = {Nature Research},
reportid = {FZJ-2025-02220},
pages = {305 - 315},
year = {2025},
abstract = {Scientific discoveries often hinge on synthesizing decades
of research, a task that potentially outstrips human
information processing capacities. Large language models
(LLMs) offer a solution. LLMs trained on the vast scientific
literature could potentially integrate noisy yet
interrelated findings to forecast novel results better than
human experts. Here, to evaluate this possibility, we
created BrainBench, a forward-looking benchmark for
predicting neuroscience results. We find that LLMs surpass
experts in predicting experimental outcomes. BrainGPT, an
LLM we tuned on the neuroscience literature, performed
better yet. Like human experts, when LLMs indicated high
confidence in their predictions, their responses were more
likely to be correct, which presages a future where LLMs
assist humans in making discoveries. Our approach is not
neuroscience specific and is transferable to other
knowledge-intensive endeavours.},
cin = {INM-7},
ddc = {150},
cid = {I:(DE-Juel1)INM-7-20090406},
pnm = {5254 - Neuroscientific Data Analytics and AI (POF4-525)},
pid = {G:(DE-HGF)POF4-5254},
typ = {PUB:(DE-HGF)16},
pubmed = {39604572},
UT = {WOS:001365146700001},
doi = {10.1038/s41562-024-02046-9},
url = {https://juser.fz-juelich.de/record/1041321},
}