% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article record for the MISATO dataset paper.
% The fields reportid, cin, ddc, cid, pnm, pid, typ, pubmed and UT are not
% standard BibTeX fields (presumably metadata from the exporting repository;
% verify before relying on them). Standard styles ignore unknown fields.
% Author names and the abstract contain non-ASCII UTF-8 characters.
@article{Siebenmorgen:1037654,
  author    = {Siebenmorgen, Till and Menezes, Filipe and Benassou,
               Sabrina and Merdivan, Erinc and Didi, Kieran and Mourão,
               André Santos Dias and Kitel, Radosław and Liò, Pietro and
               Kesselheim, Stefan and Piraud, Marie and Theis, Fabian J.
               and Sattler, Michael and Popowicz, Grzegorz M.},
  title     = {{MISATO}: machine learning dataset of protein–ligand
               complexes for structure-based drug discovery},
  journal   = {Nature Computational Science},
  volume    = {4},
  number    = {5},
  issn      = {2662-8457},
  address   = {London},
  publisher = {Nature Research},
  reportid  = {FZJ-2025-00819},
  pages     = {367--378},
  year      = {2024},
  abstract  = {Large language models have greatly enhanced our ability to
               understand biology and chemistry, yet robust methods for
               structure-based drug discovery, quantum chemistry and
               structural biology are still sparse. Precise
               biomolecule–ligand interaction datasets are urgently
               needed for large language models. To address this, we
               present MISATO, a dataset that combines quantum mechanical
               properties of small molecules and associated molecular
               dynamics simulations of ~20,000 experimental
               protein–ligand complexes with extensive validation of
               experimental data. Starting from the existing experimental
               structures, semi-empirical quantum mechanics was used to
               systematically refine these structures. A large collection
               of molecular dynamics traces of protein–ligand complexes
               in explicit water is included, accumulating over 170 μs.
               We give examples of machine learning (ML) baseline models
               proving an improvement of accuracy by employing our data. An
               easy entry point for ML experts is provided to enable the
               next generation of drug discovery artificial intelligence
               models.},
  cin       = {JSC},
  ddc       = {004},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
               and Research Groups (POF4-511)},
  pid       = {G:(DE-HGF)POF4-5112},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {38730184},
  UT        = {WOS:001220857400002},
  doi       = {10.1038/s43588-024-00627-2},
  url       = {https://juser.fz-juelich.de/record/1037654},
}