001037654 001__ 1037654
001037654 005__ 20250203124502.0
001037654 0247_ $$2doi$$a10.1038/s43588-024-00627-2
001037654 0247_ $$2datacite_doi$$a10.34734/FZJ-2025-00819
001037654 0247_ $$2pmid$$a38730184
001037654 0247_ $$2WOS$$aWOS:001220857400002
001037654 037__ $$aFZJ-2025-00819
001037654 082__ $$a004
001037654 1001_ $$00009-0008-5160-8100$$aSiebenmorgen, Till$$b0
001037654 245__ $$aMISATO: machine learning dataset of protein–ligand complexes for structure-based drug discovery
001037654 260__ $$aLondon$$bNature Research$$c2024
001037654 3367_ $$2DRIVER$$aarticle
001037654 3367_ $$2DataCite$$aOutput Types/Journal article
001037654 3367_ $$0PUB:(DE-HGF)16$$2PUB:(DE-HGF)$$aJournal Article$$bjournal$$mjournal$$s1737441865_21954
001037654 3367_ $$2BibTeX$$aARTICLE
001037654 3367_ $$2ORCID$$aJOURNAL_ARTICLE
001037654 3367_ $$00$$2EndNote$$aJournal Article
001037654 520__ $$aLarge language models have greatly enhanced our ability to understand biology and chemistry, yet robust methods for structure-based drug discovery, quantum chemistry and structural biology are still sparse. Precise biomolecule–ligand interaction datasets are urgently needed for large language models. To address this, we present MISATO, a dataset that combines quantum mechanical properties of small molecules and associated molecular dynamics simulations of ~20,000 experimental protein–ligand complexes with extensive validation of experimental data. Starting from the existing experimental structures, semi-empirical quantum mechanics was used to systematically refine these structures. A large collection of molecular dynamics traces of protein–ligand complexes in explicit water is included, accumulating over 170 μs. We give examples of machine learning (ML) baseline models proving an improvement of accuracy by employing our data. An easy entry point for ML experts is provided to enable the next generation of drug discovery artificial intelligence models.
001037654 536__ $$0G:(DE-HGF)POF4-5112$$a5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs) and Research Groups (POF4-511)$$cPOF4-511$$fPOF IV$$x0
001037654 588__ $$aDataset connected to CrossRef, Journals: juser.fz-juelich.de
001037654 7001_ $$00000-0002-7630-5447$$aMenezes, Filipe$$b1
001037654 7001_ $$0P:(DE-Juel1)192312$$aBenassou, Sabrina$$b2$$ufzj
001037654 7001_ $$0P:(DE-HGF)0$$aMerdivan, Erinc$$b3
001037654 7001_ $$00000-0001-6839-3320$$aDidi, Kieran$$b4
001037654 7001_ $$0P:(DE-HGF)0$$aMourão, André Santos Dias$$b5
001037654 7001_ $$0P:(DE-HGF)0$$aKitel, Radosław$$b6
001037654 7001_ $$0P:(DE-HGF)0$$aLiò, Pietro$$b7
001037654 7001_ $$0P:(DE-Juel1)185654$$aKesselheim, Stefan$$b8
001037654 7001_ $$0P:(DE-HGF)0$$aPiraud, Marie$$b9
001037654 7001_ $$00000-0002-2419-1943$$aTheis, Fabian J.$$b10
001037654 7001_ $$00000-0002-1594-0527$$aSattler, Michael$$b11
001037654 7001_ $$00000-0003-2818-7498$$aPopowicz, Grzegorz M.$$b12$$eCorresponding author
001037654 773__ $$0PERI:(DE-600)3029424-1$$a10.1038/s43588-024-00627-2$$gVol. 4, no. 5, p. 367 - 378$$n5$$p367 - 378$$tNature computational science$$v4$$x2662-8457$$y2024
001037654 8564_ $$uhttps://juser.fz-juelich.de/record/1037654/files/s43588-024-00627-2.pdf$$yOpenAccess
001037654 909CO $$ooai:juser.fz-juelich.de:1037654$$pdnbdelivery$$pdriver$$pVDB$$popen_access$$popenaire
001037654 9101_ $$0I:(DE-588b)5008462-8$$6P:(DE-Juel1)192312$$aForschungszentrum Jülich$$b2$$kFZJ
001037654 9101_ $$0I:(DE-588b)5008462-8$$6P:(DE-Juel1)185654$$aForschungszentrum Jülich$$b8$$kFZJ
001037654 9131_ $$0G:(DE-HGF)POF4-511$$1G:(DE-HGF)POF4-510$$2G:(DE-HGF)POF4-500$$3G:(DE-HGF)POF4$$4G:(DE-HGF)POF$$9G:(DE-HGF)POF4-5112$$aDE-HGF$$bKey Technologies$$lEngineering Digital Futures – Supercomputing, Data Management and Information Security for Knowledge and Action$$vEnabling Computational- & Data-Intensive Science and Engineering$$x0
001037654 9141_ $$y2024
001037654 915__ $$0StatID:(DE-HGF)0200$$2StatID$$aDBCoverage$$bSCOPUS$$d2024-12-13
001037654 915__ $$0LIC:(DE-HGF)CCBY4$$2HGFVOC$$aCreative Commons Attribution CC BY 4.0
001037654 915__ $$0StatID:(DE-HGF)0100$$2StatID$$aJCR$$bNAT COMPUT SCI : 2022$$d2024-12-13
001037654 915__ $$0StatID:(DE-HGF)0112$$2StatID$$aWoS$$bEmerging Sources Citation Index$$d2024-12-13
001037654 915__ $$0StatID:(DE-HGF)0150$$2StatID$$aDBCoverage$$bWeb of Science Core Collection$$d2024-12-13
001037654 915__ $$0StatID:(DE-HGF)3003$$2StatID$$aDEAL Nature$$d2024-12-13$$wger
001037654 915__ $$0StatID:(DE-HGF)9910$$2StatID$$aIF >= 10$$bNAT COMPUT SCI : 2022$$d2024-12-13
001037654 915__ $$0StatID:(DE-HGF)0510$$2StatID$$aOpenAccess
001037654 915__ $$0StatID:(DE-HGF)0300$$2StatID$$aDBCoverage$$bMedline$$d2024-12-13
001037654 915__ $$0StatID:(DE-HGF)0199$$2StatID$$aDBCoverage$$bClarivate Analytics Master Journal List$$d2024-12-13
001037654 9201_ $$0I:(DE-Juel1)JSC-20090406$$kJSC$$lJülich Supercomputing Center$$x0
001037654 9801_ $$aFullTexts
001037654 980__ $$ajournal
001037654 980__ $$aVDB
001037654 980__ $$aUNRESTRICTED
001037654 980__ $$aI:(DE-Juel1)JSC-20090406