000893374 001__ 893374
000893374 005__ 20230815122843.0
000893374 0247_ $$2doi$$a10.1021/acs.jctc.1c00129
000893374 0247_ $$2ISSN$$a1549-9618
000893374 0247_ $$2ISSN$$a1549-9626
000893374 0247_ $$2Handle$$a2128/28154
000893374 0247_ $$2altmetric$$aaltmetric:108134031
000893374 0247_ $$2pmid$$a34161735
000893374 0247_ $$2WOS$$aWOS:000674289800059
000893374 037__ $$aFZJ-2021-02715
000893374 082__ $$a610
000893374 1001_ $$0P:(DE-HGF)0$$aMulnaes, Daniel$$b0
000893374 245__ $$aTopDomain: Exhaustive Protein Domain Boundary Metaprediction Combining Multisource Information and Deep Learning
000893374 260__ $$aWashington, DC$$c2021
000893374 3367_ $$2DRIVER$$aarticle
000893374 3367_ $$2DataCite$$aOutput Types/Journal article
000893374 3367_ $$0PUB:(DE-HGF)16$$2PUB:(DE-HGF)$$aJournal Article$$bjournal$$mjournal$$s1626172944_14697
000893374 3367_ $$2BibTeX$$aARTICLE
000893374 3367_ $$2ORCID$$aJOURNAL_ARTICLE
000893374 3367_ $$00$$2EndNote$$aJournal Article
000893374 520__ $$aProtein domains are independent, functional, and stable structural units of proteins. Accurate protein domain boundary prediction plays an important role in understanding protein structure and evolution, as well as for protein structure prediction. Current domain boundary prediction methods differ in terms of boundary definition, methodology, and training databases resulting in disparate performance for different proteins. We developed TopDomain, an exhaustive metapredictor, that uses deep neural networks to combine multisource information from sequence- and homology-based features of over 50 primary predictors. For this purpose, we developed a new domain boundary data set termed the TopDomain data set, in which the true annotations are informed by SCOPe annotations, structural domain parsers, human inspection, and deep learning. We benchmark TopDomain against 2484 targets with 3354 boundaries from the TopDomain test set and achieve F1 scores of 78.4% and 73.8% for multidomain boundary prediction within ±20 residues and ±10 residues of the true boundary, respectively. When examined on targets from CASP11-13 competitions, TopDomain achieves F1 scores of 47.5% and 42.8% for multidomain proteins. TopDomain significantly outperforms 15 widely used, state-of-the-art ab initio and homology-based domain boundary predictors. Finally, we implemented TopDomainTMC, which accurately predicts whether domain parsing is necessary for the target protein.
000893374 536__ $$0G:(DE-HGF)POF4-5111$$a5111 - Domain-Specific Simulation Data Life Cycle Labs (SDLs) and Research Groups (POF4-511)$$cPOF4-511$$fPOF IV$$x0
000893374 536__ $$0G:(DE-HGF)POF4-2171$$a2171 - Biological and environmental resources for sustainable use (POF4-217)$$cPOF4-217$$fPOF IV$$x1
000893374 536__ $$0G:(DE-HGF)POF4-2172$$a2172 - Utilization of renewable carbon and energy sources and engineering of ecosystem functions (POF4-217)$$cPOF4-217$$fPOF IV$$x2
000893374 536__ $$0G:(DE-Juel1)hkf7_20200501$$aForschergruppe Gohlke (hkf7_20200501)$$chkf7_20200501$$fForschergruppe Gohlke$$x3
000893374 536__ $$0G:(GEPRIS)267205415$$aDFG project 267205415 - SFB 1208: Identität und Dynamik von Membransystemen - von Molekülen bis zu zellulären Funktionen $$c267205415$$x4
000893374 588__ $$aDataset connected to CrossRef, Journals: juser.fz-juelich.de
000893374 7001_ $$0P:(DE-HGF)0$$aGolchin, Pegah$$b1
000893374 7001_ $$0P:(DE-HGF)0$$aKoenig, Filip$$b2
000893374 7001_ $$0P:(DE-Juel1)172663$$aGohlke, Holger$$b3$$eCorresponding author
000893374 773__ $$0PERI:(DE-600)2166976-4$$a10.1021/acs.jctc.1c00129$$gp. acs.jctc.1c00129$$n7$$p4599–4613$$tJournal of chemical theory and computation$$v17$$x1549-9626$$y2021
000893374 8564_ $$uhttps://juser.fz-juelich.de/record/893374/files/acs.jctc.1c00129.pdf
000893374 8564_ $$uhttps://juser.fz-juelich.de/record/893374/files/TopDomain_Ms_JCTC_rev_final.pdf$$yPublished on 2021-06-23. Available in OpenAccess from 2022-06-23.
000893374 909CO $$ooai:juser.fz-juelich.de:893374$$pdnbdelivery$$pdriver$$pVDB$$popen_access$$popenaire
000893374 9101_ $$0I:(DE-588b)5008462-8$$6P:(DE-Juel1)172663$$aForschungszentrum Jülich$$b3$$kFZJ
000893374 9131_ $$0G:(DE-HGF)POF4-511$$1G:(DE-HGF)POF4-510$$2G:(DE-HGF)POF4-500$$3G:(DE-HGF)POF4$$4G:(DE-HGF)POF$$9G:(DE-HGF)POF4-5111$$aDE-HGF$$bKey Technologies$$lEngineering Digital Futures – Supercomputing, Data Management and Information Security for Knowledge and Action$$vEnabling Computational- & Data-Intensive Science and Engineering$$x0
000893374 9131_ $$0G:(DE-HGF)POF4-217$$1G:(DE-HGF)POF4-210$$2G:(DE-HGF)POF4-200$$3G:(DE-HGF)POF4$$4G:(DE-HGF)POF$$9G:(DE-HGF)POF4-2171$$aDE-HGF$$bForschungsbereich Erde und Umwelt$$lErde im Wandel – Unsere Zukunft nachhaltig gestalten$$vFür eine nachhaltige Bio-Ökonomie – von Ressourcen zu Produkten$$x1
000893374 9131_ $$0G:(DE-HGF)POF4-217$$1G:(DE-HGF)POF4-210$$2G:(DE-HGF)POF4-200$$3G:(DE-HGF)POF4$$4G:(DE-HGF)POF$$9G:(DE-HGF)POF4-2172$$aDE-HGF$$bForschungsbereich Erde und Umwelt$$lErde im Wandel – Unsere Zukunft nachhaltig gestalten$$vFür eine nachhaltige Bio-Ökonomie – von Ressourcen zu Produkten$$x2
000893374 9141_ $$y2021
000893374 915__ $$0StatID:(DE-HGF)0200$$2StatID$$aDBCoverage$$bSCOPUS$$d2021-02-02
000893374 915__ $$0StatID:(DE-HGF)0300$$2StatID$$aDBCoverage$$bMedline$$d2021-02-02
000893374 915__ $$0StatID:(DE-HGF)0530$$2StatID$$aEmbargoed OpenAccess
000893374 915__ $$0StatID:(DE-HGF)0100$$2StatID$$aJCR$$bJ CHEM THEORY COMPUT : 2019$$d2021-02-02
000893374 915__ $$0StatID:(DE-HGF)9905$$2StatID$$aIF >= 5$$bJ CHEM THEORY COMPUT : 2019$$d2021-02-02
000893374 915__ $$0StatID:(DE-HGF)0113$$2StatID$$aWoS$$bScience Citation Index Expanded$$d2021-02-02
000893374 915__ $$0StatID:(DE-HGF)0150$$2StatID$$aDBCoverage$$bWeb of Science Core Collection$$d2021-02-02
000893374 915__ $$0StatID:(DE-HGF)1150$$2StatID$$aDBCoverage$$bCurrent Contents - Physical, Chemical and Earth Sciences$$d2021-02-02
000893374 915__ $$0StatID:(DE-HGF)0160$$2StatID$$aDBCoverage$$bEssential Science Indicators$$d2021-02-02
000893374 915__ $$0StatID:(DE-HGF)0199$$2StatID$$aDBCoverage$$bClarivate Analytics Master Journal List$$d2021-02-02
000893374 920__ $$lyes
000893374 9201_ $$0I:(DE-Juel1)JSC-20090406$$kJSC$$lJülich Supercomputing Center$$x0
000893374 9201_ $$0I:(DE-Juel1)NIC-20090406$$kNIC$$lJohn von Neumann - Institut für Computing$$x1
000893374 9201_ $$0I:(DE-Juel1)IBI-7-20200312$$kIBI-7$$lStrukturbiochemie$$x2
000893374 9201_ $$0I:(DE-Juel1)IBG-4-20200403$$kIBG-4$$lBioinformatik$$x3
000893374 980__ $$ajournal
000893374 980__ $$aVDB
000893374 980__ $$aUNRESTRICTED
000893374 980__ $$aI:(DE-Juel1)JSC-20090406
000893374 980__ $$aI:(DE-Juel1)NIC-20090406
000893374 980__ $$aI:(DE-Juel1)IBI-7-20200312
000893374 980__ $$aI:(DE-Juel1)IBG-4-20200403
000893374 9801_ $$aFullTexts