% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% OneProt journal article, PLOS Computational Biology 21(11), 2025.
% The citation key is kept exactly as it was so existing citations still resolve.
% reportid, cin, ddc, cid, pnm, pid and typ are not standard BibTeX fields; they
% look like repository bookkeeping (see the url field) and are ignored by
% standard bibliography styles.
@ARTICLE{Flge:1048464,
      author       = {Flöge, Klemens and Udayakumar, Srisruthi and Sommer,
                      Johanna and Piraud, Marie and Kesselheim, Stefan and
                      Fortuin, Vincent and Günnemann, Stephan and van der Weg,
                      Karel J. and Gohlke, Holger and Merdivan, Erinc and
                      Bazarova, Alina},
      title        = {{OneProt}: {Towards} multi-modal protein foundation
                      models via latent space alignment of sequence, structure,
                      binding sites and text encoders},
      journal      = {PLOS Computational Biology},
      volume       = {21},
      number       = {11},
      issn         = {1553-734X},
      address      = {San Francisco, Calif.},
      publisher    = {Public Library of Science},
      reportid     = {FZJ-2025-04662},
      pages        = {e1013679},
      year         = {2025},
      abstract     = {Recent advances in Artificial Intelligence have enabled
                      multi-modal systems to model and translate diverse
                      information spaces. Extending beyond text and vision, we
                      introduce OneProt, a multi-modal Deep Learning model for
                      proteins that integrates structural, sequence, text, and
                      binding site data. Using the ImageBind framework, OneProt
                      aligns the latent spaces of protein modality encoders in a
                      lightweight fine-tuning scheme that focuses on pairwise
                      alignment with sequence data, rather than requiring full
                      matches. This novel approach comprises a mix of Graph Neural
                      Networks and transformer architectures. It demonstrates good
                      performance in retrieval tasks and showcases the efficacy of
                      multi-modal systems in Protein Machine Learning through a
                      broad spectrum of downstream baselines, including enzyme
                      function prediction and binding site analysis. Furthermore,
                      OneProt enables the transfer of representational information
                      from specialized encoders to the sequence encoder, enhancing
                      capabilities for distinguishing evolutionarily related and
                      unrelated sequences and exhibiting representational
                      properties where evolutionarily related proteins align in
                      similar directions within the latent space. In addition, we
                      extensively investigate modality ablations to identify the
                      encoders that contribute the most to predictive performance,
                      highlighting the significance of the binding site encoder,
                      which has not been used in similar models previously. This
                      work expands the horizons of multi-modal protein models,
                      paving the way for transformative applications in drug
                      discovery, biocatalytic reaction planning, and protein
                      engineering.},
      cin          = {IBG-4 / JSC},
      ddc          = {610},
      cid          = {I:(DE-Juel1)IBG-4-20200403 / I:(DE-Juel1)JSC-20090406},
      pnm          = {2171 - Biological and environmental resources for
                      sustainable use (POF4-217) / 5112 - Cross-Domain Algorithms,
                      Tools, Methods Labs (ATMLs) and Research Groups (POF4-511) /
                      Helmholtz AI Consultant Team FB Information (E54.303.11)},
      pid          = {G:(DE-HGF)POF4-2171 / G:(DE-HGF)POF4-5112 /
                      G:(DE-Juel-1)E54.303.11},
      typ          = {PUB:(DE-HGF)16},
      doi          = {10.1371/journal.pcbi.1013679},
      url          = {https://juser.fz-juelich.de/record/1048464},
}