% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Hoffbauer and Strodel (2024), "TransMEP", listed with bioRxiv as the journal.
% Title: the tool name TransMEP and the word after the colon are brace-protected
% as whole words so that sentence-casing styles keep their capitals.
% The fields reportid, cin, cid, pnm, pid and typ are not standard BibTeX
% fields; standard styles ignore unknown fields, so they act as annotations.
% NOTE(review): pages = 23 looks like a page count rather than a page range;
% confirm against the record before relying on it in printed output.
@article{Hoffbauer:1033763,
      author       = {Hoffbauer, Tilman and Strodel, Birgit},
      title        = {{TransMEP}: {Transfer} learning on large protein language
                      models to predict mutation effects of proteins from a small
                      known dataset},
      journal      = {bioRxiv},
      reportid     = {FZJ-2024-06604},
      pages        = {23},
      year         = {2024},
      abstract     = {Machine learning-guided optimization has become a driving
                      force for recent improvements in protein engineering. In
                      addition, new protein language models are learning the
                      grammar of evolutionarily occurring sequences at large
                      scales. This work combines both approaches to make
                      predictions about mutational effects that support protein
                      engineering. To this end, an easy-to-use software tool
                      called TransMEP is developed using transfer learning by
                      feature extraction with Gaussian process regression. A large
                      collection of datasets is used to evaluate its quality,
                      which scales with the size of the training set, and to show
                      its improvements over previous fine-tuning approaches.
                      Wet-lab studies are simulated to evaluate the use of
                      mutation effect prediction models for protein engineering.
                      This showed that TransMEP finds the best performing mutants
                      with a limited study budget by considering the trade-off
                      between exploration and exploitation.},
      cin          = {IBI-7},
      cid          = {I:(DE-Juel1)IBI-7-20200312},
      pnm          = {5241 - Molecular Information Processing in Cellular Systems
                      (POF4-524)},
      pid          = {G:(DE-HGF)POF4-5241},
      typ          = {PUB:(DE-HGF)25},
      doi          = {10.1101/2024.01.12.575432},
      url          = {https://juser.fz-juelich.de/record/1033763},
}