% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference talk record (JuSER export; repository-specific fields such as
% reportid/cin/cid/pnm/pid/typ/subtyp are preserved verbatim).
% Fixes: month macro instead of {Nov}; whole-word/acronym-only brace
% protection in the title; added required booktitle for @inproceedings.
@inproceedings{Penke:1034068,
      author       = {Penke, Carolin},
      title        = {Efficient Computation of Low-Rank Representations
                      to Reduce Memory Requirements in {LLM} Training},
      booktitle    = {LoRAINNe’24: Workshop on LOw-Rank Approximations
                      and their Interactions with Neural NEtworks},
      reportid     = {FZJ-2024-06889},
      year         = {2024},
      month        = nov,
      date         = {2024-11-26},
      abstract     = {The OpenGPT-X project represents one of Europe’s
                      pioneering publicly funded efforts in the domain of large
                      language models (LLMs), covering the entire lifecycle from
                      pre-training foundational models to fine-tuning and
                      practical application development. To maximize the
                      efficiency of training on High Performance Computing (HPC)
                      resources, strategies aimed at reducing computational and
                      memory demands are being explored. A promising avenue
                      exploits the low-rank structure of gradients, as done in the
                      LoRA or GaLore frameworks, the latter of which relies on the
                      computation of dominant low-rank subspaces during training.
                      The randomized range finder algorithm provides a more
                      efficient alternative to computing a full singular value
                      decomposition (SVD). We introduce a novel variant of the
                      range finder, based on the blocked Householder QR
                      decomposition, optimized for modern GPU accelerators.},
      organization = {LoRAINNe’24: workshop on LOw-Rank
                      Approximations and their Interactions
                      with Neural NEtworks, Nancy (France),
                      26 Nov 2024 - 27 Nov 2024},
      subtyp       = {Invited},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
                      and Research Groups (POF4-511) / OpenGPT-X - Aufbau eines
                      Gaia-X Knotens für große KI-Sprachmodelle und innovative
                      Sprachapplikations-Services; Teilvorhaben: Optimierung und
                      Skalierung auf großen HPC-Systemen (68GX21007F)},
      pid          = {G:(DE-HGF)POF4-5112 / G:(DE-Juel-1)68GX21007F},
      typ          = {PUB:(DE-HGF)6},
      doi          = {10.34734/FZJ-2024-06889},
      url          = {https://juser.fz-juelich.de/record/1034068},
}