% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Talk by Carolin Penke at the RWTH Aachen SFB 1481 Colloquium, Aachen,
% 11 Dec 2024 (record 1034062 at juser.fz-juelich.de).
%
% Field notes:
%  - booktitle is required for this entry type and holds the event name;
%    venue and eventdate are biblatex fields that carry the location and the
%    exact day (classic BibTeX styles ignore them).
%  - month uses the predefined three-letter macro so that styles can
%    localise or abbreviate it.
%  - The title is stored in Title Case without per-letter braces; it has no
%    acronyms or proper nouns that need case protection.
%  - reportid, subtyp, cin, cid, pnm, pid and typ are non-standard fields
%    that standard styles silently ignore. Presumably they are repository
%    bookkeeping metadata from the exporting system - confirm before editing.
@inproceedings{Penke:1034062,
  author    = {Penke, Carolin},
  title     = {Efficient Computation of Low-Rank Representations to Reduce
               Memory Requirements in Deep Learning},
  booktitle = {{RWTH} Aachen {SFB} 1481 Colloquium},
  venue     = {Aachen, Germany},
  eventdate = {2024-12-11},
  year      = {2024},
  month     = dec,
  abstract  = {Computing an orthogonal basis that approximates the range
               or corange of a matrix is a ubiquitous problem in
               computational science and engineering. In numerous
               applications, a rapid decay of singular values permits the
               use of such bases to approximate a linear operator by
               restricting it to low-rank subspaces, thereby significantly
               reducing computational and storage demands. A powerful
               approach for constructing a basis with a specified rank or
               approximation tolerance is the (adaptive) randomized range
               finder. In this talk, we introduce a novel variant of this
               algorithm, based on the blocked Householder QR
               decomposition, optimized for modern GPU accelerators. This
               development is motivated by its potential to substantially
               lower memory requirements during the training of deep neural
               networks such as transformers. We discuss the GaLore
               (Gradient Low-Rank Projection) training framework, and
               demonstrate how the randomized range finder can be employed
               to derive low-rank representations of optimizer states.
               Further potential avenues for future research are
               discussed.},
  doi       = {10.34734/FZJ-2024-06883},
  url       = {https://juser.fz-juelich.de/record/1034062},
  reportid  = {FZJ-2024-06883},
  subtyp    = {Invited},
  cin       = {JSC},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
               and Research Groups (POF4-511) / OpenGPT-X - Aufbau eines
               Gaia-X Knotens für große KI-Sprachmodelle und innovative
               Sprachapplikations-Services; Teilvorhaben: Optimierung und
               Skalierung auf großen HPC-Systemen (68GX21007F)},
  pid       = {G:(DE-HGF)POF4-5112 / G:(DE-Juel-1)68GX21007F},
  typ       = {PUB:(DE-HGF)31},
}