% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
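%
% A minimal usage sketch for citing the entry below (assumptions: the entry is
% saved in a file named "references.bib" and biblatex with the biber backend is
% available; the citation key Penke:910080 is taken from the record itself):
%
%   \documentclass{article}
%   \usepackage[utf8]{inputenc}   % harmless on current LaTeX kernels, needed on older ones
%   \usepackage[backend=biber]{biblatex}
%   \addbibresource{references.bib}
%   \begin{document}
%   OpenGPT-X trains large language models on HPC systems~\cite{Penke:910080}.
%   \printbibliography
%   \end{document}
%
% Compile with pdflatex, then biber, then pdflatex again so the citation and
% bibliography resolve.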
@INPROCEEDINGS{Penke:910080,
author = {Penke, Carolin and John, Chelsea Maria and Herten, Andreas
and Ebert, Jan and Kesselheim, Stefan and Suarez, Estela},
title = {{O}pen{GPT}-{X} - {T}raining {L}arge {L}anguage {M}odels on
{HPC} {S}ystems},
reportid = {FZJ-2022-03599},
year = {2022},
abstract = {Artificial neural networks represent an HPC workload of
             increasing importance. In particular, the field of Natural
             Language Processing (NLP) has been undergoing a revolution
             in recent years. The training of ever-larger language
             models, such as GPT-3, demands large HPC resources and has
             the potential to greatly impact everyday technology. The
             OpenGPT-X project, established in 2022, aims not to leave
             this field to large tech companies but to provide an open,
             publicly funded alternative based on European values. The
             Jülich Supercomputing Centre is a consortium partner
             providing HPC infrastructure for the pre-training of the
             models. We research the optimization potential in the
             training process, for example by using novel accelerator
             architectures.},
month = {Sep},
date = {2022-09-28},
organization = {14th JLESC Workshop, Urbana-Champaign
(USA), 28 Sep 2022 - 30 Sep 2022},
subtyp = {After Call},
cin = {JSC},
cid = {I:(DE-Juel1)JSC-20090406},
pnm = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
and Research Groups (POF4-511) / ATML-X-DEV - ATML
Accelerating Devices (ATML-X-DEV)},
pid = {G:(DE-HGF)POF4-5112 / G:(DE-Juel-1)ATML-X-DEV},
typ = {PUB:(DE-HGF)24},
url = {https://juser.fz-juelich.de/record/910080},
}