% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% JuSER repository export (record 1007707). Fields reportid/subtyp/cin/cid/
% pnm/pid/typ are institutional metadata; standard BibTeX styles silently
% ignore unknown field names, so they are harmless and kept verbatim.
% NOTE(review): @inproceedings normally requires a booktitle; this poster
% record has none — confirm whether the event data in `organization` should
% populate it, or whether @misc would describe this poster better.
@inproceedings{John:1007707,
      author       = {John, Chelsea Maria and Ebert, Jan and Penke, Carolin and
                      Kesselheim, Stefan and Herten, Andreas},
      title        = {{OpenGPT-X} – Training Large Language Models on {HPC}
                      Systems},
      reportid     = {FZJ-2023-02173},
      year         = {2023},
      abstract     = {OpenGPT-X is a German initiative to build and train large
                      language models (LLMs). The project aims at providing an
                      open alternative to LLMs which are up to now private
                      property, along with a platform for researching methods to
                      train multilingual LLMs efficiently. For that, the project
                      not only utilizes the state-of-the-art in training models
                      but also incorporates new methods, algorithms, and tools.
                      Models trained within the project will be published and used
                      for pilot language services by industry partners. In
                      addition, further applications are expected through Gaia-X
                      federation. LLMs can scale to more than 175 Billion
                      parameters, which requires efficient usage of supercomputers
                      like JUWELS Booster. Especially in the light of the recent
                      successes of ChatGPT, our work clearly indicates that the
                      infrastructure of supercomputing centres and initiatives
                      aiming to provide resources to the public can have a large
                      societal impact. This poster outlines the initial progress
                      and future work of the project from Jülich Supercomputing
                      Center (JSC).},
      month        = may,
      date         = {2023-05-21},
      organization = {ISC High Performance 2023, Hamburg
                      (Germany), 21 May 2023 - 25 May 2023},
      subtyp       = {After Call},
      keywords     = {HPC (Other) / GPU (Other) / OpenGPTX (Other)},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
                      and Research Groups (POF4-511) / 5121 - Supercomputing \&
                      Big Data Facilities (POF4-512) / ATML-X-DEV - ATML
                      Accelerating Devices (ATML-X-DEV)},
      pid          = {G:(DE-HGF)POF4-5112 / G:(DE-HGF)POF4-5121 /
                      G:(DE-Juel-1)ATML-X-DEV},
      typ          = {PUB:(DE-HGF)24},
      doi          = {10.34732/XDVBLG-SVNDMJ},
      url          = {https://juser.fz-juelich.de/record/1007707},
}