% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
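%
% A minimal usage sketch, assuming this file is saved as "references.bib" (the
% file name is illustrative, not prescribed by this record).  With biblatex
% and the biber backend recommended above, the UTF-8 characters in the entry
% below are handled natively:
%
%   \documentclass{article}
%   \usepackage[backend=biber]{biblatex}
%   \addbibresource{references.bib}
%   \begin{document}
%   Best practices for LLM training on HPC systems are
%   described in \cite{Penke:1049808}.
%   \printbibliography
%   \end{document}
%
% Compile with pdflatex (or lualatex/xelatex), run "biber" on the job name,
% then pdflatex again to resolve the citation.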

@MISC{Penke:1049808,
      author       = {Penke, Carolin and John, Chelsea Maria and Ebert, Jan and
                      Kesselheim, Stefan and Herten, Andreas},
      title        = {{Training LLMs on HPC Systems: Best Practices
                       from the OpenGPT-X Project}},
      publisher    = {arXiv},
      reportid     = {FZJ-2025-05592, 2504.10013},
      eprint       = {2504.10013},
      archiveprefix = {arXiv},
      primaryclass = {cs.DC},
      year         = {2025},
      abstract     = {The training of large language models (LLMs) requires
                      substantial computational resources, complex software
                      stacks, and carefully designed workflows to achieve
                      scalability and efficiency. This report presents best
                      practices and insights gained from the OpenGPT-X project, a
                      German initiative focused on developing open, multilingual
                      LLMs optimized for European languages. We detail the use of
                      high-performance computing (HPC) systems, primarily JUWELS
                      Booster at JSC, for training Teuken-7B, a
                      7-billion-parameter transformer model. The report covers
                      system architecture, training infrastructure, software
                      choices, profiling and benchmarking tools, as well as
                      engineering and operational challenges.},
      keywords     = {Distributed, Parallel, and Cluster Computing (cs.DC) /
                      FOS: Computer and information sciences /
                      C.4; I.2.11; I.2.7; K.6},
      pnm          = {5122 - Future Computing \& Big Data Systems (POF4-512) /
                      5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
                      and Research Groups (POF4-511) / ATML-X-DEV - ATML
                      Accelerating Devices (ATML-X-DEV) / OpenGPT-X - Building a
                      Gaia-X node for large AI language models and innovative
                      language application services; subproject: optimization and
                      scaling on large HPC systems (68GX21007F)},
      pid          = {G:(DE-HGF)POF4-5122 / G:(DE-HGF)POF4-5112 /
                      G:(DE-Juel-1)ATML-X-DEV / G:(DE-Juel-1)68GX21007F},
      typ          = {PUB:(DE-HGF)25},
      doi          = {10.48550/ARXIV.2504.10013},
      url          = {https://juser.fz-juelich.de/record/1049808},
}