% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
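%
% A minimal usage sketch (an assumption for illustration, not part of this
% record): the surrounding LaTeX document and the file name "references.bib"
% are placeholders. It shows how this entry could be consumed with biblatex
% and the biber backend, as recommended above:
%
%   \documentclass{article}
%   \usepackage[backend=biber]{biblatex}
%   \addbibresource{references.bib}  % this .bib file, under an assumed name
%   \begin{document}
%   Teuken-7B training practices are described in \cite{Penke:1049808}.
%   \printbibliography
%   \end{document}
%
% With classic BibTeX workflows, the entry would instead be pulled in via
% \bibliographystyle{...} and \bibliography{references}, compiling the
% bibliography with the UTF-8-aware bibtex8 rather than BibTeX 0.99.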
@ARTICLE{Penke:1049808,
author = {Penke, Carolin and John, Chelsea Maria and Ebert, Jan and
Kesselheim, Stefan and Herten, Andreas},
title = {{T}raining {LLM}s on {HPC} {S}ystems: {B}est {P}ractices
from the {O}pen{GPT}-{X} {P}roject},
publisher = {arXiv},
reportid = {FZJ-2025-05592, 2504.10013},
eprint = {2504.10013},
archiveprefix = {arXiv},
primaryclass = {cs.DC},
year = {2025},
abstract = {The training of large language models (LLMs) requires
substantial computational resources, complex software
stacks, and carefully designed workflows to achieve
scalability and efficiency. This report presents best
practices and insights gained from the OpenGPT-X project, a
German initiative focused on developing open, multilingual
LLMs optimized for European languages. We detail the use of
high-performance computing (HPC) systems, primarily JUWELS
Booster at JSC, for training Teuken-7B, a
7-billion-parameter transformer model. The report covers
system architecture, training infrastructure, software
choices, profiling and benchmarking tools, as well as
engineering and operational challenges.},
keywords = {Distributed, Parallel, and Cluster Computing (cs.DC)
(Other) / FOS: Computer and information sciences (Other) /
C.4; I.2.11; I.2.7; K.6 (Other)},
pnm = {5122 - Future Computing \& Big Data Systems (POF4-512) /
5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
and Research Groups (POF4-511) / ATML-X-DEV - ATML
Accelerating Devices (ATML-X-DEV) / OpenGPT-X - Aufbau eines
Gaia-X Knotens für große KI-Sprachmodelle und innovative
Sprachapplikations-Services; Teilvorhaben: Optimierung und
Skalierung auf großen HPC-Systemen (68GX21007F)},
pid = {G:(DE-HGF)POF4-5122 / G:(DE-HGF)POF4-5112 /
G:(DE-Juel-1)ATML-X-DEV / G:(DE-Juel-1)68GX21007F},
typ = {PUB:(DE-HGF)25},
doi = {10.48550/ARXIV.2504.10013},
url = {https://juser.fz-juelich.de/record/1049808},
}