% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@INPROCEEDINGS{Penke:1034059,
author = {Penke, Carolin},
title = {{A}n {I}ntroduction to {L}arge {L}anguage {M}odels},
reportid = {FZJ-2024-06880},
year = {2024},
abstract = {Large Language Models (LLMs) have revolutionized the field
of artificial intelligence, enabling advanced text
generation and understanding. This talk provides a concise
overview of LLMs, focusing on their development,
architecture, and implementation. We explain key concepts
and give details on the backbone of modern LLMs: the
transformer architecture and its attention mechanism.
Training these models on supercomputers requires advanced
parallelization techniques. Recent advances and promising
trends are identified. Through the lens of the OpenGPT-X
project, this presentation highlights the collaborative
effort of developing multilingual, open-source LLMs.},
month = {Jun},
date = {2024-06-06},
organization = {Women in Data Science Conference
Chemnitz, Chemnitz (Germany), 6 Jun
2024 - 7 Jun 2024},
subtyp = {Plenary/Keynote},
cin = {JSC},
cid = {I:(DE-Juel1)JSC-20090406},
pnm = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
and Research Groups (POF4-511) / 5122 - Future Computing
\& Big Data Systems (POF4-512) / OpenGPT-X - Building a
Gaia-X node for large AI language models and innovative
language application services; subproject: optimization and
scaling on large HPC systems (68GX21007F) / JuWinHPC
- Jülich Women in HPC (JuWinHPC)},
pid = {G:(DE-HGF)POF4-5112 / G:(DE-HGF)POF4-5122 /
G:(DE-Juel-1)68GX21007F / G:(DE-Juel-1)JuWinHPC},
typ = {PUB:(DE-HGF)6},
url = {https://juser.fz-juelich.de/record/1034059},
}