% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Talk record "JUWELS Booster - Early User Experiences" (subtyp: Plenary/Keynote).
% Standard bibliographic fields come first; the trailing fields (reportid, subtyp,
% cin, cid, pnm, pid, typ, UT) are not standard BibTeX fields and are ignored by
% standard styles -- presumably repository bookkeeping metadata; keep them verbatim.
% Both year/month and the ISO date are kept so that classic BibTeX and Biber
% toolchains can each pick up a publication date.
@inproceedings{Herten:893756,
  author    = {Herten, Andreas},
  title     = {{JUWELS} {Booster} - {Early} {User} {Experiences}},
  booktitle = {The 30th International Symposium on
               High-Performance Parallel and
               Distributed Computing, {PERMAVOST}
               Workshop, Virtual (Sweden), 21 Jun 2021
               - 25 Jun 2021},
  year      = {2021},
  month     = jun,
  date      = {2021-06-21},
  doi       = {10.1145/3452412.3462752},
  url       = {https://juser.fz-juelich.de/record/893756},
  abstract  = {Over the last few years, GPUs became ubiquitous in HPC
               installations around the world. Today, they provide the main
               source of performance in a number of Top500 machines - for
               example Summit, Sierra, and JUWELS Booster. Also for the
               upcoming Exascale era, GPUs are selected as key enablers and
               will be installed numerously. While individual GPU devices
               already offer plenty of performance (O (10) TFLOP/s FP64),
               current and next-generation super-computers employ them in
               the thousands. Using these machines to the fullest extent
               means not only utilizing individual devices efficiently, but
               using the entire interconnected system of devices
               thoroughly. JUWELS Booster is a recently installed Tier-0/1
               system at Jülich Supercomputing Centre (JSC), currently the
               7th-fastest supercomputer in the world, and the fastest in
               Europe. JUWELS Booster features 936 nodes, each equipped
               with 4 NVIDIA A100 Tensor Core GPUs and 4 Mellanox HDR200
               InfiniBand HCAs. The peak performance of all GPUs together
               sums up to 73 PFLOP/s and it features a DragonFly+ network
               topology with 800 Gbit/s network injection bandwidth per
               node. During installation of JUWELS Booster, a selected set
               of applications were given access to the system as part of
               the JUWELS Booster Early Access Program. To prepare for
               their first compute time allocation, scientific users were
               able to gain first experiences on the machine. They gave
               direct feedback to the system operations team during
               installation and beyond. Close collaboration was facilitated
               with the application support staff of JSC, giving unique
               insights into the individual processes of utilizing a
               brand-new large-scale system for a first time. Likewise,
               performance profiles of applications could be studied and
               collaboratively analyzed, employing available tools and
               methods. Performance limiters of the specific application on
               the platform were identified and proposals for improvement
               developed. This talk will present first experiences with
               JUWELS Booster and the applications utilizing the system
               during its first months. Applied methods for onboarding,
               analysis, and optimization will be shown and assessed.
               Highlights of the state of the art of performance analysis
               and modeling for GPUs will be presented with concrete
               examples from the JUWELS Booster Early Access Program.},
  reportid  = {FZJ-2021-02811},
  subtyp    = {Plenary/Keynote},
  cin       = {JSC},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {5121 - Supercomputing \& Big Data Facilities (POF4-512) /
               5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
               and Research Groups (POF4-511) / ATML-X-DEV - ATML
               Accelerating Devices (ATML-X-DEV)},
  pid       = {G:(DE-HGF)POF4-5121 / G:(DE-HGF)POF4-5112 /
               G:(DE-Juel-1)ATML-X-DEV},
  typ       = {PUB:(DE-HGF)6},
  UT        = {WOS:001322551200001},
}