% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Entry GarciadeGonzalo:916372 -- tutorial "Efficient Distributed GPU
% Programming for Exascale" (report id FZJ-2022-06173, year 2022).
% The fields reportid, subtyp, cin, cid, pnm, pid and typ are not standard
% BibTeX fields; they appear to be repository export metadata and are kept
% verbatim (standard styles silently ignore unknown fields).
% Both year/month and date are kept: classic styles read year/month, while
% biblatex reads date.
@misc{GarciadeGonzalo:916372,
  author       = {Garcia de Gonzalo, Simon and Oden, Lena and Herten, Andreas
                  and Hrywniak, Markus and Kraus, Jiri},
  title        = {Efficient Distributed {GPU} Programming for Exascale},
  reportid     = {FZJ-2022-06173},
  year         = {2022},
  month        = may,
  date         = {2022-05-29},
  abstract     = {Over the past years, GPUs became ubiquitous in HPC
                  installations around the world. Today, they provide the
                  majority of performance of some of the largest
                  supercomputers (e.g. Summit, Sierra, JUWELS Booster). This
                  trend continues in the pre-exascale and exascale systems
                  (LUMI, Leonardo; Perlmutter, Frontier): GPUs are chosen as
                  the core computing devices to enter this next era of HPC. To
                  take advantage of future GPU-accelerated systems with tens
                  of thousands of devices, application developers need to have
                  the proper skills and tools to understand, manage, and
                  optimize distributed GPU applications. In this tutorial,
                  participants will learn techniques to efficiently program
                  large-scale multi-GPU systems. While programming multiple
                  GPUs with MPI is explained in detail, advanced tuning
                  techniques and complementary programming models like NCCL
                  and NVSHMEM are presented as well. Tools for analysis are
                  shown and used to motivate and implement performance
                  optimizations. The tutorial is a combination of lectures and
                  hands-on exercises, using Europe's fastest supercomputer,
                  JUWELS Booster with NVIDIA GPUs, for interactive learning
                  and discovery.},
  organization = {ISC High Performance 2022, Hamburg
                  (Germany), 29 May 2022 - 29 May 2022},
  subtyp       = {After Call},
  cin          = {JSC},
  cid          = {I:(DE-Juel1)JSC-20090406},
  pnm          = {5122 - Future Computing \& Big Data Systems (POF4-512) /
                  5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
                  and Research Groups (POF4-511) / 5111 - Domain-Specific
                  Simulation \& Data Life Cycle Labs (SDLs) and Research
                  Groups (POF4-511) / ATML-X-DEV - ATML Accelerating Devices
                  (ATML-X-DEV)},
  pid          = {G:(DE-HGF)POF4-5122 / G:(DE-HGF)POF4-5112 /
                  G:(DE-HGF)POF4-5111 / G:(DE-Juel-1)ATML-X-DEV},
  typ          = {PUB:(DE-HGF)17},
  doi          = {10.5281/ZENODO.6603470},
  url          = {https://juser.fz-juelich.de/record/916372},
}