% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Record for the course "GPU Programming Part 2: Advanced GPU Programming"
% (report ID FZJ-2024-07562, held 3-7 Jun 2024 according to the organization field).
% The fields reportid, subtyp, cin, cid, pnm, pid and typ are not standard
% BibTeX/biblatex fields; presumably repository-export metadata (confirm with
% the exporting system). Unknown fields are ignored by bibliography styles.
@misc{Meinke:1034809,
  author       = {Meinke, Jan and Herten, Andreas and Hrywniak, Markus and
                  Kraus, Jiri and Badwaik, Jayesh and Haghighi Mood, Kaveh},
  title        = {{GPU} Programming Part 2: Advanced {GPU} Programming},
  reportid     = {FZJ-2024-07562},
  year         = {2024},
  month        = jun,
  date         = {2024-06-03},
  abstract     = {GPU-accelerated computing drives current scientific
                  research. Writing fast numeric algorithms for GPUs offers
                  high application performance by offloading compute-intensive
                  portions of the code to a GPU. This advanced course consists
                  of modules providing more in-depth coverage of multi-GPU
                  programming, modern CUDA concepts, CUDA Fortran, and
                  portable programming models such as OpenACC and C++ parallel
                  STL algorithms. Topics covered will include:
                  A) Advanced Multi-GPU Programming with MPI;
                  B) Advanced Multi-GPU Programming with NCCL and NVSHMEM;
                  C) Advanced and Modern CUDA Concepts (Cooperative Groups,
                  CUDA Graphs, CUB Primitives, Modern C++ Programming);
                  D) CUDA Fortran;
                  E) GPU Programming with Abstractions (OpenACC, Standard
                  Language Programming (pSTL))},
  organization = {JSC - as part of the Training
                  Programme of Forschungszentrum Jülich,
                  Jülich / online (Germany), 3 Jun 2024
                  - 7 Jun 2024},
  subtyp       = {Other},
  cin          = {JSC},
  cid          = {I:(DE-Juel1)JSC-20090406},
  pnm          = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
                  (SDLs) and Research Groups (POF4-511) / 5112 - Cross-Domain
                  Algorithms, Tools, Methods Labs (ATMLs) and Research Groups
                  (POF4-511) / 5122 - Future Computing \& Big Data Systems
                  (POF4-512) / ATML-X-DEV - ATML Accelerating Devices
                  (ATML-X-DEV)},
  pid          = {G:(DE-HGF)POF4-5111 / G:(DE-HGF)POF4-5112 /
                  G:(DE-HGF)POF4-5122 / G:(DE-Juel-1)ATML-X-DEV},
  typ          = {PUB:(DE-HGF)17},
  doi          = {10.34734/FZJ-2024-07562},
  url          = {https://juser.fz-juelich.de/record/1034809},
}