% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Record for the 2022 online course "GPU Programming with CUDA" (report id
% FZJ-2022-05803). The citation key is kept as exported so existing citations
% continue to resolve. Fields such as reportid, subtyp, cin, cid, pnm, pid and
% typ are not standard BibTeX fields; standard styles ignore unknown fields, so
% they are retained as metadata only.
% Both year/month and date are kept: classic BibTeX styles read year/month,
% while biblatex reads date.
@misc{Herten:915940,
  author       = {Herten, Andreas and Meinke, Jan and Haghighi Mood, Kaveh
                  and Kraus, Jiri and Hrywniak, Markus},
  title        = {{GPU} {Programming} with {CUDA}},
  reportid     = {FZJ-2022-05803},
  year         = {2022},
  month        = apr,
  date         = {2022-04-25},
  note         = {Online course within the PRACE and FZJ training program.},
  organization = {PRACE Training Course at JSC, online,
                  25 Apr 2022 - 29 Apr 2022},
  abstract     = {GPU-accelerated computing drives current scientific
                  research. Writing fast numeric algorithms for GPUs offers
                  high application performance by offloading compute-intensive
                  portions of the code to an NVIDIA GPU. The course covers
                  basic aspects of GPU architectures and programming. Focus is
                  on the usage of the parallel programming language CUDA C/C++
                  which allows maximum control of NVIDIA GPU hardware.
                  Examples of increasing complexity are used to demonstrate
                  optimization and tuning of scientific applications. Topics
                  covered will include: Introduction to GPU/Parallel
                  computing; Programming model CUDA; GPU libraries like CuBLAS
                  and CuFFT; Tools for debugging and profiling; Performance
                  optimizations; Advanced GPU programming model; CUDA Fortran
                  in a nutshell. This course is a PRACE training course.},
  subtyp       = {Other},
  cin          = {JSC},
  cid          = {I:(DE-Juel1)JSC-20090406},
  pnm          = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
                  and Research Groups (POF4-511) / 5111 - Domain-Specific
                  Simulation \& Data Life Cycle Labs (SDLs) and Research
                  Groups (POF4-511) / PRACE-6IP - PRACE 6th Implementation
                  Phase Project (823767) / ATML-X-DEV - ATML Accelerating
                  Devices (ATML-X-DEV)},
  pid          = {G:(DE-HGF)POF4-5112 / G:(DE-HGF)POF4-5111 /
                  G:(EU-Grant)823767 / G:(DE-Juel-1)ATML-X-DEV},
  typ          = {PUB:(DE-HGF)17},
  url          = {https://juser.fz-juelich.de/record/915940},
}