% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Training-course record for "GPU Programming Part 2: Advanced GPU Programming"
% (report id FZJ-2024-07562, record 1034809 on juser.fz-juelich.de).
%
% Notes for maintainers:
%  - The entry starts on its own line so that tools which treat a percent sign
%    as a comment running to end of line cannot swallow it.
%  - year/month are kept next to date: classic styles read year/month,
%    biblatex reads date.
%  - reportid, subtyp, cin, cid, pnm, pid and typ are not standard fields;
%    presumably repository metadata carried over from the export (TODO confirm).
%    Standard styles ignore unknown fields.
%  - Title words are brace-protected as whole words to keep the course title's
%    capitalisation under sentence-casing styles.
@misc{Meinke:1034809,
  author       = {Meinke, Jan and Herten, Andreas and Hrywniak, Markus and Kraus, Jiri and Badwaik, Jayesh and Haghighi Mood, Kaveh},
  title        = {{GPU} {Programming} {Part} 2: {Advanced} {GPU} {Programming}},
  reportid     = {FZJ-2024-07562},
  year         = {2024},
  month        = jun,
  date         = {2024-06-03},
  abstract     = {GPU-accelerated computing drives current scientific research. Writing fast numeric algorithms for GPUs offers high application performance by offloading compute-intensive portions of the code to a GPU. This advanced course consists of modules providing more in-depth coverage of multi-GPU programming, modern CUDA concepts, CUDA Fortran, and portable programming models such as OpenACC and C++ parallel STL algorithms. Topics covered will include A) Advanced Multi-GPU Programming with MPI B) Advanced Multi-GPU Programming with NCCL and NVSHMEM C) Advanced and Modern CUDA Concepts (Cooperative Groups, CUDA Graphs, CUB Primitives, Modern C++ Programming) D) CUDA Fortran E) GPU Programming with Abstractions (OpenACC, Standard Language Programming (pSTL))},
  organization = {JSC - as part of the Training Programme of Forschungszentrum Jülich, Jülich / online (Germany), 3 Jun 2024 - 7 Jun 2024},
  subtyp       = {Other},
  cin          = {JSC},
  cid          = {I:(DE-Juel1)JSC-20090406},
  pnm          = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs (SDLs) and Research Groups (POF4-511) / 5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs) and Research Groups (POF4-511) / 5122 - Future Computing \& Big Data Systems (POF4-512) / ATML-X-DEV - ATML Accelerating Devices (ATML-X-DEV)},
  pid          = {G:(DE-HGF)POF4-5111 / G:(DE-HGF)POF4-5112 / G:(DE-HGF)POF4-5122 / G:(DE-Juel-1)ATML-X-DEV},
  typ          = {PUB:(DE-HGF)17},
  doi          = {10.34734/FZJ-2024-07562},
  url          = {https://juser.fz-juelich.de/record/1034809},
}