% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Training-course record "GPU Programming Part 2: Advanced GPU Programming"
% (report id FZJ-2024-07562).  The fields reportid, subtyp, cin, cid, pnm, pid
% and typ are not standard BibTeX fields; standard styles ignore unknown
% fields, so they serve as metadata only (presumably repository-export
% bookkeeping -- confirm before relying on them).  Only the acronym GPU is
% brace-protected in the title so that the bibliography style decides the
% casing of the remaining words.
@misc{Meinke:1034809,
      author       = {Meinke, Jan and Herten, Andreas and Hrywniak, Markus and
                      Kraus, Jiri and Badwaik, Jayesh and Haghighi Mood, Kaveh},
      title        = {{GPU} Programming Part 2: Advanced {GPU} Programming},
      reportid     = {FZJ-2024-07562},
      year         = {2024},
      abstract     = {GPU-accelerated computing drives current scientific
                      research. Writing fast numeric algorithms for GPUs offers
                      high application performance by offloading compute-intensive
                      portions of the code to a GPU. This advanced course consists
                      of modules providing more in-depth coverage of multi-GPU
                      programming, modern CUDA concepts, CUDA Fortran, and
                      portable programming models such as OpenACC and C++ parallel
                      STL algorithms. Topics covered will include: A) Advanced
                      Multi-GPU Programming with MPI; B) Advanced Multi-GPU
                      Programming with NCCL and NVSHMEM; C) Advanced and Modern
                      CUDA Concepts (Cooperative Groups, CUDA Graphs, CUB
                      Primitives, Modern C++ Programming); D) CUDA Fortran; E) GPU
                      Programming with Abstractions (OpenACC, Standard Language
                      Programming (pSTL))},
      month        = jun,
      date         = {2024-06-03},
      organization = {JSC - as part of the Training
                      Programme of Forschungszentrum Jülich,
                      Jülich / online (Germany), 3 Jun 2024
                      - 7 Jun 2024},
      subtyp       = {Other},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
                      (SDLs) and Research Groups (POF4-511) / 5112 - Cross-Domain
                      Algorithms, Tools, Methods Labs (ATMLs) and Research Groups
                      (POF4-511) / 5122 - Future Computing \& Big Data Systems
                      (POF4-512) / ATML-X-DEV - ATML Accelerating Devices
                      (ATML-X-DEV)},
      pid          = {G:(DE-HGF)POF4-5111 / G:(DE-HGF)POF4-5112 /
                      G:(DE-HGF)POF4-5122 / G:(DE-Juel-1)ATML-X-DEV},
      typ          = {PUB:(DE-HGF)17},
      doi          = {10.34734/FZJ-2024-07562},
      url          = {https://juser.fz-juelich.de/record/1034809},
}