% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
%
% Notes on the entry below (kept outside the entry, because `%` is not a
% comment character inside a BibTeX entry):
%  - `year`/`month` are read by classic BibTeX styles; `date` carries the full
%    ISO date for biblatex/Biber. Both are kept because both toolchains are
%    named as targets above.
%  - `booktitle` holds the event name, place and dates (required field for
%    @inproceedings).
%  - `reportid`, `subtyp`, `cin`, `cid`, `pnm`, `pid` and `typ` are
%    non-standard fields; unknown fields are ignored by bibliography styles.

@inproceedings{Badwaik:1030182,
  author    = {Badwaik, Jayesh and Herten, Andreas and Veneva, Milena},
  title     = {Optimizing an {LBM} Application Using {CUDA} Graphs},
  booktitle = {{ISC} High Performance 2023, Hamburg (Germany), 22 May 2023 -- 25 May 2023},
  year      = {2023},
  month     = may,
  date      = {2023-05-22},
  doi       = {10.34734/FZJ-2024-05240},
  url       = {https://juser.fz-juelich.de/record/1030182},
  abstract  = {With increasing focus on scalability and performance of high performance computing applications, it has become important for the simulation softwares to be able to utilize the underlying hardware as comprehensively to its maximum performance. waLBerla is a multiphysics software framework that has achieved high scalability and performance. It achieves this excellent performance due to architecture specific code generation algorithms combined with efficient communication and parallel data structures like BlockForest. In this work, we attempt to improve the GPU utilization of an Lattice-Boltzmann Method (LBM) software.},
  reportid  = {FZJ-2024-05240},
  subtyp    = {Other},
  cin       = {JSC},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs) and Research Groups (POF4-511) / SCALABLE - SCAlable LAttice Boltzmann Leaps to Exascale (956000) / ATML-X-DEV - ATML Accelerating Devices (ATML-X-DEV)},
  pid       = {G:(DE-HGF)POF4-5112 / G:(EU-Grant)956000 / G:(DE-Juel-1)ATML-X-DEV},
  typ       = {PUB:(DE-HGF)24},
}