% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Workshop contribution; the record is hosted at juser.fz-juelich.de (see url).
% Standard bibliographic fields come first. The trailing fields (reportid,
% subtyp, cin, cid, pnm, pid, typ) are non-standard and presumably carry
% repository metadata; standard styles ignore unknown fields.
% year/month and date are both kept so that classic BibTeX styles and
% biblatex each find the date field they read.
@inproceedings{DiNapoli:1019352,
  author    = {Di Napoli, Edoardo and Wu, Xinzhe},
  title     = {Advancing the Distributed Multi-{GPU} {ChASE}
               Library through Algorithm Optimization and {NCCL}
               Library},
  booktitle = {14th Workshop on Latest Advances in
               Scalable Algorithms for Large-Scale
               Heterogeneous Systems, Denver (USA), 12
               Nov 2023 -- 17 Nov 2023},
  year      = {2023},
  month     = nov,
  date      = {2023-11-12},
  url       = {https://juser.fz-juelich.de/record/1019352},
  abstract  = {As supercomputers become larger with powerful Graphics
               Processing Unit (GPU), traditional direct eigensolvers
               struggle to keep up with the hardware evolution and scale
               efficiently due to communication and synchronization
               demands. Subspace eigensolvers, like the Chebyshev
               Accelerated Subspace Eigensolver (ChASE), have a simpler
               structure and can overcome communication and synchronization
               bottlenecks. ChASE is a modern subspace eigensolver that
               uses Chebyshev polynomials to accelerate the computation of
               extremal eigenpairs of dense Hermitian eigenproblems. In
               this work we show how we have modified ChASE by rethinking
               its memory layout, introducing a novel parallelization
               scheme, switching to a more performing
               communication-avoiding algorithm for one of its inner
               module, and substituting MPI library by vendor-optimized
               NCCL library. The resulting library can tackle dense
               problems with size up to $N=O(10^6)$, and scales
               effortlessly up to the full 900 nodes---each one powered by
               4xA100 NVIDIA GPUs---of the JUWELS Booster hosted at the
               Jülich Supercomputing Centre.},
  reportid  = {FZJ-2023-05322},
  subtyp    = {After Call},
  cin       = {JSC / CASA},
  cid       = {I:(DE-Juel1)JSC-20090406 / I:(DE-Juel1)CASA-20230315},
  pnm       = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
               (SDLs) and Research Groups (POF4-511) / Simulation and Data
               Laboratory Quantum Materials (SDLQM) (SDLQM)},
  pid       = {G:(DE-HGF)POF4-5111 / G:(DE-Juel1)SDLQM},
  typ       = {PUB:(DE-HGF)6},
}