% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference paper on ChASE (distributed hybrid CPU-GPU eigensolver), 2022;
% repository record 907602, also available as arXiv:2205.02491.
% Field notes: articleno/eid carry the article number (this paper has no page
% range); eventtitle/venue/eventdate are biblatex fields that classic BibTeX
% styles ignore; reportid, cin, cid, pnm, pid and typ are repository-internal
% bookkeeping fields that no standard style prints.
@INPROCEEDINGS{Wu:907602,
  author        = {Wu, Xinzhe and Davidovic, Davor and Achilles, Sebastian and
                   Di Napoli, Edoardo},
  title         = {{ChASE} - {A} Distributed Hybrid {CPU}-{GPU} Eigensolver for
                   Large-scale {Hermitian} Eigenvalue Problems},
  booktitle     = {Proceedings of the Platform for Advanced Scientific
                   Computing Conference},
  eventtitle    = {Platform for Advanced Scientific Computing},
  venue         = {Basel, Switzerland},
  eventdate     = {2022-06-27/2022-06-29},
  publisher     = {ACM},
  address       = {New York, NY, USA},
  year          = {2022},
  month         = jun,
  articleno     = {9},
  eid           = {9},
  isbn          = {9781450394109},
  doi           = {10.1145/3539781.3539792},
  eprint        = {2205.02491},
  archivePrefix = {arXiv},
  url           = {https://juser.fz-juelich.de/record/907602},
  abstract      = {As modern massively parallel clusters are getting larger
                   with beefier compute nodes, traditional parallel
                   eigensolvers, such as direct solvers, struggle keeping the
                   pace with the hardware evolution and being able to scale
                   efficiently due to additional layers of communication and
                   synchronization. This difficulty is especially important
                   when porting traditional libraries to heterogeneous
                   computing architectures equipped with accelerators, such as
                   Graphics Processing Unit (GPU). Recently, there have been
                   significant scientific contributions to the development of
                   filter-based subspace eigensolver to compute partial
                   eigenspectrum. The simpler structure of these type of
                   algorithms makes for them easier to avoid the communication
                   and synchronization bottlenecks typical of direct solvers.
                   The Chebyshev Accelerated Subspace Eigensolver (ChASE) is a
                   modern subspace eigensolver to compute partial extremal
                   eigenpairs of large-scale Hermitian eigenproblems with the
                   acceleration of a filter based on Chebyshev polynomials. In
                   this work, we extend our previous work on ChASE by adding
                   support for distributed hybrid CPU-multi-GPU computing
                   architectures. Our tests show that ChASE achieves very good
                   scaling performance up to 144 nodes with 526 NVIDIA A100
                   GPUs in total on dense eigenproblems of size up to $360$k.},
  reportid      = {FZJ-2022-02101},
  cin           = {JSC},
  cid           = {I:(DE-Juel1)JSC-20090406},
  pnm           = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
                   (SDLs) and Research Groups (POF4-511) / PRACE-6IP - PRACE
                   6th Implementation Phase Project (823767) / Simulation and
                   Data Laboratory Quantum Materials (SDLQM) (SDLQM)},
  pid           = {G:(DE-HGF)POF4-5111 / G:(EU-Grant)823767 /
                   G:(DE-Juel1)SDLQM},
  typ           = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
  SLACcitation  = {$\%\%CITATION$ = $arXiv:2205.02491;\%\%$},
}