% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Invited talk by Di Napoli and Wu on the ChASE eigensolver library, given at a
% CECAM workshop in Lausanne (15-18 Nov 2022); repository record 911757.
% Notes for maintainers:
%   - booktitle carries the event description, because the inproceedings type
%     requires a booktitle and standard styles warn when it is empty.
%   - month uses the predefined three-letter macro (unbraced) so that styles
%     can expand and localise it.
%   - Case-sensitive words in the title are braced as whole words.
%   - reportid, subtyp, cin, cid, pnm, pid and typ are not standard BibTeX
%     fields and are ignored by standard styles; presumably repository
%     metadata from the export -- confirm before relying on their meaning.
@inproceedings{DiNapoli:911757,
      author       = {Di Napoli, Edoardo and Wu, Xinzhe},
      title        = {{ChAS(E)ing} {Hermitian} dense eigenproblems with
                      subspace iteration on large scale hybrid platforms with
                      application to {DFT}},
      booktitle    = {{CECAM} workshop -- Challenges and Advances in Solving
                      Eigenproblems for Electronic-Structure Theory, Lausanne
                      (Switzerland), 15--18 Nov 2022},
      year         = {2022},
      month        = nov,
      date         = {2022-11-15},
      url          = {https://juser.fz-juelich.de/record/911757},
      reportid     = {FZJ-2022-05009},
      subtyp       = {Invited},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
                      (SDLs) and Research Groups (POF4-511) / Simulation and Data
                      Laboratory Quantum Materials (SDLQM) (SDLQM)},
      pid          = {G:(DE-HGF)POF4-5111 / G:(DE-Juel1)SDLQM},
      typ          = {PUB:(DE-HGF)6},
      abstract     = {As modern massively parallel clusters are getting larger
                      with beefier compute nodes, traditional parallel
                      eigensolvers, such as direct solvers, struggle keeping the
                      pace with the hardware evolution and being able to scale
                      efficiently due to additional layers of communication and
                      synchronization. This difficulty is especially important
                      when porting traditional libraries to heterogeneous
                      computing architectures equipped with accelerators, such as
                      Graphics Processing Unit (GPU). Recently, there have been
                      significant scientific contributions to the development of
                      filter-based subspace eigensolver to compute partial
                      eigenspectrum. The simpler structure of these type of
                      algorithms makes for them easier to avoid the communication
                      and synchronization bottlenecks typical of direct solvers.
                      The Chebyshev Accelerated Subspace Eigensolver (ChASE) is a
                      modern subspace eigensolver to compute partial extremal
                      eigenpairs of large-scale Hermitian eigenproblems with the
                      acceleration of a filter based on Chebyshev polynomials. In
                      this talk, we report on the latest versions of the ChASE
                      library by describing (i) its support for distributed hybrid
                      CPU-multi-GPU computing architectures, and (ii) the very
                      recent development of partial distribution of a combination
                      of Householder- Cholesky-QR factorization and its impact on
                      time-to-solution and memory footprint. Benchmarks on a
                      modern heterogeneous cluster (JURECA-DC) based on double
                      socket AMD Epyc Rome CPU and 4 NVIDIA GPUs per node are
                      provided. Typical application of ChASE are sequences of
                      large Hermitian eigenproblems as they appear in LAPW
                      methods. ChASE is also application-code ready, in the sense
                      that comes with a simple C++/Fortran interface that allows
                      its integration with typical electronic structure legacy
                      codes.},
}