% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference contribution (subtyp = Invited) by Di Napoli and Berljafa.
% The conference name lives in `booktitle`; the venue and event dates are kept
% in the biblatex fields `venue` and `eventdate`. The lowercase fields
% reportid/subtyp/cin/cid/pnm/pid/typ are repository metadata that standard
% styles ignore; they are preserved verbatim.
@inproceedings{DiNapoli:150535,
  author    = {Di Napoli, Edoardo and Berljafa, Mario},
  title     = {Preconditioning {Chebyshev} subspace iteration applied to
               sequences of dense eigenproblems in ab initio simulations},
  booktitle = {Numerical Analysis and Scientific Computation with
               Applications},
  venue     = {Calais, France},
  eventdate = {2013-06-24/2013-06-26},
  year      = {2013},
  month     = jun,
  date      = {2013-06-24},
  reportid  = {FZJ-2014-00589},
  abstract  = {Research in several branches of chemistry and materials
               science relies on large ab initio numerical simulations. The
               majority of these simulations are based on computational
               methods developed within the framework of Density Functional
               Theory (DFT) [1]. Among all the DFT-based methods the
               Full-potential Linearized Augmented Plane Wave (FLAPW) [2,
               3] method constitutes the most precise computational
               framework to calculate ground state energy of periodic and
               crystalline materials. FLAPW provides the means to solve a
               high-dimensional quantum mechanical problem by representing
               it as a non-linear generalized eigenvalue problem which is
               solved self-consistently through a series of successive
               outer-iteration cycles. As a consequence each
               self-consistent simulation is made of dozens of sequences of
               dense generalized eigenproblems $P: Ax = \lambda Bx$. Each
               sequence, $P_1, \ldots, P_i, \ldots, P_N$, groups together
               eigenproblems with increasing outer-iteration index $i$.
               Successive eigenproblems in a FLAPW-generated sequence
               possess a high degree of correlation. In particular it has
               been demonstrated that eigenvectors of adjacent
               eigenproblems become progressively more collinear to each
               other as the outer-iteration index increases [4]. This
               result suggests one could use eigenvectors, computed at a
               certain outer-iteration, as approximate solutions to improve
               the performance of the eigensolver at the next one. In order
               to maximally exploit the approximate solution, we developed
               a subspace iteration method augmented with an optimized
               Chebyshev polynomial accelerator together with an efficient
               locking mechanism (ChFSI). The resulting eigensolver was
               implemented in C language and parallelized for both shared
               and distributed architectures. Numerical tests show that,
               when the eigensolver is preconditioned with approximate
               solutions instead of random vectors, it achieves up to a 5X
               speedup. Moreover ChFSI takes great advantage of
               computational resources by obtaining levels of efficiency up
               to 80\% of the theoretical peak performance. In
               particular, by making better use of massively parallel
               architectures, the distributed memory version will allow the
               FLAPW method users to simulate larger physical systems than
               are currently accessible. Additionally, despite the
               eigenproblems in the sequence being relatively large and
               dense, the parallel ChFSI preconditioned with approximate
               solutions performs substantially better than the
               corresponding direct eigensolvers, even for a significant
               portion of the sought-after spectrum. [1] R. M. Dreizler,
               and E. K. U. Gross, Density Functional Theory
               (Springer-Verlag, 1990) [2] A. J. Freeman, H. Krakauer, M.
               Weinert, and E. Wimmer, Phys. Rev. B 24 (1981) 864. [3] A.
               J. Freeman, and H. J. F. Jansen, Phys. Rev. B 30 (1984) 561
               [4] E. Di Napoli, S. Blügel, and P. Bientinesi, Comp.
               Phys. Comm. 183 (2012), pp. 1674--1682, [arXiv:1108.2594]},
  subtyp    = {Invited},
  cin       = {JSC},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {411 - Computational Science and Mathematical Methods
               (POF2-411) / Simulation and Data Laboratory Quantum
               Materials (SDLQM) (SDLQM)},
  pid       = {G:(DE-HGF)POF2-411 / G:(DE-Juel1)SDLQM},
  typ       = {PUB:(DE-HGF)6},
  url       = {https://juser.fz-juelich.de/record/150535},
}