% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Invited seminar talk by Edoardo Di Napoli (Columbia University, 5 Mar 2013);
% the landing page of the record is given in the url field.
% The fields reportid, subtyp, cin, cid, pnm, pid and typ are not standard
% BibTeX fields; they are kept verbatim as repository metadata.
% year/month and date are both kept so that classic BibTeX styles and
% biblatex each find a usable date.
% NOTE(review): no booktitle is present, which classic styles expect for this
% entry type -- confirm whether a different entry type (e.g. misc) fits better.
@inproceedings{DiNapoli:150536,
  author       = {Di Napoli, Edoardo},
  title        = {Improving the performance of applied science numerical
                  simulations: an application to {Density} {Functional}
                  {Theory}},
  reportid     = {FZJ-2014-00590},
  year         = {2013},
  abstract     = {In the early days of numerical simulations, advances were
                  based on the ingenuity of pioneer scientists writing codes
                  for relatively simple machines. Nowadays the investigation
                  of large physical systems requires scaling simulations up to
                  massively parallel computers whose optimal usage can often
                  be challenging. On the one hand the algorithmic structure of
                  many legacy codes can be a limiting factor to their
                  portability on large supercomputers. More importantly in
                  many cases algorithmic libraries are used as black boxes and
                  no information coming from the physics of the specific
                  application is exploited to improve the overall performance
                  of the simulation. What is needed is a more
                  interdisciplinary approach where the tools of scientific
                  computing and knowledge extracted from the specific
                  application are merged together in a new computational
                  paradigm. One of the most promising new paradigms borrows
                  from the "inverse problem" concept and, by reversing the
                  logical arrow going from mathematical modeling to numerical
                  simulations, extracts from the latter specific information
                  that can be used to modify the algorithm. The resulting
                  methodology, named "reverse simulation", produces an
                  algorithm variant specifically tailored to the scientific
                  application. Additionally such a variant can be optimally
                  implemented for multiple parallel computing architectures.
                  To demonstrate its applicability I will exemplify the
                  workings of reverse simulation on a computational method
                  widely used in the framework of Density Functional Theory
                  (DFT): the Full-potential Linearized Augmented Plane Wave
                  (FLAPW) method. FLAPW provides the means to solve a
                  high-dimensional quantum mechanical problem by representing
                  it as a non-linear generalized eigenvalue problem which is
                  solved self-consistently through a series of successive
                  outer-iteration cycles. By applying the principles of
                  reverse simulation it can be shown that eigenvectors of
                  successive eigenproblems become progressively more collinear
                  to each other as the outer-iteration index increases. This
                  result suggests that one could use eigenvectors, computed at
                  a certain outer-iteration, as approximate solutions to
                  improve the performance of the eigensolver at the next
                  iteration. In order to maximally exploit the approximate
                  solution, we developed a subspace iteration method augmented
                  with an optimized Chebyshev polynomial accelerator together
                  with an efficient locking mechanism (ChFSI). The resulting
                  eigensolver was implemented in C language and can be
                  parallelized for both shared and distributed architectures.
                  Numerical tests show that, when the eigensolver is
                  preconditioned with approximate solutions instead of random
                  vectors, it achieves up to a 5X speedup. Moreover ChFSI
                  takes great advantage of computational resources by
                  obtaining levels of efficiency up to $80\%$ of the
                  theoretical peak performance. In particular, by making
                  better use of massively parallel architectures, the
                  distributed memory version will allow users of the FLAPW
                  method to simulate larger physical systems than are
                  currently accessible.},
  month        = mar,
  date         = {2013-03-05},
  organization = {Seminar at Columbia University, New
                  York (United States), 5 Mar 2013},
  subtyp       = {Invited},
  cin          = {JSC},
  cid          = {I:(DE-Juel1)JSC-20090406},
  pnm          = {411 - Computational Science and Mathematical Methods
                  (POF2-411) / Simulation and Data Laboratory Quantum
                  Materials (SDLQM) (SDLQM)},
  pid          = {G:(DE-HGF)POF2-411 / G:(DE-Juel1)SDLQM},
  typ          = {PUB:(DE-HGF)31},
  url          = {https://juser.fz-juelich.de/record/150536},
}