% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article describing the tfQMRgpu linear-solver package
% (institutional report id FZJ-2023-05135). The fields reportid, ddc, pnm,
% pid and typ are repository metadata; standard styles do not print them.
% NOTE(review): the doi ends in a version suffix (/v1) and looks like a
% preprint identifier rather than the journal's own DOI -- confirm against
% the published article and replace it if a final DOI exists.
@article{Baumeister:1019082,
  author    = {Baumeister, Paul F. and Nassyr, Stepan},
  title     = {{tfQMRgpu}: {A} {GPU}-accelerated linear solver with
               block-sparse complex result matrix},
  journal   = {The Journal of Supercomputing},
  publisher = {Springer Science + Business Media B.V.},
  address   = {Dordrecht [u.a.]},
  year      = {2023},
  issn      = {0920-8542},
  doi       = {10.21203/rs.3.rs-3574519/v1},
  url       = {https://juser.fz-juelich.de/record/1019082},
  abstract  = {Linear solvers are a central component of many applications
               in physics and engineering. In this work we present a
               software package for simultaneously solving with multiple
               right-hand sides using the vast compute performance and
               memory bandwidth of graphical processors. Using the
               transpose-free quasi minimal residual method iterative
               linear solving does not require the implementation of an
               adjoint operator. This C++/CUDA software packet has two ways
               of being employed. The precompiled version of this library
               offers linear solving for single and double precision
               block-sparse complex matrices with interfaces to various
               programming languages, in particular C, Fortran, Python and
               Julia. Furthermore, the core algorithm is available for
               custom implementations of any linear operator as a C++
               header-only library. We showcase a matrix-free approach of a
               custom operator for a finite-difference stencil application
               solving the three-dimensional Helmholtz equation and compare
               the performance of the matrix-free approach against the
               block-sparse matrix version, both on NVIDIA hardware.},
  reportid  = {FZJ-2023-05135},
  ddc       = {620},
  pnm       = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
               (SDLs) and Research Groups (POF4-511)},
  pid       = {G:(DE-HGF)POF4-5111},
  typ       = {PUB:(DE-HGF)25},
}