% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article. The pages field carries the article number (it matches the
% DOI suffix), not a page range. Fields such as reportid, cin, ddc, cid, pnm,
% pid, typ and UT are repository metadata that standard styles ignore.
@article{Durr:1019542,
  author    = {Durr, Stephan},
  title     = {Portable {CPU} implementation of {Wilson}, {Brillouin}
               and {Susskind} fermions in lattice {QCD}},
  journal   = {Computer Physics Communications},
  volume    = {282},
  issn      = {0010-4655},
  address   = {Amsterdam},
  publisher = {North Holland Publ. Co.},
  reportid  = {FZJ-2023-05489},
  pages     = {108555},
  year      = {2023},
  abstract  = {A modern Fortran implementation of three Dirac operators
               (Wilson, Brillouin, Susskind) in lattice QCD is presented,
               based on OpenMP shared-memory parallelization and SIMD
               pragmas. The main idea is to apply a Dirac operator to $N_v$
               vectors simultaneously, to ease the memory bandwidth
               bottleneck. All index computations are left to the compiler
               and maximum weight is given to portability and
               flexibility. The lattice volume, $N_x N_y N_z N_t$, the
               number of colors, $N_c$, and the number of right-hand sides,
               $N_v$, are parameters defined at compile time. Several memory
               layout options are compared. The code performs well on modern
               many-core architectures (480\,Gflop/s, 880\,Gflop/s, and
               780\,Gflop/s with $N_v=12$ for the three operators in single
               precision on a 72-core KNL processor, a $2 \times 24$-core
               Skylake node yields similar results). Explicit run-time tests
               with CG/BiCGstab inverters confirm that the memory layout is
               relevant for the KNL, but less so for the Skylake
               architecture. The ancillary code distribution contains all
               routines, including the single, double, and mixed precision
               Krylov space solvers, to render it self-contained and
               ready-to-use.},
  cin       = {JSC},
  ddc       = {530},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
               (SDLs) and Research Groups (POF4-511) / DFG project
               448374536 - Fortschritte bei einer präzisen ab initio
               Bestimmung der Partonen-Struktur von Hadronen (448374536)},
  pid       = {G:(DE-HGF)POF4-5111 / G:(GEPRIS)448374536},
  typ       = {PUB:(DE-HGF)16},
  UT        = {WOS:000876219500004},
  doi       = {10.1016/j.cpc.2022.108555},
  url       = {https://juser.fz-juelich.de/record/1019542},
}