% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference contribution published in Proceedings of Science (volume
% LATTICE2018, article number 033); the same work is on arXiv as 1808.05506.
% The conference itself is described by booktitle plus the biblatex event
% fields (eventtitle, venue, eventdate); classic BibTeX styles ignore the latter.
% reportid, cin, cid, pnm, pid and typ appear to be repository bookkeeping
% fields from the exporting system; standard styles ignore unknown fields.
@inproceedings{Durr:859687,
  author        = {Durr, Stephan},
  title         = {Three {Dirac} operators on two architectures with one
                   piece of code and no hassle},
  booktitle     = {Proceedings of the 36th Annual International Symposium on
                   Lattice Field Theory (Lattice 2018)},
  eventtitle    = {36th Annual International Symposium on
                   Lattice Field Theory, Lattice 2018},
  venue         = {East Lansing (USA)},
  eventdate     = {2018-07-22/2018-07-28},
  series        = {Proceedings of Science},
  volume        = {LATTICE2018},
  pages         = {033},
  publisher     = {SISSA},
  address       = {Trieste},
  year          = {2018},
  month         = jul,
  abstract      = {A simple minded approach to implement three discretizations
                   of the Dirac operator (staggered, Wilson, Brillouin) on two
                   architectures (KNL and core i7) is presented. The idea is to
                   use a high-level compiler along with OpenMP parallelization
                   and SIMD pragmas, but to stay away from cache-line
                   optimization and/or assembly-tuning. The implementation is
                   for $N_v$ right-hand-sides, and this extra index is used to
                   fill the SIMD pipeline. On one KNL node single precision
                   performance figures for $N_c=3,$ $N_v=12$ read 475 Gflop/s,
                   345 Gflop/s, and 790 Gflop/s for the three discretization
                   schemes, respectively.},
  eprint        = {1808.05506},
  archivePrefix = {arXiv},
  SLACcitation  = {\%\%CITATION = arXiv:1808.05506;\%\%},
  url           = {https://juser.fz-juelich.de/record/859687},
  reportid      = {FZJ-2019-00526},
  cin           = {JSC},
  cid           = {I:(DE-Juel1)JSC-20090406},
  pnm           = {511 - Computational Science and Mathematical Methods
                   (POF3-511)},
  pid           = {G:(DE-HGF)POF3-511},
  typ           = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
}