% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article: Scheins et al., Phys. Med. Biol. 66(18), article no. 185001 (2021).
% "pages" holds the article number (the journal uses article numbers, not page ranges).
% The fields reportid, cin, ddc, cid, pnm, pid, typ, pubmed and UT are non-standard;
% presumably repository metadata from the export (see url) -- standard styles ignore them.
@ARTICLE{Scheins:894866,
  author    = {Scheins, J. J. and Lenz, Matthias and Pietrzyk, U. and
               Shah, N. J. and Lerche, C.},
  title     = {High-throughput, accurate {Monte Carlo} simulation on
               {CPU} hardware for {PET} applications},
  journal   = {Physics in Medicine and Biology},
  volume    = {66},
  number    = {18},
  issn      = {1361-6560},
  address   = {Bristol},
  publisher = {IOP Publ.},
  reportid  = {FZJ-2021-03437},
  pages     = {185001},
  year      = {2021},
  abstract  = {Monte Carlo simulations (MCS) represent a fundamental
               approach to modelling the photon interactions in positron
               emission tomography (PET). A variety of PET-dedicated MCS
               tools are available to assist and improve PET imaging
               applications. Of these, GATE has evolved into one of the
               most popular software for PET MCS because of its accuracy
               and flexibility. However, simulations are extremely
               time-consuming. The use of graphics processing units (GPU)
               has been proposed as a solution to this, with reported
               acceleration factors about 400–800. These factors refer to
               GATE benchmarks performed on a single CPU core.
               Consequently, CPU-based MCS can also be easily accelerated
               by one order of magnitude or beyond when exploiting
               multi-threading on powerful CPUs. Thus, CPU-based
               implementations become competitive when further
               optimisations can be achieved. In this context, we have
               developed a novel, CPU-based software called the PET physics
               simulator (PPS), which combines several efficient methods to
               significantly boost the performance. PPS flexibly applies
               GEANT4 cross-sections as a pre-calculated database, thus
               obtaining results equivalent to GATE. This is demonstrated
               for an elaborated PET scanner with 3-layer block detectors.
               All code optimisations yield an acceleration factor of ≈20
               (single core). Multi-threading on a high-end CPU workstation
               (96 cores) further accelerates the PPS by a factor of 80.
               This results in a total speed-up factor of ≈1600, which
               outperforms comparable GPU-based MCS by a factor of ≳2.
               Optionally, the proposed method of coincidence multiplexing
               can further enhance the throughput by an additional factor
               of ≈15. The combination of all optimisations corresponds
               to an acceleration factor of ≈24 000. In this way, the PPS
               can simulate complex PET detector systems with an effective
               throughput of $10^6$ photon pairs in less than 10
               milliseconds.},
  cin       = {INM-4 / INM-11 / JARA-BRAIN},
  ddc       = {530},
  cid       = {I:(DE-Juel1)INM-4-20090406 / I:(DE-Juel1)INM-11-20170113 /
               I:(DE-Juel1)VDB1046},
  pnm       = {5253 - Neuroimaging (POF4-525)},
  pid       = {G:(DE-HGF)POF4-5253},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {34380125},
  UT        = {WOS:000693956100001},
  doi       = {10.1088/1361-6560/ac1ca0},
  url       = {https://juser.fz-juelich.de/record/894866},
}