% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% arXiv preprint arXiv:2109.12855 [cs.DC]; the url field points to the
% institutional repository record for the same work.
% Typed as @misc because the record carries no journal; the eprint /
% archiveprefix / primaryclass fields let arXiv-aware styles print the
% identifier. reportid, cin, cid, pnm, pid and typ are non-standard fields
% that standard bibliography styles ignore.
@misc{Pronold:906624,
  author        = {Pronold, Jari and Jordan, Jakob and Wylie, Brian J. N. and
                   Kitayama, Itaru and Diesmann, Markus and Kunkel, Susanne},
  title         = {Routing brain traffic through the von {Neumann}
                   bottleneck: {Efficient} cache usage in spiking neural
                   network simulation code on general purpose computers},
  publisher     = {arXiv},
  year          = {2021},
  eprint        = {2109.12855},
  archiveprefix = {arXiv},
  primaryclass  = {cs.DC},
  doi           = {10.48550/ARXIV.2109.12855},
  url           = {https://juser.fz-juelich.de/record/906624},
  abstract      = {Simulation is a third pillar next to experiment and theory
                   in the study of complex dynamic systems such as biological
                   neural networks. Contemporary brain-scale networks
                   correspond to directed graphs of a few million nodes, each
                   with an in-degree and out-degree of several thousands of
                   edges, where nodes and edges correspond to the fundamental
                   biological units, neurons and synapses, respectively. When
                   considering a random graph, each node's edges are
                   distributed across thousands of parallel processes. The
                   activity in neuronal networks is also sparse. Each neuron
                   occasionally transmits a brief signal, called spike, via its
                   outgoing synapses to the corresponding target neurons. This
                   spatial and temporal sparsity represents an inherent
                   bottleneck for simulations on conventional computers:
                   Fundamentally irregular memory-access patterns cause poor
                   cache utilization. Using an established neuronal network
                   simulation code as a reference implementation, we
                   investigate how common techniques to recover cache
                   performance such as software-induced prefetching and
                   software pipelining can benefit a real-world application.
                   The algorithmic changes reduce simulation time by up to
                   50\%. The study exemplifies that many-core systems
                   assigned with an intrinsically parallel computational
                   problem can overcome the von Neumann bottleneck of
                   conventional computer architectures.},
  keywords      = {Distributed, Parallel, and Cluster Computing (cs.DC)
                   (Other) / FOS: Computer and information sciences (Other)},
  reportid      = {FZJ-2022-01560},
  cin           = {INM-6 / IAS-6 / INM-10},
  cid           = {I:(DE-Juel1)INM-6-20090406 / I:(DE-Juel1)IAS-6-20130828 /
                   I:(DE-Juel1)INM-10-20170113},
  pnm           = {5234 - Emerging NC Architectures (POF4-523) / HBP SGA2 -
                   Human Brain Project Specific Grant Agreement 2 (785907) /
                   HBP SGA3 - Human Brain Project Specific Grant Agreement 3
                   (945539) / DEEP-EST - DEEP - Extreme Scale Technologies
                   (754304) / ACA - Advanced Computing Architectures (SO-092) /
                   GRK 2416: MultiSenses-MultiScales: Novel approaches to
                   decipher neural processing in multisensory integration
                   (368482240) / ATMLPP - ATML Parallel Performance (ATMLPP)},
  pid           = {G:(DE-HGF)POF4-5234 / G:(EU-Grant)785907 /
                   G:(EU-Grant)945539 / G:(EU-Grant)754304 / G:(DE-HGF)SO-092 /
                   G:(GEPRIS)368482240 / G:(DE-Juel-1)ATMLPP},
  typ           = {PUB:(DE-HGF)25},
}