% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@INPROCEEDINGS{Villamar:1043062,
      author       = {Villamar, Jose and Golosio, Bruno and Tiddia, Gianmarco and
                      Babu, Pooja and Sergi, Luca and Pontisso, Luca and Simula,
                      Francesco and Lonardo, Alessandro and Pastorelli, Elena and
                      Paolucci, Pier Stanislao and Senk, Johanna},
      title        = {{NEST} {GPU} simulations scale up to networks of billions
                      of spiking neurons and trillions of synapses},
      booktitle    = {International Workshop on Reliable and Sustainable
                      Neuromorphic Hardware},
      school       = {RWTH Aachen},
      reportid     = {FZJ-2025-02753},
      year         = {2025},
      abstract     = {Efficient simulation of large-scale spiking neuronal
                      networks is important for neuroscientific research, and both
                      the simulation speed and the time it takes to instantiate
                      the network in computer memory are key factors. NEST GPU is
                      a GPU-based simulator under the NEST Initiative written in
                      CUDA-C++ that demonstrates high simulation speeds with
                      models of various network sizes on single-GPU and multi-GPU
                      systems
                      [doi/10.3389/fncom.2021.627620][doi/10.3389/fninf.2022.883333][doi/10.3390/app13179598].
                      On the path toward models of the whole brain,
                      neuroscientists show an increasing interest in studying
                      networks that are larger by several orders of magnitude.
                      Here, we show the performance of our simulation technology
                      with a scalable network model across multiple network sizes
                      approaching human cortex magnitudes. For this, we propose a
                      novel method to efficiently instantiate large networks on
                      multiple GPUs in parallel. Our approach relies on the
                      deterministic initial state of pseudo-random number
                      generators (PRNGs). While requiring synchronization of
                      network construction directives between MPI processes and a
                      small memory overhead, this approach enables dynamical
                      neuron creation and connection at runtime. The method is
                      evaluated through a two-population recurrently connected
                      network model designed for benchmarking an arbitrary number
                      of GPUs while maintaining first-order network statistics
                      across scales. The benchmarking model was tested during an
                      exclusive reservation of the LEONARDO Booster cluster. While
                      keeping constant the number of neurons and incoming synapses
                      to each neuron per GPU, we performed several simulation runs
                      exploiting in parallel from 400 to 12,000 (full system)
                      GPUs. Each GPU device contained approximately 281 thousand
                      neurons and 3.1 billion synapses. Our results show network
                      construction times of less than a second using the full
                      system and stable dynamics across scales. At full system
                      scale, the network model was composed of approximately 3.37
                      billion neurons and 37.96 trillion synapses ($\sim$25\%
                      human cortex). To conclude, our novel approach enabled
                      network model instantiation of magnitudes nearing human
                      cortex scale while keeping fast construction times, on
                      average of 0.5s across trials. The stability of dynamics and
                      performance across scales obtained in our model is a proof
                      of feasibility paving the way for biologically more
                      plausible and detailed brain scale models.},
      month        = may,
      date         = {2025-05-28},
      organization = {International Workshop on Reliable and
                      Sustainable Neuromorphic Hardware, York
                      (UK), 28 May 2025 - 30 May 2025},
      subtyp       = {After Call},
      cin          = {IAS-6 / JSC},
      cid          = {I:(DE-Juel1)IAS-6-20130828 / I:(DE-Juel1)JSC-20090406},
      pnm          = {5232 - Computational Principles (POF4-523) / 5235 -
                      Digitization of Neuroscience and User-Community Building
                      (POF4-523) / HiRSE - Helmholtz Platform for Research
                      Software Engineering (HiRSE-20250220) / JL SMHB - Joint Lab
                      Supercomputing and Modeling for the Human Brain (JL
                      SMHB-2021-2027)},
      pid          = {G:(DE-HGF)POF4-5232 / G:(DE-HGF)POF4-5235 /
                      G:(DE-Juel-1)HiRSE-20250220 / G:(DE-Juel1)JL SMHB-2021-2027},
      typ          = {PUB:(DE-HGF)24},
      doi          = {10.34734/FZJ-2025-02753},
      url          = {https://juser.fz-juelich.de/record/1043062},
}