% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@INPROCEEDINGS{Villamar:1044380,
author = {Villamar, Jose and Tiddia, Gianmarco and Sergi, Luca and
Babu, Pooja and Pontisso, Luca and Simula, Francesco and
Lonardo, Alessandro and Pastorelli, Elena and Paolucci, Pier
Stanislao and Golosio, Bruno and Senk, Johanna},
title = {{NEST} {GPU} simulations scale up to networks of billions
of spiking neurons and trillions of synapses},
school = {RWTH Aachen},
reportid = {FZJ-2025-03154},
year = {2025},
abstract = {Efficient simulation of large-scale spiking neuronal
networks is important for neuroscientific research, and both
the simulation speed and the time it takes to instantiate
the network in computer memory are key factors. NEST GPU is
a GPU-based simulator under the NEST Initiative, written in
CUDA-C++, that demonstrates high simulation speeds with
models of various network sizes on single-GPU and multi-GPU
systems [1,2,3]. On the path toward models of the whole
brain, neuroscientists show an increasing interest in
studying networks that are larger by several orders of
magnitude. Here, we show the performance of our simulation
technology with a scalable network model across multiple
network sizes approaching human cortex magnitudes. For this,
we propose a novel method to efficiently instantiate large
networks on multiple GPUs in parallel. Our approach relies
on the deterministic initial state of pseudo-random number
generators (PRNGs). While it requires synchronizing
network-construction directives across MPI processes and
incurs a small memory overhead, this approach enables dynamic
neuron creation and connection at runtime. The method is
evaluated through a two-population recurrently connected
network model designed for benchmarking an arbitrary number
of GPUs while maintaining first-order network statistics
across scales. The benchmarking model was tested during an
exclusive reservation of the LEONARDO Booster cluster. While
keeping the number of neurons per GPU and the number of
incoming synapses per neuron constant, we performed several
simulation runs using from 400 up to 12,000 GPUs (the full
system) in parallel. Each GPU device contained approximately 281 thousand
neurons and 3.1 billion synapses. Our results show network
construction times of less than a second using the full
system and stable dynamics across scales. At full system
scale, the network model was composed of approximately 3.37
billion neurons and 37.96 trillion synapses ($\sim$25\% of the human
cortex). To conclude, our novel approach enabled network
model instantiation at magnitudes nearing human cortex scale
while maintaining fast construction times, averaging 0.5 s
across trials. The stability of dynamics and performance
across scales obtained with our model is a proof of
feasibility, paving the way for more biologically plausible
and detailed brain-scale models. [1]
https://doi.org/10.3389/fncom.2021.627620 . [2]
https://doi.org/10.3389/fninf.2022.883333 . [3]
https://doi.org/10.3390/app13179598},
month = {Jul},
date = {2025-07-05},
organization = {34th Annual Computational Neuroscience
Meeting, Florence (Italy), 5 Jul 2025 -
9 Jul 2025},
subtyp = {After Call},
cin = {IAS-6 / JSC},
cid = {I:(DE-Juel1)IAS-6-20130828 / I:(DE-Juel1)JSC-20090406},
pnm = {5232 - Computational Principles (POF4-523) / 5235 -
Digitization of Neuroscience and User-Community Building
(POF4-523) / HiRSE - Helmholtz Platform for Research
Software Engineering (HiRSE-20250220) / JL SMHB - Joint Lab
Supercomputing and Modeling for the Human Brain (JL
SMHB-2021-2027)},
pid = {G:(DE-HGF)POF4-5232 / G:(DE-HGF)POF4-5235 /
G:(DE-Juel-1)HiRSE-20250220 / G:(DE-Juel1)JL SMHB-2021-2027},
typ = {PUB:(DE-HGF)24},
doi = {10.34734/FZJ-2025-03154},
url = {https://juser.fz-juelich.de/record/1044380},
}
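% Illustrative sketch relating to the instantiation method described in the
% abstract above. This is a hypothetical, minimal C++ illustration of the
% deterministic-PRNG idea (identically seeded generators let every MPI rank
% derive its share of the global connectivity without exchanging it), not
% NEST GPU source code; the function build_local and the block-wise draw
% scheme are assumptions made for the example.
%
%   #include <cstdint>
%   #include <iostream>
%   #include <random>
%   #include <vector>
%
%   struct Synapse { std::uint64_t src, tgt; };
%
%   // Every rank seeds an identical PRNG and jumps to its own block of the
%   // shared global draw sequence, so all ranks agree on the connectivity
%   // while each materializes only its slice.
%   std::vector<Synapse> build_local(std::uint64_t seed,
%                                    std::uint64_t n_neurons,
%                                    std::uint64_t n_synapses,
%                                    int rank, int n_ranks) {
%       const std::uint64_t per_rank = n_synapses / n_ranks;
%       std::mt19937_64 rng(seed);
%       rng.discard(2 * per_rank * rank);  // 2 raw draws per synapse; a
%                                          // counter-based PRNG (e.g. Philox)
%                                          // would make this jump O(1)
%       std::vector<Synapse> local(per_rank);
%       for (auto& syn : local) {
%           syn.src = rng() % n_neurons;   // one raw draw per value keeps the
%           syn.tgt = rng() % n_neurons;   // discard() arithmetic exact
%       }                                  // (modulo bias tolerable here)
%       return local;
%   }
%
%   int main() {
%       // Two "ranks" independently reconstruct disjoint slices of the same
%       // global random network from the same seed.
%       auto r0 = build_local(42, 1000, 10000, 0, 2);
%       auto r1 = build_local(42, 1000, 10000, 1, 2);
%       std::cout << r0.size() + r1.size() << " synapses instantiated\n";
%   }
%
% Because every rank replays the same deterministic stream, construction
% directives only need to be issued in the same order on all ranks, matching
% the synchronization requirement noted in the abstract.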