% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Peyser (2017): chapter on the NestMC simulator, pp. 31--36 of the report
% "JUQUEEN Extreme Scaling Workshop 2017" (JSC Internal Report
% FZJ-JSC-IB-2017-01).
%
% Typed as incollection (a titled contribution inside a collective volume):
% classic BibTeX styles ignore "booktitle" on inbook entries.
% "venue" and "eventdate" describe the workshop itself; they are read by
% biblatex and ignored by classic BibTeX styles.
% reportid, cin, cid, pnm, pid and typ look like repository-export metadata
% (the url points at juser.fz-juelich.de); standard styles ignore unknown
% fields, so they are kept verbatim.
@incollection{Peyser:834363,
      author       = {Peyser, Alexander},
      title        = {{NestMC}: {A} new multi-compartment neuronal network
                      simulator},
      booktitle    = {{JUQUEEN} Extreme Scaling Workshop 2017},
      series       = {JSC Internal Report},
      volume       = {FZJ-JSC-IB-2017-01},
      pages        = {31--36},
      publisher    = {Forschungszentrum Jülich, Jülich Supercomputing Centre},
      address      = {Jülich},
      year         = {2017},
      month        = jan,
      venue        = {Jülich, Germany},
      eventdate    = {2017-01-23/2017-01-25},
      reportid     = {FZJ-2017-04336},
      abstract     = {NestMC is a prototype simulator for neuronal networks
                      composed of morphologically detailed neurons. This new
                      code is being designed for the new generation of HPC
                      infrastructure composed of massively parallel and
                      heterogeneous architectures. Planned architectures
                      include `normal' non-vectorized CPUs, vectorized CPUs
                      such as KNL, GPUs and other boosters such as FPGAs. For
                      OpenMP, the current architecture with 1 thread per rank
                      handling all spike communications and exchange scales
                      well up to 2048 nodes, and continues to give performance
                      gains up to full JUQUEEN. Using threading pools that
                      partially implement the functionality of TBB, we see
                      good weak-scaling up to 4096 nodes and can expect to see
                      performance gains up to JUQUEEN scale. For more complex
                      neuron models and morphologies which increase the ratio
                      of computation time to communication time, weak scaling
                      should be significantly improved; the cases tested are
                      `worst case scenarios' relative to production runs. With
                      this workshop, we identified the limits of weak-scaling
                      on the current architecture. This motivated the
                      development of a threading backend for architectures
                      where TBB is not available. Since the communication time
                      is dominated by processing the global spike buffers, a
                      dry-run mode has been developed taking advantage of this
                      performance profile, which will allow us to estimate
                      these results using negligible resources.},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {511 - Computational Science and Mathematical Methods
                      (POF3-511) / 574 - Theory, modelling and simulation
                      (POF3-574) / HBP SGA1 - Human Brain Project Specific Grant
                      Agreement 1 (720270) / SMHB - Supercomputing and Modelling
                      for the Human Brain (HGF-SMHB-2013-2017) / SLNS - SimLab
                      Neuroscience (Helmholtz-SLNS)},
      pid          = {G:(DE-HGF)POF3-511 / G:(DE-HGF)POF3-574 /
                      G:(EU-Grant)720270 / G:(DE-Juel1)HGF-SMHB-2013-2017 /
                      G:(DE-Juel1)Helmholtz-SLNS},
      typ          = {PUB:(DE-HGF)7},
      url          = {https://juser.fz-juelich.de/record/834363},
}