% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference abstract, HBP Student Conference 2017 (JuSER record 843867).
% Notes for maintainers (kept outside the entry, where BibTeX ignores text):
%   - booktitle carries the event name, venue and dates that the export had
%     placed in the organization (sponsor) field.
%   - month uses the predefined macro so styles can localise or abbreviate it.
%   - year/month and date are both kept: classic BibTeX reads the former,
%     biblatex prefers the latter.
%   - Only acronyms are brace-protected in the title.
%   - The abstract had its word spacing restored (line breaks were joined
%     without spaces in the export); wording is otherwise unchanged.
%   - reportid, subtyp, cin, cid, pnm, pid and typ are repository metadata
%     that standard styles ignore.
@INPROCEEDINGS{Klijn:843867,
      author       = {Klijn, Wouter and Canova, Carlos and Baumeister, Paul F.
                      and Yegenoglu, Alper and Denker, Michael and Pleiter, Dirk
                      and Grün, Sonja},
      title        = {{ASSET} for {JULIA}: Executing Massively Parallel
                      Spike Correlation Analysis on {KNL} Cluster},
      booktitle    = {{HBP} Student Conference 2017, Vienna (Austria),
                      8--10 Feb 2017},
      reportid     = {FZJ-2018-01398},
      year         = {2017},
      abstract     = {Introduction: We developed a statistical analysis method,
                      ASSET, capable of detecting repeated sequences of
                      synchronous events (SSE) in massively parallel spike
                      trains (Torre et al., 2016). Yet we have not been able to
                      apply ASSET in its full extent, given the high
                      computational demand when assessing significance of the
                      SSEs. This challenge, however, can now be overcome with
                      the support from the High Performance Analytics and
                      Computing Platform (HPAC), and their readily available
                      modern infrastructure. Here we present the first steps
                      towards analyzing electrophysiological recordings with
                      ASSET on one of the new pre-commercial procurement
                      machines, JULIA, which is based on Intel’s new Knights
                      Landing (KNL) processor. Motivation: ASSET is an analysis
                      designed to detect and quantify activity in a synfire
                      chain (Abeles, 1991), a feedforward neuronal network with
                      high convergence and divergence of connectivity between
                      the layers (groups). Particular to such a network is that
                      it favors the propagation of synchronous spiking
                      activities, which appear in measurements as SSEs. In
                      ASSET, the repetitive occurrence of an identical SSE
                      becomes visible in an intersection matrix as a diagonal
                      structure (DS) (Schrader et al., 2008; Gerstein et al.,
                      2012), which is evaluated automatically for significance.
                      Currently, the ASSET method can only be applied to time
                      segments that are considerably shorter than the full
                      duration of a typical session of massively parallel
                      electrophysiological recordings due to costly numerical
                      steps in the analysis. However, these numerical
                      computations are composed of independent steps and thus
                      ASSET would profit from parallelization. A second
                      challenge is the core of the algorithm, which makes
                      extensive use of exponential and logarithmic operations.
                      These operations are computational expensive and do not
                      lend themselves to easy array vectorization on modern HPC
                      hardware. Methods: After analysis and instrumentation of
                      ASSET, an MPI version of the software was implemented,
                      distributing the workload across multiple compute
                      instances in a round-robin manner. After the work on the
                      nodes, the partial results are collected on the master
                      node and summed for the final results. In a parallel
                      effort we optimized the core of the ASSET algorithm: the
                      exponential and logarithmic operations are typically
                      calculated using Taylor expansions. Approximate methods
                      perform the same mathematical operations faster at the
                      expense of an error smaller than 1E-6. This speedup can
                      be further improved on by (automatic) array vectorization
                      of the code implementing these methods. These techniques
                      were combined with C implementations using the Cython
                      programming interface. Results: The MPI implementation
                      allowed us to leverage the large number of cores
                      available in current hardware and showed an order of
                      magnitude shorter time to solution. We will further
                      report on the preliminary qualitative and quantitative
                      analysis of the approximate methods and its effects on
                      the runtime of the algorithm, including the results of
                      running the algorithm on the KNL processors of JULIA.
                      ASSET is currently available to the scientific community
                      via the Electrophysiological Analysis Toolkit (Elephant),
                      and as such is also available to all members of the Human
                      Brain Project Consortium via the Collab. Acknowledgments:
                      Supported by Helmholtz Portfolio Theme Supercomputing and
                      Modeling for the Human Brain (SMHB), EU grant 604102
                      (Human Brain Project, HBP), EU Grant 269912
                      (BrainScaleS), DFG Priority Program SPP 1665 (GR 1753/4-1
                      and 2175/1-1). REFERENCES: Abeles, M. (1991).
                      Corticonics. Cambridge: Cambridge University Press.
                      Gerstein, G. L., Williams, E. R., Diesmann, M., Grün, S.,
                      and Trengove, C. (2012). Detecting synfire chains in
                      parallel spike data. J. Neurosci. Methods 206, 54–64.
                      doi: 10.1016/j.jneumeth.2012.02.003 PMID:22361572
                      Schrader, S., Bell, M. L., Allen, D. L., Byrnes, W. C.,
                      and Leinwand, L. A. (2008). Skeletal muscle adaptations
                      in response to voluntary wheel running in myosin heavy
                      chain null mice. J. Neurophysiol. 100, 2165–2176.
                      doi:10.1152/jn.01245.200 PMID:NOPMID Torre, E., Canova,
                      C., Denker, M., Gerstein, G., Helias, M., and Grün, S.
                      (2016). ASSET: analysis of sequences of synchronous
                      events in massively parallel spike trains. PLoS Comput.
                      Biol. 12:e1004939. doi: 10.1371/journal.pcbi.1004939
                      PMID:27420734},
      month        = feb,
      date         = {2017-02-08},
      subtyp       = {After Call},
      cin          = {JSC / INM-6},
      cid          = {I:(DE-Juel1)JSC-20090406 / I:(DE-Juel1)INM-6-20090406},
      pnm          = {511 - Computational Science and Mathematical Methods
                      (POF3-511) / SMHB - Supercomputing and Modelling for the
                      Human Brain (HGF-SMHB-2013-2017) / HBP SGA1 - Human Brain
                      Project Specific Grant Agreement 1 (720270)},
      pid          = {G:(DE-HGF)POF3-511 / G:(DE-Juel1)HGF-SMHB-2013-2017 /
                      G:(EU-Grant)720270},
      typ          = {PUB:(DE-HGF)24},
      url          = {https://juser.fz-juelich.de/record/843867},
}