% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference contribution, report id FZJ-2017-02327 (record 828363 at
% juser.fz-juelich.de).
%
% Notes for maintainers:
%  - booktitle carries the event name (required for this entry type);
%    venue and eventdate hold the place and dates of the event.  The latter
%    two are biblatex fields and are ignored by classic BibTeX styles.
%  - year/month are kept next to date so that both classic BibTeX styles and
%    biblatex find a usable publication date.
%  - In the abstract, section headings and the numbered reference list had
%    been fused with the neighbouring text; they are separated again here.
%    The final digit of the Schrader et al. DOI had been overwritten by the
%    fused reference number and is restored as 10.1152/jn.01245.2007 --
%    TODO confirm against the publisher record.
%  - reportid, subtyp, cin, cid, pnm, pid and typ are non-standard fields
%    (presumably repository metadata); they are kept verbatim.
@INPROCEEDINGS{Canova:828363,
      author       = {Canova, Carlos and Klijn, Wouter and Baumeister, Paul F.
                      and Yegenoglu, Alper and Denker, Michael and Pleiter, Dirk},
      title        = {{ASSET} for {JULIA}: executing massively parallel spike
                      correlation analysis on a {KNL} cluster},
      booktitle    = {1st {HBP} Student Conference},
      venue        = {Vienna, Austria},
      eventdate    = {2017-02-08/2017-02-10},
      reportid     = {FZJ-2017-02327},
      year         = {2017},
      month        = feb,
      date         = {2017-02-08},
      abstract     = {Introduction: We developed a statistical analysis method,
                      ASSET, capable of detecting repeated sequences of
                      synchronous events (SSE) in massively parallel spike trains
                      [1]. Yet we have not been able to apply ASSET in its full
                      extent, given the high computational demand when assessing
                      significance of the SSEs. This challenge, however, can now
                      be overcome with the support from the High Performance
                      Analytics and Computing Platform (HPAC), and their readily
                      available modern infrastructure. Here we present the first
                      steps towards analyzing electrophysiological recordings with
                      ASSET on one of the new pre-commercial procurement machines,
                      JULIA, which is based on Intel's new Knights Landing (KNL)
                      processor. Motivation: ASSET is an analysis designed to
                      detect and quantify activity in a synfire chain [2], a
                      feedforward neuronal network with high convergence and
                      divergence of connectivity between the layers (groups).
                      Particular to such a network is that it favors the
                      propagation of synchronous spiking activities, which appear
                      in measurements as SSEs. In ASSET, the repetitive occurrence
                      of an identical SSE becomes visible in an intersection
                      matrix as a diagonal structure (DS) [3,4], which is
                      evaluated automatically for significance. Currently, the
                      ASSET method can only be applied to time segments that are
                      considerably shorter than the full duration of a typical
                      session of massively parallel electrophysiological
                      recordings due to costly numerical steps in the analysis.
                      However, these numerical computations are composed of
                      independent steps and thus ASSET would profit from
                      parallelization. A second challenge is the core of the
                      algorithm, which makes extensive use of exponential and
                      logarithmic operations. These operations are computational
                      expensive and do not lend themselves to easy array
                      vectorization on modern HPC hardware. Method: After analysis
                      and instrumentation of ASSET, an MPI version of the software
                      was implemented, distributing the workload across multiple
                      compute instances in a round-robin manner. After the work on
                      the nodes, the partial results are collected on the master
                      node and summed for the final results. In a parallel effort
                      we optimized the core of the ASSET algorithm: the
                      exponential and logarithmic operations are typically
                      calculated using Taylor expansions. Approximate methods
                      perform the same mathematical operations faster at the
                      expense of an error smaller than 1E-6. This speedup can be
                      further improved on by (automatic) array vectorization of
                      the code implementing these methods. These techniques were
                      combined with C implementations using the Cython programming
                      interface. Results: The MPI implementation allowed us to
                      leverage the large number of cores available in current
                      hardware and showed an order of magnitude shorter time to
                      solution. We will further report on the preliminary
                      qualitative and quantitative analysis of the approximate
                      methods and its effects on the runtime of the algorithm,
                      including the results of running the algorithm on the KNL
                      processors of JULIA. ASSET is currently available to the
                      scientific community via the Electrophysiological Analysis
                      Toolkit (Elephant) [5], and as such is also available to all
                      members of the Human Brain Project Consortium via the
                      Collab. Acknowledgements: Supported by Helmholtz Portfolio
                      Theme Supercomputing and Modeling for the Human Brain
                      (SMHB), EU grant 604102 (Human Brain Project, HBP), EU Grant
                      269912 (BrainScaleS), DFG Priority Program SPP 1665 (GR
                      1753/4-1 and 2175/1-1). References: 1. Torre E. et al (2016)
                      PloS CB 12(7):e1004939. 10.1371/journal.pcbi.1004939 2.
                      Abeles M. (1991) Corticonics, Cambridge University Press,
                      Cambridge 3. Schrader S. et al (2008) J Neurophysiol 100:
                      2165-2176, 10.1152/jn.01245.2007 4. Gerstein GL. et al (2012)
                      J Neurosci Methods 206: 54-64,
                      10.1016/j.jneumeth.2012.02.003 5.
                      neuralensemble.org/elephant/},
      subtyp       = {After Call},
      cin          = {INM-6 / IAS-6 / INM-10 / JSC},
      cid          = {I:(DE-Juel1)INM-6-20090406 / I:(DE-Juel1)IAS-6-20130828 /
                      I:(DE-Juel1)INM-10-20170113 / I:(DE-Juel1)JSC-20090406},
      pnm          = {571 - Connectivity and Activity (POF3-571) / 511 -
                      Computational Science and Mathematical Methods (POF3-511) /
                      513 - Supercomputer Facility (POF3-513) / SMHB -
                      Supercomputing and Modelling for the Human Brain
                      (HGF-SMHB-2013-2017) / HBP SGA1 - Human Brain Project
                      Specific Grant Agreement 1 (720270) / BRAINSCALES -
                      Brain-inspired multiscale computation in neuromorphic hybrid
                      systems (269921) / DFG project 238707842 - Kausative
                      Mechanismen mesoskopischer Aktivitätsmuster in der
                      auditorischen Kategorien-Diskrimination (238707842)},
      pid          = {G:(DE-HGF)POF3-571 / G:(DE-HGF)POF3-511 /
                      G:(DE-HGF)POF3-513 / G:(DE-Juel1)HGF-SMHB-2013-2017 /
                      G:(EU-Grant)720270 / G:(EU-Grant)269921 /
                      G:(GEPRIS)238707842},
      typ          = {PUB:(DE-HGF)24},
      url          = {https://juser.fz-juelich.de/record/828363},
}