% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

@ARTICLE{Siegel:1037668,
      author       = {Siegel, Sebastian and Yang, Ming-Jay and Strachan, John
                      Paul},
      title        = {{IMSSA}: {D}eploying modern state-space models on
                      memristive in-memory compute hardware},
      publisher    = {arXiv},
      reportid     = {FZJ-2025-00833},
      year         = {2024},
      abstract     = {Processing long temporal sequences is a key challenge in
                      deep learning. In recent years, Transformers have become
                      state-of-the-art for this task, but suffer from excessive
                      memory requirements due to the need to explicitly store the
                      sequences. To address this issue, structured state-space
                      sequential (S4) models recently emerged, offering a fixed
                      memory state while still enabling the processing of very
                      long sequence contexts. The recurrent linear update of the
                      state in these models makes them highly efficient on modern
                      graphics processing units (GPUs) by unrolling the recurrence
                      into a convolution. However, this approach demands
                      significant memory and massively parallel computation, which
                      is only available on the latest GPUs. In this work, we aim
                      to bring the power of S4 models to edge hardware by
                      significantly reducing the size and computational demand of
                      an S4D model through quantization-aware training, even
                      achieving ternary weights for a simple real-world task. To
                      this end, we extend conventional quantization-aware training
                      to tailor it for analog in-memory compute hardware. We then
                      demonstrate the deployment of recurrent S4D kernels on
                      memristive crossbar arrays, enabling their computation in an
                      in-memory compute fashion. To our knowledge, this is the
                      first implementation of S4 kernels on in-memory compute
                      hardware.},
      keywords     = {Machine Learning (cs.LG) (Other) / Hardware Architecture
                      (cs.AR) (Other) / FOS: Computer and information sciences
                      (Other)},
      cin          = {PGI-14},
      cid          = {I:(DE-Juel1)PGI-14-20210412},
      pnm          = {5234 - Emerging NC Architectures (POF4-523)},
      pid          = {G:(DE-HGF)POF4-5234},
      typ          = {PUB:(DE-HGF)25},
      doi          = {10.48550/arXiv.2412.20215},
      url          = {https://juser.fz-juelich.de/record/1037668},
}