QS4D: Quantization-aware training for efficient hardware deployment of structured state-space sequential models

Siegel, Sebastian; Fabre, Maxime; Yang, Ming-Jay; Strachan, John Paul; Bouhadjar, Younes; Neftci, Emre
doi:10.48550/ARXIV.2507.06079
% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Preprint record for the QS4D paper (citation key Siegel:1050452).
% Typed as MISC rather than ARTICLE: the record carries no journal, its
% publisher is arXiv, and its DOI lies in the arXiv 10.48550/ARXIV namespace.
% The eprint identifier below is the suffix of that DOI.
% Fields such as reportid, cin, cid, pnm, pid and typ are repository-specific
% metadata; standard styles ignore unknown fields.
% NOTE(review): the author order in this entry differs from the
% semicolon-separated author list at the top of this file -- confirm the
% order against the paper itself before citing.
@MISC{Siegel:1050452,
      author       = {Siegel, Sebastian and Yang, Ming-Jay and Bouhadjar, Younes
                      and Fabre, Maxime and Neftci, Emre and Strachan, John Paul},
      title        = {{QS4D}: {Quantization-aware} training for efficient
                      hardware deployment of structured state-space sequential
                      models},
      publisher    = {arXiv},
      reportid     = {FZJ-2026-00222},
      year         = {2025},
      eprint       = {2507.06079},
      archiveprefix = {arXiv},
      abstract     = {Structured State Space models (SSM) have recently emerged
                      as a new class of deep learning models, particularly
                      well-suited for processing long sequences. Their constant
                      memory footprint, in contrast to the linearly scaling memory
                      demands of Transformers, makes them attractive candidates
                      for deployment on resource-constrained edge-computing
                      devices. While recent works have explored the effect of
                      quantization-aware training (QAT) on SSMs, they typically do
                      not address its implications for specialized edge hardware,
                      for example, analog in-memory computing (AIMC) chips. In
                      this work, we demonstrate that QAT can significantly reduce
                      the complexity of SSMs by up to two orders of magnitude
                      across various performance metrics. We analyze the relation
                      between model size and numerical precision, and show that
                      QAT enhances robustness to analog noise and enables
                      structural pruning. Finally, we integrate these techniques
                      to deploy SSMs on a memristive analog in-memory computing
                      substrate and highlight the resulting benefits in terms of
                      computational efficiency.},
      keywords     = {Machine Learning (cs.LG) (Other) / Artificial Intelligence
                      (cs.AI) (Other) / FOS: Computer and information sciences
                      (Other)},
      cin          = {PGI-14 / PGI-15},
      cid          = {I:(DE-Juel1)PGI-14-20210412 / I:(DE-Juel1)PGI-15-20210701},
      pnm          = {5234 - Emerging NC Architectures (POF4-523) / BMBF
                      03ZU1106CB - NeuroSys: Algorithm-Hardware Co-Design (Projekt
                      C) - B (BMBF-03ZU1106CB)},
      pid          = {G:(DE-HGF)POF4-5234 / G:(DE-Juel1)BMBF-03ZU1106CB},
      typ          = {PUB:(DE-HGF)25},
      doi          = {10.48550/ARXIV.2507.06079},
      url          = {https://juser.fz-juelich.de/record/1050452},
}
Gast :: Anmelden JuSER
		Suchen		Absenden		Personalisieren Ihre Benachrichtigungen Ihre Körbe Ihre Suchanfragen		Hilfe