% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Siegel, Yang, Strachan (2024): IMSSA, an arXiv preprint (arXiv:2412.20215).
% Typed as a "misc" entry with eprint fields because no journal is recorded;
% the "article" type requires a journal field and warns without one.
% reportid, cin, cid, pnm, pid and typ are non-standard fields (presumably
% metadata of the repository named in the url field -- confirm); standard
% bibliography styles ignore them.
@misc{Siegel:1037668,
  author        = {Siegel, Sebastian and Yang, Ming-Jay and Strachan, John Paul},
  title         = {{IMSSA}: {Deploying} modern state-space models on
                   memristive in-memory compute hardware},
  publisher     = {arXiv},
  reportid      = {FZJ-2025-00833},
  year          = {2024},
  eprint        = {2412.20215},
  archiveprefix = {arXiv},
  abstract      = {Processing long temporal sequences is a key challenge in
                   deep learning. In recent years, Transformers have become
                   state-of-the-art for this task, but suffer from excessive
                   memory requirements due to the need to explicitly store the
                   sequences. To address this issue, structured state-space
                   sequential (S4) models recently emerged, offering a fixed
                   memory state while still enabling the processing of very
                   long sequence contexts. The recurrent linear update of the
                   state in these models makes them highly efficient on modern
                   graphics processing units (GPU) by unrolling the recurrence
                   into a convolution. However, this approach demands
                   significant memory and massively parallel computation, which
                   is only available on the latest GPUs. In this work, we aim
                   to bring the power of S4 models to edge hardware by
                   significantly reducing the size and computational demand of
                   an S4D model through quantization-aware training, even
                   achieving ternary weights for a simple real-world task. To
                   this end, we extend conventional quantization-aware training
                   to tailor it for analog in-memory compute hardware. We then
                   demonstrate the deployment of recurrent S4D kernels on
                   memristive crossbar arrays, enabling their computation in an
                   in-memory compute fashion. To our knowledge, this is the
                   first implementation of S4 kernels on in-memory compute
                   hardware.},
  keywords      = {Machine Learning (cs.LG) (Other) / Hardware Architecture
                   (cs.AR) (Other) / FOS: Computer and information sciences
                   (Other)},
  cin           = {PGI-14},
  cid           = {I:(DE-Juel1)PGI-14-20210412},
  pnm           = {5234 - Emerging NC Architectures (POF4-523)},
  pid           = {G:(DE-HGF)POF4-5234},
  typ           = {PUB:(DE-HGF)25},
  doi           = {10.48550/arXiv.2412.20215},
  url           = {https://juser.fz-juelich.de/record/1037668},
}