% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
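%
% For illustration, a minimal LaTeX document that consumes this file via
% biblatex with the biber backend might look like the sketch below (the
% filename "refs.bib" is an assumption, not part of this record):
%
%   \documentclass{article}
%   \usepackage[backend=biber]{biblatex}
%   \addbibresource{refs.bib}  % this file, saved under an assumed name
%   \begin{document}
%   Gain-cell attention~\cite{Leroux:1050455} ...
%   \printbibliography
%   \end{document}
%
% Compile with pdflatex, then biber, then pdflatex again to resolve the
% citation and bibliography.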
@ARTICLE{Leroux:1050455,
author = {Leroux, Nathan and Manea, Paul and Sudarshan, Chirag and
Finkbeiner, Jan and Siegel, Sebastian and Strachan, John
Paul and Neftci, Emre},
title = {{A}nalog in-memory computing attention mechanism for fast
and energy-efficient large language models},
journal = {Nature Computational Science},
volume = {5},
number = {9},
issn = {2662-8457},
address = {London},
publisher = {Nature Research},
reportid = {FZJ-2026-00225},
pages = {813--824},
year = {2025},
abstract = {Transformer networks, driven by self-attention, are central
            to large language models. In generative transformers,
            self-attention uses cache memory to store token projections,
            avoiding recomputation at each time step. However, graphics
            processing unit (GPU)-stored projections must be loaded into
            static random-access memory for each new generation step,
            causing latency and energy bottlenecks. Here we present a
            custom self-attention in-memory computing architecture based
            on emerging charge-based memories called gain cells, which
            can be efficiently written to store new tokens during
            sequence generation and enable parallel analog dot-product
            computation required for self-attention. However, the analog
            gain-cell circuits introduce non-idealities and constraints
            preventing the direct mapping of pre-trained models. To
            circumvent this problem, we design an initialization
            algorithm achieving text-processing performance comparable
            to GPT-2 without training from scratch. Our architecture
            reduces attention latency and energy consumption by up to
            two and four orders of magnitude, respectively, compared
            with GPUs, marking a substantial step toward ultrafast,
            low-power generative transformers.},
cin = {PGI-14 / PGI-15},
ddc = {004},
cid = {I:(DE-Juel1)PGI-14-20210412 / I:(DE-Juel1)PGI-15-20210701},
pnm = {5234 - Emerging NC Architectures (POF4-523)},
pid = {G:(DE-HGF)POF4-5234},
typ = {PUB:(DE-HGF)16},
doi = {10.1038/s43588-025-00854-1},
url = {https://juser.fz-juelich.de/record/1050455},
}