% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Preprint record for arXiv:2502.01247 (repository record 1053126, see url).
% Typed as misc rather than article: the record names no journal, which the
% standard article type requires, and howpublished is a misc-type field.
% The title needs no brace on its first letter; styles that downcase titles
% leave the first character alone.
% reportid, pnm, pid, typ and SLACcitation are repository-export fields that
% the standard styles do not define; they are kept verbatim as metadata.
@misc{Khalfaoui:1053126,
  author        = {Khalfaoui, Ismail and Kesselheim, Stefan},
  title         = {Polynomial, trigonometric, and tropical activations},
  howpublished  = {arXiv:2502.01247},
  publisher     = {arXiv},
  year          = {2025},
  eprint        = {2502.01247},
  archivePrefix = {arXiv},
  url           = {https://juser.fz-juelich.de/record/1053126},
  abstract      = {Which functions can be used as activations in deep neural
                  networks? This article explores families of functions based
                  on orthonormal bases, including the Hermite polynomial basis
                  and the Fourier trigonometric basis, as well as a basis
                  resulting from the tropicalization of a polynomial basis.
                  Our study shows that, through simple variance-preserving
                  initialization and without additional clamping mechanisms,
                  these activations can successfully be used to train deep
                  models, such as GPT-2 for next-token prediction on
                  OpenWebText and ConvNeXt for image classification on
                  ImageNet. Our work addresses the issue of exploding and
                  vanishing activations and gradients, particularly prevalent
                  with polynomial activations, and opens the door for
                  improving the efficiency of large-scale learning tasks.
                  Furthermore, our approach provides insight into the
                  structure of neural networks, revealing that networks with
                  polynomial activations can be interpreted as multivariate
                  polynomial mappings. Finally, using Hermite interpolation,
                  we show that our activations can closely approximate
                  classical ones in pre-trained models by matching both the
                  function and its derivative, making them especially useful
                  for fine-tuning tasks. These activations are available in
                  the torchortho library, which can be accessed via:
                  https://github.com/K-H-Ismail/torchortho.},
  keywords      = {Machine Learning (cs.LG) (Other) / Artificial Intelligence
                  (cs.AI) (Other) / Computation and Language (cs.CL) (Other) /
                  Computer Vision and Pattern Recognition (cs.CV) (Other) /
                  Algebraic Geometry (math.AG) (Other) / FOS: Computer and
                  information sciences (Other) / FOS: Mathematics (Other)},
  reportid      = {FZJ-2026-01459, arXiv:2502.01247},
  pnm           = {Helmholtz AI Consultant Team FB Information (E54.303.11) /
                  nxtAIM - nxtAIM – NXT GEN AI Methods (19A23014l) / 5112 -
                  Cross-Domain Algorithms, Tools, Methods Labs (ATMLs) and
                  Research Groups (POF4-511)},
  pid           = {G:(DE-Juel-1)E54.303.11 / G:(BMWK)19A23014l /
                  G:(DE-HGF)POF4-5112},
  typ           = {PUB:(DE-HGF)25},
  SLACcitation  = {$\%\%CITATION$ = $arXiv:2502.01247;\%\%$},
}