% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% NOTE(review): JuSER institutional-repository export; the non-standard fields
% (reportid, subtyp, cin, cid, pnm, pid, typ) are repository-specific metadata
% and are kept unchanged. Fixes applied: author "Michael, Krämer" was inverted
% (Last, First form requires "Krämer, Michael"); abstract de-garbled from PDF
% line-break extraction artifacts; month uses the standard unquoted macro;
% title brace-protects whole words instead of single letters.
@inproceedings{Rubin:1052069,
  author       = {Rubin, Noa and Fischer, Kirsten and Lindner, Javed and
                  Dahmen, David and Seroussi, Inbar and Ringel, Zohar and
                  Krämer, Michael and Helias, Moritz},
  title        = {{From} {Kernels} to {Features}: {A} {Multi-Scale}
                  {Adaptive} {Theory} of {Feature} {Learning}},
  reportid     = {FZJ-2026-00739},
  year         = {2025},
  abstract     = {Feature learning in neural networks is crucial for their
                  expressive power and inductive biases, motivating various
                  theoretical approaches. Some approaches describe network
                  behavior after training through a change in kernel scale
                  from initialization, resulting in a generalization power
                  comparable to a Gaussian process. Conversely, in other
                  approaches training results in the adaptation of the
                  kernel to the data, involving directional changes to the
                  kernel. The relationship and respective strengths of
                  these two views have so far remained unresolved. This
                  work presents a theoretical framework of multi-scale
                  adaptive feature learning bridging these two views. Using
                  methods from statistical mechanics, we derive analytical
                  expressions for network output statistics which are valid
                  across scaling regimes and in the continuum between them.
                  A systematic expansion of the network’s probability
                  distribution reveals that mean-field scaling requires
                  only a saddle-point approximation, while standard scaling
                  necessitates additional correction terms. Remarkably, we
                  find across regimes that kernel adaptation can be reduced
                  to an effective kernel rescaling when predicting the mean
                  network output in the special case of a linear network.
                  However, for linear and non-linear networks, the
                  multi-scale adaptive approach captures directional
                  feature learning effects, providing richer insights than
                  what could be recovered from a rescaling of the kernel
                  alone},
  month        = jul,
  date         = {2025-07-13},
  organization = {The 42nd International Conference on
                  Machine Learning, Vancouver (Canada),
                  13 Jul 2025 - 19 Jul 2025},
  subtyp       = {After Call},
  cin          = {IAS-6},
  cid          = {I:(DE-Juel1)IAS-6-20130828},
  pnm          = {5232 - Computational Principles (POF4-523) / 5234 -
                  Emerging NC Architectures (POF4-523) / MSNN - Theory of
                  multi-scale neuronal networks (HGF-SMHB-2014-2018) / ACA -
                  Advanced Computing Architectures (SO-092) / GRK 2416 - GRK
                  2416: MultiSenses-MultiScales: Neue Ansätze zur Aufklärung
                  neuronaler multisensorischer Integration (368482240)},
  pid          = {G:(DE-HGF)POF4-5232 / G:(DE-HGF)POF4-5234 /
                  G:(DE-Juel1)HGF-SMHB-2014-2018 / G:(DE-HGF)SO-092 /
                  G:(GEPRIS)368482240},
  typ          = {PUB:(DE-HGF)6},
  url          = {https://juser.fz-juelich.de/record/1052069},
}