% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@inproceedings{Fischer:1029334,
  author       = {Fischer, Kirsten and Lindner, Javed and Dahmen, David and
                  Ringel, Zohar and Krämer, Michael and Helias, Moritz},
  title        = {Critical feature learning in deep neural networks},
  booktitle    = {Proceedings of the 41st International Conference on
                  Machine Learning ({ICML})},
  reportid     = {FZJ-2024-05061},
  year         = {2024},
  abstract     = {A key property of neural networks driving their success is
                  their ability to learn features from data. Understanding
                  feature learning from a theoretical viewpoint is an emerging
                  field with many open questions. In this work we capture
                  finite-width effects with a systematic theory of network
                  kernels in deep non-linear neural networks. We show that the
                  Bayesian prior of the network can be written in closed form
                  as a superposition of Gaussian processes, whose kernels are
                  distributed with a variance that depends inversely on the
                  network width N . A large deviation approach, which is exact
                  in the proportional limit for the number of data points
                  P=αN→∞, yields a pair of forward-backward equations for
                  the maximum a posteriori kernels in all layers at once. We
                  study their solutions perturbatively to demonstrate how the
                  backward propagation across layers aligns kernels with the
                  target. An alternative field-theoretic formulation shows
                  that kernel adaptation of the Bayesian posterior at
                  finite-width results from fluctuations in the prior: larger
                  fluctuations correspond to a more flexible network prior and
                  thus enable stronger adaptation to data. We thus find a
                  bridge between the classical edge-of-chaos NNGP theory and
                  feature learning, exposing an intricate interplay between
                  criticality, response functions, and feature scale.},
  month        = jul,
  date         = {2024-07-21},
  organization = {The Forty-first International
                  Conference on Machine Learning, Wien
                  (Austria), 21 Jul 2024 - 27 Jul 2024},
  subtyp       = {After Call},
  cin          = {IAS-6},
  cid          = {I:(DE-Juel1)IAS-6-20130828},
  pnm          = {5232 - Computational Principles (POF4-523) / 5234 -
                  Emerging NC Architectures (POF4-523) / RenormalizedFlows -
                  Transparent Deep Learning with Renormalized Flows
                  (BMBF-01IS19077A) / MSNN - Theory of multi-scale neuronal
                  networks (HGF-SMHB-2014-2018) / ACA - Advanced Computing
                  Architectures (SO-092)},
  pid          = {G:(DE-HGF)POF4-5232 / G:(DE-HGF)POF4-5234 /
                  G:(DE-Juel-1)BMBF-01IS19077A /
                  G:(DE-Juel1)HGF-SMHB-2014-2018 / G:(DE-HGF)SO-092},
  typ          = {PUB:(DE-HGF)24},
  url          = {https://juser.fz-juelich.de/record/1029334},
}