% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference contribution by Fischer, Dahmen and Helias (2025),
% report id FZJ-2025-02060.
% The event name, venue and dates are stored in "booktitle", the field that
% the inproceedings entry type requires. NOTE(review): they were previously
% held in "organization"; confirm no downstream tool reads that field.
% The fields reportid, subtyp, cin, cid, pnm, pid and typ are not standard
% BibTeX fields; standard styles ignore them, so they are kept verbatim.
@INPROCEEDINGS{Fischer:1040930,
author = {Fischer, Kirsten and Dahmen, David and Helias, Moritz},
title = {Response functions in residual networks as a measure for
signal propagation},
booktitle = {{DPG} Spring Meeting of the Condensed
Matter Section, Regensburg (Germany),
16 Mar 2025 -- 21 Mar 2025},
reportid = {FZJ-2025-02060},
year = {2025},
abstract = {Residual networks (ResNets) demonstrate superior
trainability and performance compared to feed-forward
networks, particularly at greater depths, due to the
introduction of skip connections that enhance signal
propagation to deeper layers. Prior studies have shown that
incorporating a scaling parameter into the residual branch
can further improve generalization performance. However, the
underlying mechanisms behind these effects and their
robustness across network hyperparameters remain unclear.
For feed-forward networks, finite-size theories have proven
valuable in understanding signal propagation and optimizing
hyperparameters. Extending this approach to ResNets, we
develop a finite-size field theory to systematically analyze
signal propagation and its dependence on the residual
branch's scaling parameter. Through this framework, we
derive analytical expressions for the response function,
which measures the network's sensitivity to varying inputs.
We obtain a formula for the optimal scaling parameter,
revealing that it depends minimally on other
hyperparameters, such as weight variance, thereby explaining
its universality across hyperparameter configurations.},
month = mar,
date = {2025-03-16},
subtyp = {After Call},
cin = {IAS-6},
cid = {I:(DE-Juel1)IAS-6-20130828},
pnm = {5232 - Computational Principles (POF4-523) / 5234 -
Emerging NC Architectures (POF4-523) / MSNN - Theory of
multi-scale neuronal networks (HGF-SMHB-2014-2018) /
RenormalizedFlows - Transparent Deep Learning with
Renormalized Flows (BMBF-01IS19077A) / ACA - Advanced
Computing Architectures (SO-092) / neuroIC002 - Recurrence
and stochasticity for neuro-inspired computation
(EXS-SF-neuroIC002) / GRK 2416 - GRK 2416:
MultiSenses-MultiScales: Neue Ansätze zur Aufklärung
neuronaler multisensorischer Integration (368482240)},
pid = {G:(DE-HGF)POF4-5232 / G:(DE-HGF)POF4-5234 /
G:(DE-Juel1)HGF-SMHB-2014-2018 /
G:(DE-Juel-1)BMBF-01IS19077A / G:(DE-HGF)SO-092 /
G:(DE-82)EXS-SF-neuroIC002 / G:(GEPRIS)368482240},
typ = {PUB:(DE-HGF)6},
url = {https://juser.fz-juelich.de/record/1040930},
}