% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
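%
% For example, a classic workflow with bibtex8 (assuming a document named
% main.tex that cites this entry from main.bib — names are illustrative) is:
%
%     pdflatex main
%     bibtex8 main
%     pdflatex main
%     pdflatex main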

@INPROCEEDINGS{Fischer:1040930,
      author       = {Fischer, Kirsten and Dahmen, David and Helias, Moritz},
      title        = {{R}esponse functions in residual networks as a measure for
                      signal propagation},
      reportid     = {FZJ-2025-02060},
      year         = {2025},
      abstract     = {Residual networks (ResNets) demonstrate superior
                      trainability and performance compared to feed-forward
                      networks, particularly at greater depths, due to the
                      introduction of skip connections that enhance signal
                      propagation to deeper layers. Prior studies have shown that
                      incorporating a scaling parameter into the residual branch
                      can further improve generalization performance. However, the
                      underlying mechanisms behind these effects and their
                      robustness across network hyperparameters remain unclear. For
                      feed-forward networks, finite-size theories have proven
                      valuable in understanding signal propagation and optimizing
                      hyperparameters. Extending this approach to ResNets, we
                      develop a finite-size field theory to systematically analyze
                      signal propagation and its dependence on the residual
                      branch's scaling parameter. Through this framework, we
                      derive analytical expressions for the response function,
                      which measures the network's sensitivity to varying inputs.
                      We obtain a formula for the optimal scaling parameter,
                      revealing that it depends minimally on other
                      hyperparameters, such as weight variance, thereby explaining
                      its universality across hyperparameter configurations.},
      month        = {Mar},
      date         = {2025-03-16},
      organization = {DPG Spring Meeting of the Condensed
                      Matter Section, Regensburg (Germany),
                      16 Mar 2025 - 21 Mar 2025},
      subtyp       = {After Call},
      cin          = {IAS-6},
      cid          = {I:(DE-Juel1)IAS-6-20130828},
      pnm          = {5232 - Computational Principles (POF4-523) / 5234 -
                      Emerging NC Architectures (POF4-523) / MSNN - Theory of
                      multi-scale neuronal networks (HGF-SMHB-2014-2018) /
                      RenormalizedFlows - Transparent Deep Learning with
                      Renormalized Flows (BMBF-01IS19077A) / ACA - Advanced
                      Computing Architectures (SO-092) / neuroIC002 - Recurrence
                      and stochasticity for neuro-inspired computation
                      (EXS-SF-neuroIC002) / GRK 2416 - GRK 2416:
                      MultiSenses-MultiScales: New Approaches to Elucidating
                      Neuronal Multisensory Integration (368482240)},
      pid          = {G:(DE-HGF)POF4-5232 / G:(DE-HGF)POF4-5234 /
                      G:(DE-Juel1)HGF-SMHB-2014-2018 /
                      G:(DE-Juel-1)BMBF-01IS19077A / G:(DE-HGF)SO-092 /
                      G:(DE-82)EXS-SF-neuroIC002 / G:(GEPRIS)368482240},
      typ          = {PUB:(DE-HGF)6},
      url          = {https://juser.fz-juelich.de/record/1040930},
}
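
% Illustrative sketch (not part of the record above): the abstract refers to a
% scaling parameter on the residual branch. A common parametrization of such a
% residual layer update, with scaling parameter \alpha, activation \phi, and
% layer-l weights W^{(l)}, is
%
%     x^{(l+1)} = x^{(l)} + \alpha \, \phi\bigl(W^{(l)} x^{(l)}\bigr),
%
% and the response function then probes how the output x^{(L)} changes under a
% variation of the input x^{(0)}. The authors' exact parametrization is not
% given in this record, so this form is an assumption for illustration only.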