% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

@INPROCEEDINGS{Yegenoglu:889208,
      author       = {Yegenoglu, Alper and Krajsek, Kai and Diaz, Sandra and
                      Herty, Michael},
      title        = {{E}nsemble {K}alman {F}ilter {O}ptimizing {D}eep {N}eural
                      {N}etworks: {A}n {A}lternative {A}pproach to
                      {N}on-performing {G}radient {D}escent},
      volume       = {12566},
      address      = {Cham},
      publisher    = {Springer},
      reportid     = {FZJ-2021-00117},
      series       = {Lecture Notes in Computer Science},
      pages        = {78--92},
      year         = {2020},
      booktitle    = {Machine Learning, Optimization, and Data Science},
      abstract     = {The successful training of deep neural networks depends
                      on the initialization scheme and the choice of
                      activation functions. Non-optimally chosen parameter
                      settings lead to the well-known problem of exploding or
                      vanishing gradients. This issue occurs when gradient
                      descent and backpropagation are applied. In this
                      setting, the Ensemble Kalman Filter (EnKF) can be used
                      as an alternative optimizer for training neural
                      networks. The EnKF does not require the explicit
                      calculation of gradients or adjoints, and we show that
                      this resolves the exploding and vanishing gradient
                      problem. We analyze different parameter initializations,
                      propose a dynamic change in ensembles, and compare the
                      results to established methods.},
      month        = jul,
      date         = {2020-07-19},
      organization = {The Sixth International Conference on Machine Learning,
                      Optimization, and Data Science, Siena (Italy), 19 Jul
                      2020 - 22 Jul 2020},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {511 - Computational Science and Mathematical Methods
                      (POF3-511) / SMHB - Supercomputing and Modelling for the
                      Human Brain (HGF-SMHB-2013-2017) / CSD-SSD - Center for
                      Simulation and Data Science (CSD) - School for Simulation
                      and Data Science (SSD) (CSD-SSD-20190612) / SLNS - SimLab
                      Neuroscience (Helmholtz-SLNS) / HDS LEE - Helmholtz School
                      for Data Science in Life, Earth and Energy (HDS LEE)
                      (HDS-LEE-20190612) / PhD no Grant - Doktorand ohne besondere
                      Förderung (PHD-NO-GRANT-20170405) / HAF - Helmholtz
                      Analytics Framework (ZT-I-0003)},
      pid          = {G:(DE-HGF)POF3-511 / G:(DE-Juel1)HGF-SMHB-2013-2017 /
                      G:(DE-Juel1)CSD-SSD-20190612 / G:(DE-Juel1)Helmholtz-SLNS /
                      G:(DE-Juel1)HDS-LEE-20190612 /
                      G:(DE-Juel1)PHD-NO-GRANT-20170405 / G:(DE-HGF)ZT-I-0003},
      typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
      doi          = {10.1007/978-3-030-64580-9_7},
      url          = {https://juser.fz-juelich.de/record/889208},
}
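
% The abstract above describes the EnKF as a gradient-free optimizer for deep
% neural networks. The sketch below shows one generic EnKF update step in
% NumPy, as an illustration only; it is not code from the cited paper, and the
% name `enkf_step`, the regularization `gamma`, and the regression setting
% (fitting outputs to targets `y`) are assumptions.
%
%   import numpy as np
%
%   def enkf_step(ensemble, forward, y, gamma=1e-2):
%       """One EnKF update used as a gradient-free optimizer.
%       `ensemble` holds one parameter vector per column, `forward` maps a
%       parameter vector to model outputs, and `y` are the targets."""
%       J = ensemble.shape[1]
%       # Evaluate the forward model per member; no gradients or adjoints.
%       G = np.stack([forward(ensemble[:, j]) for j in range(J)], axis=1)
%       U = ensemble - ensemble.mean(axis=1, keepdims=True)  # parameter deviations
%       D = G - G.mean(axis=1, keepdims=True)                # output deviations
%       Cup = U @ D.T / J   # cross-covariance between parameters and outputs
%       Cpp = D @ D.T / J   # output covariance
%       # Kalman gain; gamma regularizes the inversion (noise covariance).
%       K = Cup @ np.linalg.inv(Cpp + gamma * np.eye(Cpp.shape[0]))
%       # Move every ensemble member toward the observed targets.
%       return ensemble + K @ (y[:, None] - G)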