% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

@inproceedings{Aach:1018062,
      author       = {Aach, Marcel and Sarma, Rakesh and Inanc, Eray and Riedel,
                      Morris and Lintermann, Andreas},
      title        = {Short Paper: Accelerating Hyperparameter Optimization
                      Algorithms with Mixed Precision},
      % booktitle is required for @inproceedings; derived from the workshop
      % series named in the organization field (SC-W '23, ACM).
      booktitle    = {Proceedings of the {SC} '23 Workshops of The International
                      Conference on High Performance Computing, Network,
                      Storage, and Analysis},
      publisher    = {ACM},
      address      = {New York, NY, USA},
      reportid     = {FZJ-2023-04518},
      pages        = {1776--1779},
      year         = {2023},
      abstract     = {Hyperparameter Optimization (HPO) of Neural Networks (NNs)
                      is a computationally expensive procedure. On accelerators,
                      such as NVIDIA Graphics Processing Units (GPUs) equipped
                      with Tensor Cores, it is possible to speed-up the NN
                      training by reducing the precision of some of the NN
                      parameters, also referred to as mixed precision training.
                      This paper investigates the performance of three popular HPO
                      algorithms in terms of the achieved speed-up and model
                      accuracy, utilizing early stopping, Bayesian, and genetic
                      optimization approaches, in combination with mixed precision
                      functionalities. The benchmarks are performed on 64 GPUs in
                      parallel on three datasets: two from the vision and one from
                      the Computational Fluid Dynamics domain. The results show
                      that larger speed-ups can be achieved for mixed compared to
                      full precision HPO if the checkpoint frequency is kept low.
                      In addition to the reduced runtime, small gains in
                      generalization performance on the test set are observed.},
      month         = nov,
      date          = {2023-11-12},
      organization  = {SC-W 2023: Workshops of The
                       International Conference on High
                       Performance Computing, Network,
                       Storage, and Analysis, Denver, CO
                       (USA), 12 Nov 2023 - 17 Nov 2023},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
                      (SDLs) and Research Groups (POF4-511) / RAISE - Research on
                      AI- and Simulation-Based Engineering at Exascale (951733)},
      pid          = {G:(DE-HGF)POF4-5111 / G:(EU-Grant)951733},
      typ          = {PUB:(DE-HGF)8},
      doi          = {10.1145/3624062.3624259},
      url          = {https://juser.fz-juelich.de/record/1018062},
}