% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

@INPROCEEDINGS{Inanc:1007693,
      author       = {Inanc, Eray and Albers, Marian and Sarma, Rakesh and Aach,
                      Marcel and Schröder, Wolfgang and Lintermann, Andreas},
      title        = {{P}arallel and {S}calable {D}eep {L}earning to
                      {R}econstruct {A}ctuated {T}urbulent {B}oundary {L}ayer
                      {F}lows. {P}art {II}: {A}utoencoder {T}raining on {HPC}
                      {S}ystems},
      reportid     = {FZJ-2023-02167},
      pages        = {4 pages},
      year         = {2022},
      abstract     = {Convolutional autoencoders are trained on exceptionally
                      large actuated turbulent boundary layer simulation data (8.3
                      TB) on the high-performance computer JUWELS at the
                      $J\"ulich$ Supercomputing Centre. The parallelization of the
                      training is based on a distributed data-parallelism
                      approach. This method distributes the training dataset
                      across multiple workers, which occasionally exchange the
                      trainable parameters of the convolutional autoencoder
                      network. This drastically reduces the training times;
                      almost linear scaling is achieved when the number of
                      workers is increased (up to 2,048 GPUs). As a
                      consequence of this
                      increase, the total batch size also increases. This directly
                      affects the training accuracy and hence the quality of the
                      trained network. The training error, computed between the
                      reference and the reconstructed turbulent boundary layer
                      fields, becomes larger when the number of workers is
                      increased. This behavior must be accounted for,
                      especially at large numbers of workers, i.e., a
                      compromise between parallel speedup and accuracy has to
                      be found.},
      month        = {May},
      date         = {2022-05-25},
      organization = {33rd International Conference on Parallel Computational
                      Fluid Dynamics, Alba (Italy), 25 May 2022 - 27 May 2022},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
                      (SDLs) and Research Groups (POF4-511) / RAISE - Research on
                      AI- and Simulation-Based Engineering at Exascale (951733)},
      pid          = {G:(DE-HGF)POF4-5111 / G:(EU-Grant)951733},
      typ          = {PUB:(DE-HGF)8},
      url          = {https://juser.fz-juelich.de/record/1007693},
}
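
% The abstract describes distributed data-parallel training: the dataset is
% sharded across workers, each worker holds a replica of the convolutional
% autoencoder, and parameters are exchanged between the workers. With a
% per-worker batch size B and W workers, the effective global batch size is
% W * B, which is why the training error grows as workers are added. Below is
% a minimal, hypothetical PyTorch sketch of this pattern, commented out to
% keep the .bib file valid. The authors' actual framework, model architecture,
% and hyperparameters are not given in this record and are assumptions here;
% standard per-step gradient all-reduce (PyTorch DDP) stands in for the
% "occasional" parameter exchange mentioned in the abstract.
%
%   # Launch with: torchrun --nproc_per_node=<gpus> train_sketch.py
%   import torch
%   import torch.distributed as dist
%   from torch.nn.parallel import DistributedDataParallel as DDP
%   from torch.utils.data import DataLoader, TensorDataset
%   from torch.utils.data.distributed import DistributedSampler
%
%   dist.init_process_group("nccl")
%   local_rank = dist.get_rank() % torch.cuda.device_count()
%   torch.cuda.set_device(local_rank)
%
%   # Toy stand-in for boundary-layer snapshots (not the 8.3 TB dataset)
%   dataset = TensorDataset(torch.randn(1024, 1, 64, 64))
%   sampler = DistributedSampler(dataset)   # shards the data across workers
%   loader = DataLoader(dataset, batch_size=32, sampler=sampler)
%   # Effective global batch size = 32 * dist.get_world_size()
%
%   model = torch.nn.Sequential(            # minimal convolutional autoencoder
%       torch.nn.Conv2d(1, 16, 3, stride=2, padding=1), torch.nn.ReLU(),
%       torch.nn.Conv2d(16, 4, 3, stride=2, padding=1), torch.nn.ReLU(),
%       torch.nn.ConvTranspose2d(4, 16, 3, stride=2, padding=1,
%                                output_padding=1), torch.nn.ReLU(),
%       torch.nn.ConvTranspose2d(16, 1, 3, stride=2, padding=1,
%                                output_padding=1),
%   ).cuda()
%   model = DDP(model, device_ids=[local_rank])  # replicate across workers
%
%   opt = torch.optim.Adam(model.parameters(), lr=1e-3)
%   loss_fn = torch.nn.MSELoss()  # reconstruction error vs. reference field
%
%   for epoch in range(2):
%       sampler.set_epoch(epoch)            # reshuffle the shards each epoch
%       for (x,) in loader:
%           x = x.cuda()
%           loss = loss_fn(model(x), x)     # reconstruct the input snapshot
%           opt.zero_grad()
%           loss.backward()   # gradients are all-reduced across all workers
%           opt.step()
%   dist.destroy_process_group()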