% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference contribution, record 1007693 at juser.fz-juelich.de.
% The fields reportid, cin, cid, pnm, pid and typ are repository metadata;
% they are not part of the standard data model and are ignored by styles.
% venue, eventdate and pagetotal are biblatex fields; classic styles skip them.
@inproceedings{Inanc:1007693,
  author    = {Inanc, Eray and Albers, Marian and Sarma, Rakesh and Aach,
               Marcel and Schröder, Wolfgang and Lintermann, Andreas},
  title     = {Parallel and Scalable Deep Learning to Reconstruct Actuated
               Turbulent Boundary Layer Flows. {Part II}: Autoencoder
               Training on {HPC} Systems},
  booktitle = {33rd International Conference on Parallel Computational
               Fluid Dynamics},
  venue     = {Alba, Italy},
  eventdate = {2022-05-25/2022-05-27},
  year      = {2022},
  month     = may,
  pagetotal = {4},
  reportid  = {FZJ-2023-02167},
  abstract  = {Convolutional autoencoders are trained on exceptionally
               large actuated turbulent boundary layer simulation data (8.3
               TB) on the high-performance computer JUWELS at the
               J{\"u}lich Supercomputing Centre. The parallelization of the
               training is based on a distributed data-parallelism
               approach. This method relies on distributing the training
               dataset to multiple workers, where the trainable parameters
               of the convolutional autoencoder network are occasionally
               exchanged between the workers. This allows the training
               times to be drastically reduced - almost linear scaling
               performance can be achieved by increasing the number of
               workers (up to 2,048 GPUs). As a consequence of this
               increase, the total batch size also increases. This directly
               affects the training accuracy and hence, the quality of the
               trained network. The training error, computed between the
               reference and the reconstructed turbulent boundary layer
               fields, becomes larger when the number of workers is
               increased. This behavior needs to be taken care of
               especially when going to a large number of workers, i.e., a
               compromise between parallel speed and accuracy needs to be
               found.},
  cin       = {JSC},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
               (SDLs) and Research Groups (POF4-511) / RAISE - Research on
               AI- and Simulation-Based Engineering at Exascale (951733)},
  pid       = {G:(DE-HGF)POF4-5111 / G:(EU-Grant)951733},
  typ       = {PUB:(DE-HGF)8},
  url       = {https://juser.fz-juelich.de/record/1007693},
}