% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
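% For example, a minimal biblatex preamble using the biber backend might look
% like this (the bibliography file name “refs.bib” is an assumption):
%
%   \usepackage[backend=biber]{biblatex}
%   \addbibresource{refs.bib}
%
% followed by \printbibliography in the document body and a compile sequence
% such as: pdflatex main && biber main && pdflatex main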

@INPROCEEDINGS{Rojas:910530,
      author       = {Rojas, Elvis and Knobloch, Michael and Daoud, Nour and
                      Meneses, Esteban and Mohr, Bernd},
      title        = {{E}arly {E}xperiences of {N}oise-{S}ensitivity
                      {P}erformance {A}nalysis of a {D}istributed {D}eep
                      {L}earning {F}ramework},
      publisher    = {IEEE},
      reportid     = {FZJ-2022-03912},
      pages        = {516--522},
      year         = {2022},
      abstract     = {Deep Learning (DL) applications are used to solve complex
                      problems efficiently. These applications require complex
                      neural network models composed of millions of parameters
                      and huge amounts of data for proper training. This is
                      only possible by parallelizing the necessary computations
                      with so-called distributed deep learning (DDL) frameworks
                      across many GPUs spread over multiple nodes of an HPC
                      cluster. These frameworks mostly utilize the compute
                      power of the GPUs and use only a small portion of the
                      available compute power of the CPUs in the nodes for I/O
                      and inter-process communication, leaving many CPU cores
                      idle and unused. The more powerful the base CPU in the
                      cluster nodes, the more compute resources are wasted. In
                      this paper, we investigate how much of these unutilized
                      compute resources could be used for executing other
                      applications without lowering the performance of the DDL
                      frameworks. In our experiments, we executed a
                      noise-generation application, which generates a very
                      high memory, network, or I/O load, in parallel with DDL
                      frameworks, and used HPC profiling and tracing techniques
                      to determine whether and how the generated noise affects
                      the performance of the DDL frameworks. Early results
                      indicate that it might be possible to utilize the idle
                      cores for jobs of other users without negatively
                      affecting the performance of the DDL applications.},
      month        = {Sep},
      date         = {2022-09-06},
      organization = {2022 IEEE International Conference on Cluster
                      Computing, Heidelberg (Germany), 6 Sep 2022 -
                      9 Sep 2022},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {5112 - Cross-Domain Algorithms, Tools, Methods Labs
                      (ATMLs) and Research Groups (POF4-511) / ExtraNoise –
                      Performance Analysis of HPC Applications in Noisy
                      Environments (449683531) / ATMLPP - ATML Parallel
                      Performance (ATMLPP)},
      pid          = {G:(DE-HGF)POF4-5112 / G:(GEPRIS)449683531 /
                      G:(DE-Juel-1)ATMLPP},
      typ          = {PUB:(DE-HGF)8},
      UT           = {WOS:000920273100051},
      doi          = {10.1109/CLUSTER51413.2022.00066},
      url          = {https://juser.fz-juelich.de/record/910530},
}
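
% With classic BibTeX (e.g. the bibtex8 binary mentioned above), a sketch of
% how this entry could be cited, assuming the file is saved as “refs.bib” and
% the main document is “main.tex”:
%
%   Rojas et al.~\cite{Rojas:910530} analyze the noise sensitivity of a
%   distributed deep learning framework.
%
%   \bibliographystyle{IEEEtran}
%   \bibliography{refs}
%
% compiled with: pdflatex main && bibtex8 main && pdflatex main && pdflatex main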