% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

@INPROCEEDINGS{MorenoAlvarez:916825,
      author       = {Moreno-Alvarez, Sergio and Paoletti, Mercedes E. and Rico,
                      Juan A. and Cavallaro, Gabriele and Haut, Juan M.},
      title        = {{O}ptimizing {D}istributed {D}eep {L}earning in
                      {H}eterogeneous {C}omputing {P}latforms for {R}emote
                      {S}ensing {D}ata {C}lassification},
      publisher    = {IEEE},
      reportid     = {FZJ-2023-00122},
      pages        = {2726--2729},
      year         = {2022},
      abstract     = {Applications from Remote Sensing (RS) have unveiled
                      unique challenges for Deep Learning (DL) due to the high
                      volume and complexity of their data. On the one hand,
                      deep neural network architectures can automatically
                      extract informative features from RS data. On the other
                      hand, these models have massive numbers of tunable
                      parameters and require high computational capabilities.
                      Distributed DL with data parallelism on
                      High-Performance Computing (HPC) systems has proven
                      necessary to cope with the demands of DL models.
                      Nevertheless, a single HPC system can already be highly
                      heterogeneous and include different computing resources
                      with uneven processing power. In this context, a
                      standard data parallelism strategy does not partition
                      the data efficiently according to the available
                      computing resources. This paper proposes an alternative
                      approach to computing the gradient, which guarantees
                      that each DL model replica's contribution to the
                      gradient calculation is proportional to its processing
                      speed. The experimental results, obtained on a
                      heterogeneous HPC system with RS data, demonstrate that
                      the proposed approach provides a significant training
                      speed-up and a gain in global accuracy compared to one
                      of the state-of-the-art distributed DL frameworks.},
      month        = {Jul},
      date         = {2022-07-17},
      organization = {IGARSS 2022 - 2022 IEEE International
                      Geoscience and Remote Sensing
                      Symposium, Kuala Lumpur (Malaysia), 17
                      Jul 2022 - 22 Jul 2022},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {5111 - Domain-Specific Simulation $\&$ Data Life Cycle Labs
                      (SDLs) and Research Groups (POF4-511) / DEEP-EST - DEEP -
                      Extreme Scale Technologies (754304)},
      pid          = {G:(DE-HGF)POF4-5111 / G:(EU-Grant)754304},
      typ          = {PUB:(DE-HGF)8},
      UT           = {WOS:000920916602230},
      doi          = {10.1109/IGARSS46834.2022.9883762},
      url          = {https://juser.fz-juelich.de/record/916825},
}
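
% The abstract above describes a heterogeneity-aware data-parallel scheme in
% which each model replica's contribution to the gradient is proportional to
% its processing speed. Below is a minimal sketch of that idea in plain
% Python, kept in comments so this file remains a valid .bib. It is an
% illustration under stated assumptions, not the authors' implementation;
% all names (partition_batch, weighted_gradient, throughputs) are
% hypothetical.
%
%   def partition_batch(global_batch, throughputs):
%       """Split a global batch across replicas proportionally to their
%       measured throughput (samples/second), so faster devices get more."""
%       total = sum(throughputs)
%       sizes = [round(global_batch * t / total) for t in throughputs]
%       sizes[-1] += global_batch - sum(sizes)  # absorb rounding drift
%       return sizes
%
%   def weighted_gradient(grads, sizes):
%       """Average per-replica gradients (lists of floats) weighted by the
%       number of samples each replica actually processed."""
%       total = sum(sizes)
%       return [sum(n / total * g[i] for g, n in zip(grads, sizes))
%               for i in range(len(grads[0]))]
%
%   # Example: three uneven devices sharing a global batch of 256.
%   sizes = partition_batch(256, throughputs=[1.0, 2.0, 5.0])  # [32, 64, 160]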