% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference paper in the IPDPSW 2021 proceedings (see doi / url below).
% booktitle carries the proceedings name required by @inproceedings; venue and
% eventdate are biblatex fields and are simply ignored by classic BibTeX styles.
% reportid, cin, cid, pnm, pid, typ and UT are not standard BibTeX fields
% (presumably identifiers of the repository record linked in url) and are
% ignored by standard styles; they are kept verbatim.
@inproceedings{Riedel:893827,
  author       = {Riedel, Morris and Sedona, Rocco and Barakat, Chadi and
                  Einarsson, Petur and Hassanian, Reza and Cavallaro, Gabriele
                  and Book, Matthias and Neukirchen, Helmut and Lintermann,
                  Andreas},
  title        = {Practice and Experience in using Parallel and
                  Scalable Machine Learning with Heterogenous
                  Modular Supercomputing Architectures},
  booktitle    = {{IEEE} International Parallel and Distributed Processing
                  Symposium Workshops ({IPDPSW})},
  venue        = {Portland, USA},
  eventdate    = {2021-06-17/2021-06-21},
  publisher    = {IEEE},
  pages        = {76--85},
  year         = {2021},
  month        = jun,
  date         = {2021-06-17},
  doi          = {10.1109/IPDPSW52791.2021.00019},
  url          = {https://juser.fz-juelich.de/record/893827},
  abstract     = {We observe a continuously increased use of Deep Learning
                  (DL) as a specific type of Machine Learning (ML) for
                  data-intensive problems (i.e., ’big data’) that requires
                  powerful computing resources with equally increasing
                  performance. Consequently, innovative heterogeneous
                  High-Performance Computing (HPC) systems based on multi-core
                  CPUs and many-core GPUs require an architectural design that
                  addresses end user communities’ requirements that take
                  advantage of ML and DL. Still the workloads of end user
                  communities of the simulation sciences (e.g., using
                  numerical methods based on known physical laws) needs to be
                  equally supported in those architectures. This paper offers
                  insights into the Modular Supercomputer Architecture (MSA)
                  developed in the Dynamic Exascale Entry Platform (DEEP)
                  series of projects to address the requirements of both
                  simulation sciences and data-intensive sciences such as High
                  Performance Data Analytics (HPDA). It shares insights into
                  implementing the MSA in the Jülich Supercomputing Centre
                  (JSC) hosting Europe No. 1 Supercomputer Jülich Wizard for
                  European Leadership Science (JUWELS). We augment the
                  technical findings with experience and lessons learned from
                  two application communities case studies (i.e., remote
                  sensing and health sciences) using the MSA with JUWELS and
                  the DEEP systems in practice. Thus, the paper provides
                  details into specific MSA design elements that enable
                  significant performance improvements of ML and DL
                  algorithms. While this paper focuses on MSA-based HPC
                  systems and application experience, we are not losing sight
                  of advances in Cloud Computing (CC) and Quantum Computing
                  (QC) relevant for ML and DL.},
  reportid     = {FZJ-2021-02866},
  cin          = {JSC},
  cid          = {I:(DE-Juel1)JSC-20090406},
  pnm          = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
                  and Research Groups (POF4-511) / 5111 - Domain-Specific
                  Simulation $\&$ Data Life Cycle Labs (SDLs) and Research
                  Groups (POF4-511) / DEEP-EST - DEEP - Extreme Scale
                  Technologies (754304) / AISee - AI- and Simulation-Based
                  Engineering at Exascale (951733) / DEEP-SEA - DEEP –
                  SOFTWARE FOR EXASCALE ARCHITECTURES (955606) / EUROCC -
                  National Competence Centres in the framework of EuroHPC
                  (951732)},
  pid          = {G:(DE-HGF)POF4-5112 / G:(DE-HGF)POF4-5111 /
                  G:(EU-Grant)754304 / G:(EU-Grant)951733 / G:(EU-Grant)955606
                  / G:(EU-Grant)951732},
  typ          = {PUB:(DE-HGF)8},
  UT           = {WOS:000689576200008},
}