% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference contribution on the Heat library; record 1019998 exported from
% the JuSER repository (see the url field).
% The fields reportid, subtyp, cin, cid, pnm, pid and typ are not standard
% BibTeX fields; they look like repository bookkeeping metadata and are kept
% verbatim. Standard styles ignore unknown fields.
% NOTE(review): the abstract describes a presentation; confirm that a
% proceedings-style entry type is the intended one for this record.
@inproceedings{Hoppe:1019998,
  author    = {Hoppe, Fabian and Comito, Claudia and
               Gutiérrez Hermosillo Muriedas, Juan Pedro and
               Götz, Markus and Hagemeier, Björn and
               Knechtges, Philipp and Krajsek, Kai and
               Rüttgers, Alexander and Streit, Achim and
               Tarnawa, Michael},
  title     = {Scaling data-intensive analytics with {Heat}: a {Python}
               library for massively-parallel array computing and machine
               learning},
  booktitle = {Helmholtz {AI} Conference, Hamburg (Germany),
               12--14 Jun 2023},
  year      = {2023},
  month     = jun,
  date      = {2023-06-12},
  url       = {https://juser.fz-juelich.de/record/1019998},
  abstract  = {Manipulating and processing massive data sets is
               challenging. For the vast majority of research communities,
               those without a background in high-performance computing,
               the standard approach involves breaking up and analyzing
               data in smaller chunks, an inefficient and very
               prone-to-errors process. The Helmholtz Analytics Toolkit
               (Heat) library offers a solution to this problem by
               providing memory-distributed and hardware-accelerated array
               manipulation, data analytics, and machine learning
               algorithms in Python. Developed in collaboration by three
               institutions of the Helmholtz Association (KIT, FZJ, DLR),
               Heat: enables memory distribution of n-dimensional arrays,
               adopts PyTorch as process-local compute engine (hence
               supporting GPU-acceleration), provides memory-distributed
               (i.e., multi-node, multi-GPU) array operations and
               algorithms, optimizing asynchronous MPI-communication under
               the hood, and wraps functionalities in NumPy- or
               scikit-learn-like API to achieve porting of existing
               applications with minimal changes. In this presentation, we
               will provide an overview of the Heat library's features and
               capabilities and discuss its role in the ecosystem of
               distributed array computing and machine learning in Python.
               Additionally, we will highlight Heat's role as a platform
               for cross-discipline collaboration in data-intensive
               research, and address technical and operational challenges
               in Heat development.},
  reportid  = {FZJ-2023-05813},
  subtyp    = {After Call},
  cin       = {JSC},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
               (SDLs) and Research Groups (POF4-511) / 5112 - Cross-Domain
               Algorithms, Tools, Methods Labs (ATMLs) and Research Groups
               (POF4-511) / SLNS - SimLab Neuroscience (Helmholtz-SLNS)},
  pid       = {G:(DE-HGF)POF4-5111 / G:(DE-HGF)POF4-5112 /
               G:(DE-Juel1)Helmholtz-SLNS},
  typ       = {PUB:(DE-HGF)6},
}