% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@INPROCEEDINGS{Comito:1019996,
author = {Comito, Claudia and Hoppe, Fabian and Götz, Markus and
Gutiérrez Hermosillo Muriedas, Juan Pedro and Hagemeier,
Björn and Knechtges, Philipp and Krajsek, Kai and
Rüttgers, Alexander and Streit, Achim and Tarnawa, Michael},
title = {{H}eat: accelerating massive data processing in {P}ython},
reportid = {FZJ-2023-05811},
year = {2023},
abstract = {Manipulating and processing massive data sets is
            challenging. In astrophysics, as in the vast majority of
            research communities, the standard approach involves
            breaking up and analyzing data in smaller chunks, a
            process that is both inefficient and prone to errors. The
            problem is exacerbated on GPUs because of their smaller
            available memory. Popular solutions for distributing
            NumPy/SciPy computations are based on task parallelism,
            introducing significant runtime overhead, complicating
            implementation, and often limiting GPU support to a single
            vendor. This poster illustrates an alternative based on
            data parallelism instead. The open-source library Heat
            [1, 2] builds on PyTorch and mpi4py to simplify porting
            NumPy/SciPy-based code to GPUs (CUDA, ROCm), including
            multi-GPU, multi-node clusters. Under the hood, Heat
            distributes massive, memory-intensive operations over
            multi-node resources via MPI communication. From a user's
            perspective, Heat can be used seamlessly within the Python
            array ecosystem. Supported features: distributed
            (multi-GPU) I/O from shared memory; easy distribution of
            memory-intensive operations in existing code (e.g., matrix
            multiplication); interoperability within the Python array
            ecosystem, with Heat as a backend for massive array
            manipulations, statistics, signal processing, machine
            learning, and more; transparent parallelism, i.e.,
            prototype on your laptop and run the same code on an HPC
            cluster. I'll also touch upon Heat's current
            implementation roadmap and possible paths to
            collaboration.
            [1] https://github.com/helmholtz-analytics/heat
            [2] M. Götz et al., "HeAT – a Distributed and
            GPU-accelerated Tensor Framework for Data Analytics," 2020
            IEEE International Conference on Big Data (Big Data),
            Atlanta, GA, USA, 2020, pp. 276-287,
            doi: 10.1109/BigData50022.2020.9378050.},
month = {Nov},
date = {2023-11-29},
organization = {CS \& Physics Meet-Up by Lamarr \&
                B3D, TU Dortmund (Germany), 29 Nov 2023
                - 1 Dec 2023},
subtyp = {Outreach},
cin = {JSC},
cid = {I:(DE-Juel1)JSC-20090406},
pnm = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
       (SDLs) and Research Groups (POF4-511) / 5112 - Cross-Domain
       Algorithms, Tools, Methods Labs (ATMLs) and Research Groups
       (POF4-511) / SLNS - SimLab Neuroscience (Helmholtz-SLNS)},
pid = {G:(DE-HGF)POF4-5111 / G:(DE-HGF)POF4-5112 /
G:(DE-Juel1)Helmholtz-SLNS},
typ = {PUB:(DE-HGF)24},
url = {https://juser.fz-juelich.de/record/1019996},
}
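
% The abstract above describes Heat's NumPy-like, data-parallel API. As a
% minimal, illustrative sketch of that usage (the array shapes, process
% count, and "gpu" device string are assumptions, not part of this record;
% see https://github.com/helmholtz-analytics/heat for the authoritative
% documentation):
%
%   import heat as ht
%
%   # Matrices distributed row-wise (split=0) across all MPI processes;
%   # launch with e.g.: mpirun -n 4 python heat_example.py
%   a = ht.random.randn(10000, 1000, split=0)
%   b = ht.random.randn(1000, 500, split=0)
%
%   # Distributed matrix multiplication; Heat performs the required
%   # MPI communication under the hood
%   c = a @ b
%
%   # The same code targets (multi-)GPU nodes via the device argument,
%   # assuming a CUDA- or ROCm-enabled PyTorch build underneath
%   g = ht.zeros((1000, 1000), split=0, device="gpu")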