% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference contribution on the Heat library (repository record 1019995, see
% the url field).
% Notes for maintainers:
%   - booktitle carries the conference name, venue and dates, because
%     @inproceedings requires a booktitle.
%   - year/month serve classic BibTeX styles; date is the ISO 8601 form that
%     biblatex reads.
%   - reportid, subtyp, cin, cid, pnm, pid and typ are not standard BibTeX
%     fields; standard styles ignore them. They are kept verbatim as exported
%     (presumably repository bookkeeping -- confirm before editing).
@inproceedings{Comito:1019995,
  author    = {Comito, Claudia and Götz, Markus and
               Gutiérrez Hermosillo Muriedas, Juan Pedro and
               Hagemeier, Björn and Hoppe, Fabian and Knechtges, Philipp and
               Krajsek, Kai and Rüttgers, Alexander and Streit, Achim and
               Tarnawa, Michael},
  title     = {Accelerating massive data processing in {Python} with {Heat}},
  booktitle = {Artificial Intelligence Symposium on Theory, Application and
               Research 2023, ESOC, Darmstadt (Germany),
               27 Sep 2023 - 28 Sep 2023},
  year      = {2023},
  month     = sep,
  date      = {2023-09-27},
  abstract  = {Heat [1, 2] is an open-source Python library designed to
               address the challenges of working with massive data sets and
               harnessing the power of machine learning across disciplines.
               Developed collaboratively within the Helmholtz Association
               (FZJ, KIT, and DLR), Heat offers cutting-edge capabilities for
               high-performance data analytics, machine learning, and deep
               learning. Heat provides a Numpy-like API that simplifies the
               development of scalable, GPU-accelerated applications.
               What sets Heat apart is its underlying data-parallelism,
               implemented on top of MPI, which significantly enhances
               efficiency and performance of data processing compared to
               traditional task-parallel frameworks. By exploring practical
               use cases in space science (materials engineering, atmospheric
               modeling, anomaly detection) and its potential as a backend
               for diverse data processing pipelines, we will illustrate how
               Heat can accelerate AI research and applications.
               [1] Götz, M., Debus, C., Coquelin, et al.: "HeAT - a
               Distributed and GPU-accelerated Tensor Framework for Data
               Analytics"
               [2] https://github.com/helmholtz-analytics/heat},
  reportid  = {FZJ-2023-05810},
  subtyp    = {After Call},
  cin       = {JSC},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
               (SDLs) and Research Groups (POF4-511) / 5112 - Cross-Domain
               Algorithms, Tools, Methods Labs (ATMLs) and Research Groups
               (POF4-511) / SLNS - SimLab Neuroscience (Helmholtz-SLNS)},
  pid       = {G:(DE-HGF)POF4-5111 / G:(DE-HGF)POF4-5112 /
               G:(DE-Juel1)Helmholtz-SLNS},
  typ       = {PUB:(DE-HGF)24},
  url       = {https://juser.fz-juelich.de/record/1019995},
}