% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@inproceedings{Aach:1048916,
  author       = {Aach, Marcel and Blanc, Cyril and Lintermann, Andreas and
                  De Grave, Kurt},
  title        = {Optimizing Edge {AI} Models on {HPC} Systems with the
                  Edge in the Loop},
  booktitle    = {High Performance Computing},
  series       = {Lecture Notes in Computer Science},
  volume       = {16091},
  pages        = {148--161},
  publisher    = {Springer Nature Switzerland},
  address      = {Cham},
  year         = {2026},
  month        = jun,
  date         = {2025-06-10},
  isbn         = {978-3-032-07611-3},
  doi          = {10.1007/978-3-032-07612-0_12},
  url          = {https://juser.fz-juelich.de/record/1048916},
  abstract     = {Artificial Intelligence (AI) and Machine Learning (ML)
                  models deployed on edge devices, e.g., for quality control
                  in Additive Manufacturing (AM), are frequently small in
                  size. Such models usually have to deliver highly accurate
                  results within a short time frame. Methods that are commonly
                  employed in literature start out with larger trained models
                  and try to reduce their memory and latency footprint by
                  structural pruning, knowledge distillation, or quantization.
                  It is, however, also possible to leverage hardware-aware
                  Neural Architecture Search (NAS), an approach that seeks to
                  systematically explore the architecture space to find
                  optimized configurations. In this study, a hardware-aware
                  NAS workflow is introduced that couples an edge device
                  located in Belgium with a powerful High-Performance
                  Computing (HPC) system in Germany, to train possible
                  architecture candidates as fast as possible while performing
                  real-time latency measurements on the target hardware. The
                  approach is verified on a use case in the AM domain, based
                  on the open RAISE-LPBF dataset, achieving ≈ 8.8 times
                  faster inference speed while simultaneously enhancing model
                  quality by a factor of ≈ 1.35, compared to a
                  human-designed baseline.},
  organization = {ISC High Performance 2025, Hamburg
                  (Germany), 10 Jun 2025 - 13 Jun 2025},
  comment      = {High Performance Computing},
  reportid     = {FZJ-2025-05015},
  cin          = {JSC},
  cid          = {I:(DE-Juel1)JSC-20090406},
  pnm          = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
                  (SDLs) and Research Groups (POF4-511) / RAISE - Research on
                  AI- and Simulation-Based Engineering at Exascale (951733)},
  pid          = {G:(DE-HGF)POF4-5111 / G:(EU-Grant)951733},
  typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
}