% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

@inproceedings{Aach:1048916,
      author       = {Aach, Marcel and Blanc, Cyril and Lintermann, Andreas and
                      De Grave, Kurt},
      title        = {Optimizing Edge {AI} Models on {HPC} Systems with the
                      Edge in the Loop},
      booktitle    = {High Performance Computing},
      series       = {Lecture Notes in Computer Science},
      volume       = {16091},
      pages        = {148--161},
      publisher    = {Springer Nature Switzerland},
      address      = {Cham},
      year         = {2026},
      month        = jun,
      date         = {2025-06-10},
      organization = {ISC High Performance 2025, Hamburg
                      (Germany), 10 Jun 2025 - 13 Jun 2025},
      isbn         = {978-3-032-07611-3 (print)},
      doi          = {10.1007/978-3-032-07612-0_12},
      url          = {https://juser.fz-juelich.de/record/1048916},
      abstract     = {Artificial Intelligence (AI) and Machine Learning (ML)
                      models deployed on edge devices, e.g., for quality control
                      in Additive Manufacturing (AM), are frequently small in
                      size. Such models usually have to deliver highly accurate
                      results within a short time frame. Methods that are commonly
                      employed in literature start out with larger trained models
                      and try to reduce their memory and latency footprint by
                      structural pruning, knowledge distillation, or quantization.
                      It is, however, also possible to leverage hardware-aware
                      Neural Architecture Search (NAS), an approach that seeks to
                      systematically explore the architecture space to find
                      optimized configurations. In this study, a hardware-aware
                      NAS workflow is introduced that couples an edge device
                      located in Belgium with a powerful High-Performance
                      Computing (HPC) system in Germany, to train possible
                      architecture candidates as fast as possible while performing
                      real-time latency measurements on the target hardware. The
                      approach is verified on a use case in the AM domain, based
                      on the open RAISE-LPBF dataset, achieving ≈ 8.8 times
                      faster inference speed while simultaneously enhancing model
                      quality by a factor of ≈ 1.35, compared to a
                      human-designed baseline.},
      comment      = {High Performance Computing},
      reportid     = {FZJ-2025-05015},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
                      (SDLs) and Research Groups (POF4-511) / RAISE - Research on
                      AI- and Simulation-Based Engineering at Exascale (951733)},
      pid          = {G:(DE-HGF)POF4-5111 / G:(EU-Grant)951733},
      typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
}