% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
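%
% Usage sketch (not part of the original record; a minimal example only):
% assuming this file is saved as, e.g., "references.bib" and the document is
% "main.tex" (both filenames are assumptions), the entry below can be cited
% with biblatex and the biber backend roughly as follows:
%
%   \documentclass{article}
%   \usepackage[backend=biber]{biblatex}
%   \addbibresource{references.bib}
%   \begin{document}
%   Resource utilization on JUWELS is analyzed in \cite{Maloney:1031786}.
%   \printbibliography
%   \end{document}
%
% Compile with pdflatex main.tex, then biber main, then pdflatex main.tex.
% Repository-specific fields in the entry (reportid, cin, cid, pnm, pid, typ)
% are not part of the standard data model; common styles ignore them, and
% biber may emit warnings about unknown fields.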
@INPROCEEDINGS{Maloney:1031786,
author = {Maloney, Samuel and Suarez, Estela and Eicker, Norbert and
Guimaraes, Filipe and Frings, Wolfgang},
title = {{Analyzing HPC Monitoring Data With a View Towards
         Efficient Resource Utilization}},
issn = {2643-3001},
publisher = {IEEE},
reportid = {FZJ-2024-05813},
pages = {170--181},
year = {2024},
note = {The data used for this study are available at:
        https://doi.org/10.26165/JUELICH-DATA/BDFBPQ},
isbn = {979-8-3503-5616-8},
booktitle = {2024 IEEE 36th International Symposium
on Computer Architecture and High
Performance Computing (SBAC-PAD)},
abstract = {Compute nodes in modern HPC systems are growing in size and
their hardware has become ever more diverse. Still, many HPC
centers allocate the resources of full nodes exclusively to
avoid contention, despite the associated risk of
underutilization. This paper describes a thorough resource
utilization study of CPU and GPU compute and memory
capacity, and interconnect bandwidth on JUWELS, a mature
leadership-class modular supercomputer, with the aim of
identifying opportunities for improving utilization through
advanced scheduling and node sharing. Separate analysis of
CPU-only and GPU-accelerated nodes finds that CPU compute
usage is already close to optimal for the CPU-only nodes,
whereas there is plenty of scope for co-scheduling CPU-based
jobs on GPU-accelerated nodes. Memory capacity and
node-level interconnect bandwidth are sufficient to
provision co-scheduled jobs. We analyze multiple one-month
datasets to validate the robustness of the conclusions over
time and compare with previous studies on other systems to
establish the generalizability of the results.},
month = nov,
date = {2024-11-13},
organization = {2024 IEEE 36th International Symposium
on Computer Architecture and High
Performance Computing, Hilo, HI (USA),
13 Nov 2024 - 15 Nov 2024},
cin = {JSC},
cid = {I:(DE-Juel1)JSC-20090406},
pnm = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
and Research Groups (POF4-511) / 5122 - Future Computing
\& Big Data Systems (POF4-512) / DEEP-SEA - DEEP –
SOFTWARE FOR EXASCALE ARCHITECTURES (955606) / ATMLAO - ATML
Application Optimization and User Service Tools (ATMLAO)},
pid = {G:(DE-HGF)POF4-5112 / G:(DE-HGF)POF4-5122 /
G:(EU-Grant)955606 / G:(DE-Juel-1)ATMLAO},
typ = {PUB:(DE-HGF)16 / PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
doi = {10.1109/SBAC-PAD63648.2024.00023},
url = {https://juser.fz-juelich.de/record/1031786},
}