% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference paper: modeling chiplet-to-chiplet (C2C) communication in gem5.
% Notes for maintainers (kept outside the entry, where BibTeX ignores text):
%  - The citation key is unchanged so existing \cite commands keep resolving.
%  - title: only the acronym C2C is brace-protected; the casing of all other
%    words is left to the bibliography style.
%  - booktitle holds the proceedings title only; ISBN and DOI have their own
%    fields, and the publisher/year are not repeated inside it.
%  - month uses the predefined three-letter macro (unquoted).
%  - Event data lives in eventtitle / venue / eventdate (biblatex fields;
%    classic BibTeX styles ignore unknown fields).
%  - NOTE(review): pages is a single number; confirm whether it is a first
%    page or a page count.
%  - reportid, comment, cin, cid, pnm, pid and typ are non-standard fields
%    retained from the original record.
@inproceedings{Schtzle:1050049,
  author       = {Schätzle, Fabian and Falquez, Carlos and Ho, Nam and
                  Zambanini, André and van den Boom, Johannes and Suarez,
                  Estela},
  title        = {Modeling Chiplet-to-Chiplet ({C2C}) Communication
                  for Chiplet-based Co-Design},
  booktitle    = {{ISC} High Performance 2025 Research Paper Proceedings
                  (40th International Conference)},
  eventtitle   = {{ISC} High Performance 2025 (40th International
                  Conference)},
  venue        = {Hamburg, Germany},
  eventdate    = {2025-06-10/2025-06-13},
  publisher    = {IEEE},
  pages        = {11},
  year         = {2025},
  month        = jun,
  date         = {2025-06-10},
  isbn         = {978-3-9826336-1-9},
  doi          = {10.23919/ISC.2025.11018303},
  url          = {https://juser.fz-juelich.de/record/1050049},
  abstract     = {Chiplet-based processor design, which combines small dies
                  called chiplets to form a larger chip, enables scalable
                  designs at economical costs. This trend has received high
                  attention such that standards for chiplet design have
                  rapidly established, including packaging, protocols, and
                  Chiplet-to-Chiplet (C2C) interfaces. With numerous
                  well-defined chiplet options available, hardware architects
                  would leverage on the co-design process to make optimal
                  decisions on design parameters. An important performance
                  limitation in multi-chiplet designs come from the protocol
                  translation in the C2C communication, needed to maintain
                  cache coherency and avoid risk of deadlocks. When
                  integrating multiple chiplets, deadlocks can happen from
                  both protocol and routing, making deadlock-free designs
                  important. This paper tackles these challenges by
                  introducing a Chiplet-to-Chiplet Gateway (C2CG)
                  architecture, a C2C interface that bridges two chiplet
                  protocols and ensures deadlock-free C2C communication. We
                  also extend the Coherent Hub Interface (CHI) protocol to
                  support cache coherent data sharing among cores across
                  chiplets. The complete design is implemented in the gem5
                  simulator, constructing a modeling tool for chiplet-based
                  co-design targeting next-generation High-performance
                  Computing (HPC) processors. We demonstrate the benefit of
                  the model through a design space exploration of three
                  64-core Armv8 HPC processor configurations: monolithic, two-
                  and four-chiplet. The exploration, using representative HPC
                  benchmarks, provides insights into C2C parameters and
                  studies the impact of Non-Uniform Memory Access (NUMA)
                  configuration, giving valuable co-design feedback for
                  hardware architects.},
  reportid     = {FZJ-2025-05763},
  comment      = {ISC High Performance 2025 Research Paper Proceedings (40th
                  International Conference) - IEEE, 2025. - ISBN
                  978-3-9826336-1-9 - doi:10.23919/ISC.2025.11018303},
  cin          = {PGI-4 / JSC},
  cid          = {I:(DE-Juel1)PGI-4-20110106 / I:(DE-Juel1)JSC-20090406},
  pnm          = {5234 - Emerging NC Architectures (POF4-523) / 5122 - Future
                  Computing \& Big Data Systems (POF4-512)},
  pid          = {G:(DE-HGF)POF4-5234 / G:(DE-HGF)POF4-5122},
  typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
}