% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Master's thesis record; see the `url` field for the repository landing page.
% The fields reportid, cin, cid, pnm, pid and typ are not standard BibTeX
% fields (presumably repository metadata); standard styles ignore unknown
% fields, so they are kept unchanged.
% NOTE(review): `note` repeats type/school/year, which thesis styles usually
% print already -- consider dropping it if the rendered entry shows it twice.
@mastersthesis{Saglam:1028952,
  author   = {Saglam, Berk},
  title    = {Heterogeneous Memory Aware Prefetching on High Performance
              {Arm} Processors},
  school   = {Rheinische Friedrich-Wilhelms-Universität Bonn},
  type     = {Masterarbeit},
  reportid = {FZJ-2024-04892},
  pages    = {142},
  year     = {2024},
  note     = {Masterarbeit, Rheinische Friedrich-Wilhelms-Universität
              Bonn, 2024},
  abstract = {Modern computing often sees up to 80\% of computation
              time spent on data retrieval, emphasizing the importance of
              prefetching for enhancing CPU data delivery speeds by moving
              data from slower storage to faster caches. Balancing
              timeliness and aggressiveness is crucial for reducing access
              times. Utilizing heterogeneous memory, in this context HBM2
              and DDR5, serve different roles due to their bandwidth and
              capacity trade-offs, underscoring the need for balanced
              memory management and awareness while prefetching. This work
              focuses on developing prefetching strategies for
              heterogeneous memory configurations in high-performance Arm
              processors, targeting a system architecture comprising 20
              cores, with 16 cores dedicated to HBM2 and 4 cores dedicated
              to DDR5 memory. The primary objective is to reduce latency
              and improve system performance by introducing two innovative
              optimization strategies for prefetching. These
              strategies meticulously balance timeliness and aggressiveness
              by adaptively tuning the prefetch degree and distance. These
              strategies adapt dynamically to the specific memory type
              and available bandwidth with consideration of the prefetch
              accuracy, optimizing prefetching operations for enhanced
              performance and efficiency. The Prefetcher are integrated
              with the L2 cache and its performance is rigorously assessed
              through Gem5 simulations. These evaluations compare the
              effectiveness of adaptive optimization strategies for both
              Stream and PC-based Stride Prefetchers, utilizing the Arm
              Neoverse V1 as the computational model. Findings reveal
              adaptive prefetching is boosting system performance, notably
              with HBM2 and DDR5 Memory, while facing memory contention on
              DDR5. This research advances prefetching strategies with the
              understanding of heterogeneous memory, advocating further
              exploration to enhance high-performance computing efficiency
              and performance.},
  cin      = {JSC},
  cid      = {I:(DE-Juel1)JSC-20090406},
  pnm      = {5122 - Future Computing \& Big Data Systems (POF4-512)},
  pid      = {G:(DE-HGF)POF4-5122},
  typ      = {PUB:(DE-HGF)19},
  doi      = {10.34734/FZJ-2024-04892},
  url      = {https://juser.fz-juelich.de/record/1028952},
}