% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article record. The fields reportid, cin, ddc, cid, pnm, pid, typ
% and UT are not standard BibTeX fields (presumably export metadata from the
% hosting repository -- confirm); standard styles silently ignore them.
@article{Ho:1042334,
  author    = {Ho, Nam and Falquez, Carlos and Portero, Antoni and Suarez,
               Estela and Pleiter, Dirk},
  title     = {Memory Prefetching Evaluation of Scientific
               Applications on a Modern {HPC} {Arm}-based
               Processor},
  journal   = {IEEE Access},
  volume    = {13},
  issn      = {2169-3536},
  address   = {New York, NY},
  publisher = {IEEE},
  reportid  = {FZJ-2025-02537},
  pages     = {85898--85926},
  year      = {2025},
  abstract  = {Memory prefetching is a well-known technique for mitigating
               the negative impact of memory access latencies on memory
               bandwidth. This problem has become more pressing as
               improvements in memory bandwidth have not kept pace with
               increases in computational power. While much existing work
               has been devoted to finding appropriate prefetching
               techniques for specific workloads, few provide insight into
               the behavior of scientific applications to better understand
               the impact of prefetchers. This paper investigates the
               impact of hardware prefetchers on the latest Arm-based
               high-end processor architectures. In this work, we
               investigate memory access patterns by analyzing locality
               properties and visualizing delta and repetitive address
               patterns. A deeper understanding of memory access patterns
               allows the use of the appropriate prefetcher and reaching a
               better correlation between access pattern properties and
               prefetcher performance. This can guide future co-design
               efforts. We evaluated traditional and innovative prefetchers
               using a gem5-based model of Arm Neoverse V1 cores. The model
               features a 16-core architecture, using Amazon’s Graviton 3
               processor as a hardware reference, but substituting DDR5 by
               high bandwidth memory (HBM2). We performed a detailed
               prefetching evaluation focusing on stencil, sparse
               matrix-vector multiplication, and Breadth-First Search
               kernels. These kernels represent a broad range of the
               applications running on today’s High-Performance Computing
               (HPC) systems, which are sensitive to memory performance.},
  cin       = {JSC},
  ddc       = {621.3},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {5122 - Future Computing \& Big Data Systems (POF4-512) /
               EPI SGA2 (16ME0507K) / EPI SGA1 - SGA1 (Specific Grant
               Agreement 1) OF THE EUROPEAN PROCESSOR INITIATIVE (EPI)
               (826647)},
  pid       = {G:(DE-HGF)POF4-5122 / G:(BMBF)16ME0507K /
               G:(EU-Grant)826647},
  typ       = {PUB:(DE-HGF)16},
  UT        = {WOS:001492121500023},
  doi       = {10.1109/ACCESS.2025.3569533},
  url       = {https://juser.fz-juelich.de/record/1042334},
}