% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference paper published in Lecture Notes in Computer Science, vol. 13949
% (Springer Nature Switzerland, Cham), pages 105--119.
%
% Field notes:
%  - reportid, cin, cid, pnm, pid, typ and UT are not standard BibTeX fields;
%    standard styles ignore unknown fields. They look like metadata from the
%    repository export (see the url field) -- keep them as exported.
%  - eventtitle / venue / eventdate describe the conference itself. They are
%    biblatex fields and are ignored by classic BibTeX styles; the publisher
%    city stays in address.
%  - NOTE(review): isbn ends in 42784-8 while the DOI embeds 42785-5;
%    presumably print vs. electronic edition -- TODO confirm.
%  - NOTE(review): note says EPI-SGA2 while pnm lists EPI SGA1 -- TODO confirm
%    against the repository record.
@inproceedings{Portero:1019434,
      author       = {Portero, Antonio and Falquez, Carlos and Ho, Nam and
                      Petrakis, Polydoros and Nassyr, Stepan and Marazakis,
                      Manolis and Dolbeau, Romain and Nocua Cifuentes, Jorge A.
                      and Beltran, Luis and Pleiter, Dirk and Suarez, Estela},
      title        = {{COMPESCE}: A Co-design Approach for Memory
                      Subsystem Performance Analysis in {HPC}
                      Many-Cores},
      booktitle    = {Architecture of Computing Systems -
                      36th International Conference},
      series       = {Lecture Notes in Computer Science},
      volume       = {13949},
      pages        = {105--119},
      publisher    = {Springer Nature Switzerland},
      address      = {Cham},
      year         = {2023},
      month        = jun,
      date         = {2023-06-13},
      eventtitle   = {Architecture of Computing Systems -
                      36th International Conference},
      venue        = {Athens, Greece},
      eventdate    = {2023-06-13/2023-06-15},
      isbn         = {978-3-031-42784-8},
      doi          = {10.1007/978-3-031-42785-5_8},
      url          = {https://juser.fz-juelich.de/record/1019434},
      note         = {Grant Name: EPI-SGA2},
      comment      = {Architecture of Computing Systems - 36th International
                      Conference},
      abstract     = {This paper explores the memory subsystem design through
                      gem5 simulations of a non-uniform memory access (NUMA)
                      architecture with ARM cores equipped with vector engines.
                      And connected to a Network-on-Chip (NoC) following the
                      Coherent Hub Interface (CHI) protocol. The study quantifies
                      the benefits of vectorization, prefetching, and multichannel
                      NoC configurations using a benchmark for generating memory
                      patterns and indexed accesses. The outcomes provide insights
                      into improving bus utilization and bandwidth and reducing
                      stalls in the system. The paper proposes hardware/software
                      (HW/SW) advancements to reach and use the HBM device with a
                      higher percentage than $80\%$ at the memory controllers in
                      the simulated manycore system.},
      reportid     = {FZJ-2023-05391},
      cin          = {JSC / IAS},
      cid          = {I:(DE-Juel1)JSC-20090406 / I:(DE-Juel1)VDB1106},
      pnm          = {5122 - Future Computing $\&$ Big Data Systems (POF4-512) /
                      EPI SGA1 - SGA1 (Specific Grant Agreement 1) OF THE EUROPEAN
                      PROCESSOR INITIATIVE (EPI) (826647)},
      pid          = {G:(DE-HGF)POF4-5122 / G:(EU-Grant)826647},
      typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
      UT           = {WOS:001293532100008},
}