% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@article{AlvarezMallon:281229,
  author    = {Alvarez Mallon, Damian and Taboada, Guillermo L. and
               Koesterke, Lars},
  title     = {{MPI} and {UPC} broadcast, scatter and gather algorithms in
               {Xeon Phi}},
  journal   = {Concurrency and Computation: Practice and Experience},
  volume    = {28},
  number    = {8},
  issn      = {1532-0626},
  address   = {Chichester},
  publisher = {Wiley},
  reportid  = {FZJ-2016-00928},
  pages     = {2322--2340},
  year      = {2016},
  abstract  = {Accelerators have revolutionised the high performance
               computing (HPC) community. Despite their advantages, their
               very specific programming models and limited communication
               capabilities have kept them in a supporting role of the main
               processors. With the introduction of Xeon Phi, this is no
               longer true, as it can be programmed as the main processor
               and has direct access to the InfiniBand network adapter.
               Collective operations play a key role in many HPC
               applications. Therefore, studying its behaviour in the
               context of manycore coprocessors has great importance. This
               work analyses the performance of different algorithms for
               broadcast, scatter and gather, in a large-scale Xeon Phi
               supercomputer. The algorithms evaluated are those available
               in the reference message passing interface (MPI)
               implementation for Xeon Phi (Intel MPI), the default
               algorithm in an optimised MPI implementation (MVAPICH2-MIC),
               and a new set of algorithms, developed by the authors of
               this work, designed with modern processors and new
               communication features in mind. The latter are implemented
               in Unified Parallel C (UPC), a partitioned global address
               space language, leveraging one-sided communications,
               hierarchical trees and message pipelining. This study scales
               the experiments to 15360 cores in the Stampede supercomputer
               and compares the results to Xeon and hybrid Xeon + Xeon Phi
               experiments, with up to 19456 cores.},
  cin       = {JSC},
  ddc       = {004},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {513 - Supercomputer Facility (POF3-513)},
  pid       = {G:(DE-HGF)POF3-513},
  typ       = {PUB:(DE-HGF)16},
  UT        = {WOS:000376263300002},
  doi       = {10.1002/cpe.3552},
  url       = {https://juser.fz-juelich.de/record/281229},
}