% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article on an extended collectives library for Unified Parallel C
% (volume 28, issue 1, 2013). The citation key is kept exactly as exported
% so existing \cite commands keep resolving.
% The fields reportid, cin, ddc, cid, pnm, pid, typ and UT are not standard
% BibTeX fields and are ignored by standard styles; they are presumably
% bookkeeping data from the repository record given in url (TODO confirm).
@article{Teijeiro:173013,
  author    = {Teijeiro, Carlos and Taboada, Guillermo L. and Touriño,
               Juan and Doallo, Ramón and Mouriño, José C. and Alvarez
               Mallon, Damian and Wibecan, Brian},
  title     = {Design and Implementation of an Extended
               Collectives Library for {Unified} {Parallel} {C}},
  journal   = {Journal of Computer Science and Technology},
  volume    = {28},
  number    = {1},
  issn      = {1860-4749},
  address   = {Boston, Mass. [u.a.]},
  publisher = {Springer},
  reportid  = {FZJ-2014-06427},
  pages     = {72--89},
  year      = {2013},
  abstract  = {Unified Parallel C (UPC) is a parallel extension of ANSI C
               based on the Partitioned Global Address Space (PGAS)
               programming model, which provides a shared memory view that
               simplifies code development while it can take advantage of
               the scalability of distributed memory architectures.
               Therefore, UPC allows programmers to write parallel
               applications on hybrid shared/distributed memory
               architectures, such as multi-core clusters, in a more
               productive way, accessing remote memory by means of
               different high-level language constructs, such as
               assignments to shared variables or collective primitives.
               However, the standard UPC collectives library includes a
               reduced set of eight basic primitives with quite limited
               functionality. This work presents the design and
               implementation of extended UPC collective functions that
               overcome the limitations of the standard collectives
               library, allowing, for example, the use of a specific source
               and destination thread or defining the amount of data
               transferred by each particular thread. This library fulfills
               the demands made by the UPC developers community and
               implements portable algorithms, independent of the specific
               UPC compiler/runtime being used. The use of a representative
               set of these extended collectives has been evaluated using
               two applications and four kernels as case studies. The
               results obtained confirm the suitability of the new library
               to provide easier programming without trading off
               performance, thus achieving high productivity in parallel
               programming to harness the performance of hybrid
               shared/distributed memory architectures in high performance
               computing.},
  cin       = {JSC},
  ddc       = {004},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {41G - Supercomputer Facility (POF2-41G21)},
  pid       = {G:(DE-HGF)POF2-41G21},
  typ       = {PUB:(DE-HGF)16},
  UT        = {WOS:000314190600007},
  doi       = {10.1007/s11390-013-1313-9},
  url       = {https://juser.fz-juelich.de/record/173013},
}