% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article: extended collectives library for Unified Parallel C (2013).
% Only the proper noun in the title is brace-protected so that styles may
% recase the remaining words; the page range uses the "--" range form.
% Non-standard fields (reportid, cin, ddc, cid, pnm, pid, typ, UT) are kept
% verbatim; standard styles ignore field names they do not know.
@ARTICLE{Teijeiro:173013,
      author       = {Teijeiro, Carlos and Taboada, Guillermo L. and Touriño,
                      Juan and Doallo, Ramón and Mouriño, José C. and Alvarez
                      Mallon, Damian and Wibecan, Brian},
      title        = {Design and Implementation of an Extended Collectives
                      Library for {Unified Parallel C}},
      journal      = {Journal of Computer Science and Technology},
      volume       = {28},
      number       = {1},
      issn         = {1860-4749},
      address      = {Boston, Mass. [u.a.]},
      publisher    = {Springer},
      reportid     = {FZJ-2014-06427},
      pages        = {72--89},
      year         = {2013},
      abstract     = {Unified Parallel C (UPC) is a parallel extension of ANSI C
                      based on the Partitioned Global Address Space (PGAS)
                      programming model, which provides a shared memory view that
                      simplifies code development while it can take advantage of
                      the scalability of distributed memory architectures.
                      Therefore, UPC allows programmers to write parallel
                      applications on hybrid shared/distributed memory
                      architectures, such as multi-core clusters, in a more
                      productive way, accessing remote memory by means of
                      different high-level language constructs, such as
                      assignments to shared variables or collective primitives.
                      However, the standard UPC collectives library includes a
                      reduced set of eight basic primitives with quite limited
                      functionality. This work presents the design and
                      implementation of extended UPC collective functions that
                      overcome the limitations of the standard collectives
                      library, allowing, for example, the use of a specific source
                      and destination thread or defining the amount of data
                      transferred by each particular thread. This library fulfills
                      the demands made by the UPC developers community and
                      implements portable algorithms, independent of the specific
                      UPC compiler/runtime being used. The use of a representative
                      set of these extended collectives has been evaluated using
                      two applications and four kernels as case studies. The
                      results obtained confirm the suitability of the new library
                      to provide easier programming without trading off
                      performance, thus achieving high productivity in parallel
                      programming to harness the performance of hybrid
                      shared/distributed memory architectures in high performance
                      computing.},
      cin          = {JSC},
      ddc          = {004},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {41G - Supercomputer Facility (POF2-41G21)},
      pid          = {G:(DE-HGF)POF2-41G21},
      typ          = {PUB:(DE-HGF)16},
      UT           = {WOS:000314190600007},
      doi          = {10.1007/s11390-013-1313-9},
      url          = {https://juser.fz-juelich.de/record/173013},
}