% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Chapter 6 of the edited volume "Tools for High Performance Computing 2016".
% Typed as @incollection because the chapter authors differ from the volume
% editor. Only the editor named in the original export is listed; check the
% publisher's record for further editors.
% The original export carried month = Oct and date = 2016-10-04, which describe
% the workshop (4-5 Oct 2016), not the 2017 publication; they are recorded in
% eventdate so that they cannot override or contradict the publication year.
% reportid, comment, cin, cid, pnm, pid and typ appear to be repository
% bookkeeping fields; standard styles ignore them and they are kept verbatim.
@incollection{Hermanns:830159,
      author       = {Hermanns, Marc-André and Geimer, Markus and Mohr, Bernd
                      and Wolf, Felix},
      title        = {Trace-Based Detection of Lock Contention in {MPI}
                      One-Sided Communication},
      booktitle    = {Tools for High Performance Computing 2016},
      editor       = {Niethammer, Christoph},
      chapter      = {6},
      pages        = {97--114},
      publisher    = {Springer International Publishing},
      address      = {Cham},
      year         = {2017},
      isbn         = {978-3-319-56701-3},
      doi          = {10.1007/978-3-319-56702-0_6},
      url          = {https://juser.fz-juelich.de/record/830159},
      eventtitle   = {10th International Workshop on Parallel Tools for High
                      Performance Computing},
      venue        = {Stuttgart, Germany},
      eventdate    = {2016-10-04/2016-10-05},
      abstract     = {Performance analysis is an essential part of the
                      development process of HPC applications. Thus, developers
                      need adequate tools to evaluate design and implementation
                      decisions to effectively develop efficient parallel
                      applications. Therefore, it is crucial that tools provide an
                      as complete support as possible for the available language
                      and library features to ensure that design decisions are not
                      negatively influenced by the level of available tool
                      support. The message passing interface (MPI) supports three
                      basic communication paradigms: point-to-point, collective,
                      and one-sided. Each of these targets and excels at a
                      specific application scenario. While current performance
                      tools support the first two quite well, one-sided
                      communication is often neglected. In our earlier work, we
                      were able to reduce this gap by showing how wait states in
                      MPI one-sided communication using active-target
                      synchronization can be detected at large scale using our
                      trace-based message replay technique. Further extending our
                      work on the detection of progress-related wait states in
                      ARMCI, this paper presents an improved infrastructure that
                      is capable of not only detecting progress-related wait
                      states, but also wait states due to lock contention in MPI
                      passive-target synchronization. We present an event-based
                      definition of lock contention, the trace-based algorithm to
                      detect it, as well as initial results with a micro-benchmark
                      and an application kernel scaling up to 65,536 processes.},
      reportid     = {FZJ-2017-03736},
      comment      = {Tools for High Performance Computing 2016 / Niethammer,
                      Christoph (Editor) ; Cham : Springer International
                      Publishing, 2017, Chapter 6 ; ISBN: 978-3-319-56701-3},
      cin          = {JARA-HPC / JSC},
      cid          = {$I:(DE-82)080012_20140620$ / I:(DE-Juel1)JSC-20090406},
      pnm          = {511 - Computational Science and Mathematical Methods
                      (POF3-511) / ATMLPP - ATML Parallel Performance (ATMLPP)},
      pid          = {G:(DE-HGF)POF3-511 / G:(DE-Juel-1)ATMLPP},
      typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
}