% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Doctoral thesis by Marc-André Hermanns (RWTH Aachen), published by
% Forschungszentrum Jülich as volume 35 of the "Reihe IAS" series.
% The fields reportid, cin, cid, pnm, pid, typ and urn are not standard
% BibTeX fields; styles that do not know them ignore them.
@phdthesis{Hermanns:844062,
      author       = {Hermanns, Marc-André},
      title        = {Understanding the formation of wait states in one-sided
                      communication},
      volume       = {35},
      school       = {RWTH Aachen},
      type         = {Dissertation},
      address      = {Jülich},
      publisher    = {Forschungszentrum Jülich GmbH Zentralbibliothek, Verlag},
      reportid     = {FZJ-2018-01571},
      isbn         = {978-3-95806-297-9},
      series       = {Schriften des Forschungszentrums Jülich. Reihe IAS},
      pagetotal    = {xiv, 144},
      year         = {2018},
      note         = {RWTH Aachen, Diss., 2017},
      abstract     = {Due to the available concurrency in modern-day
                      supercomputers, the complexity of developing efficient
                      parallel applications for these platforms has grown rapidly
                      in the last years. Many applications use message passing for
                      parallelization, offering three main communication
                      paradigms: point-to-point, collective and one-sided
                      communication. Each paradigm fits certain domains of
                      algorithms and communication patterns best. The one-sided
                      paradigm decouples communication and synchronization and
                      allows a single process to define a complete communication.
                      These are important features for runtime systems of new
                      programming paradigms and state-of-the-art dynamic
                      load-balancing strategies. In any process interaction, wait
                      states can occur, where a process is waiting for another -
                      idling - before it proceeds with its local computation. To
                      eliminate such wait states, runtime and application
                      developers alike need support in detecting and quantifying
                      them and their root causes. However, tool support for
                      identifying complex wait states in one-sided communication
                      is scarce. This thesis contributes novel methods for the
                      scalable detection and quantification of wait states in
                      one-sided communication, the automatic identification of
                      their root causes, and the assessment of optimization
                      potential. The methods for wait-state detection and
                      quantification, as introduced by Böhme et al. and extended
                      by this thesis, build upon a parallel post-mortem traversal
                      of process-local event traces, modeling an application's
                      runtime behavior. Performance-relevant data is exchanged
                      just in time on the recorded communication paths. Through
                      the nature of one-sided communication, information on such
                      communication paths is not available on all processes
                      involved, impeding the use of this original approach for
                      one-sided communication. The use of a novel high-level
                      messaging framework enables the exchange of messages on the
                      implicit communication paths of one-sided communication,
                      while retaining the scalability of the original approach.
                      This enables the identification of previously unstudied
                      types of wait states unique to one-sided communication: lack
                      of remote progress and resource contention. Beyond simple
                      accounting of waiting time, other contributed methods allow
                      pinpointing root causes of such wait states and identifying
                      optimization potential in one-sided applications.
                      Furthermore, they distinguish two fundamentally different
                      classes of wait-state root causes: delays for direct process
                      synchronization (similar to point-to-point and collective
                      communication) and contention in case of lock-based process
                      synchronization, whose resolution strategies are
                      diametrically opposed to each other. Finally, the
                      contributed methods enable the identification of the longest
                      wait-state-free execution path (i.e., critical path) in
                      parallel applications using one-sided communication. As only
                      optimization of functions on the critical path will yield
                      performance improvements, its identification is key to
                      choosing promising optimization targets. All of these
                      methods are integrated into the Scalasca performance
                      toolset. Their scalability and effectiveness are
                      demonstrated by evaluating a variety of applications using
                      one-sided communication interfaces running in configurations
                      with up to 65,536 processes.},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {511 - Computational Science and Mathematical Methods
                      (POF3-511) / ATMLPP - ATML Parallel Performance (ATMLPP)},
      pid          = {G:(DE-HGF)POF3-511 / G:(DE-Juel-1)ATMLPP},
      typ          = {PUB:(DE-HGF)3 / PUB:(DE-HGF)11},
      urn          = {urn:nbn:de:0001-2018012504},
      url          = {https://juser.fz-juelich.de/record/844062},
}