% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% PhD thesis record for Becker (2010); the record is served at the address in
% the url field. The fields reportid, cin, cid, pnm, pid and typ are
% repository metadata, not standard BibTeX fields; styles that do not know
% them ignore them.
@PHDTHESIS{Becker:10841,
      author       = {Becker, Daniel},
      title        = {{Timestamp} {Synchronization} of {Concurrent} {Events}},
      volume       = {4},
      school       = {RWTH Aachen},
      type         = {Dr. (FH)},
      address      = {Jülich},
      publisher    = {Forschungszentrum Jülich GmbH Zentralbibliothek, Verlag},
      reportid     = {PreJuSER-10841},
      isbn         = {978-3-89336-625-5},
      series       = {Schriften des Forschungszentrums Jülich : IAS Series},
      pages        = {XVIII, 116 S.},
      year         = {2010},
      note         = {Record converted from VDB: 12.11.2012; RWTH Aachen, Diss.,
                      2010},
      abstract     = {Supercomputing is a key technological pillar of modern
                      science and engineering, indispensable for solving critical
                      problems of high complexity. However, to effectively utilize
                      the enormously complex large-scale computer systems
                      available today, scientists and engineers need powerful and
                      robust software development tools. One technique widely used
                      by such tools is event tracing with a broad spectrum of
                      applications ranging from performance analysis, performance
                      prediction and modeling to debugging. In particular, event
                      traces are helpful in understanding the performance behavior
                      of parallel programs since they allow the in-depth analysis
                      of communication and synchronization patterns. The accuracy
                      of such analyses depends on the comparability of timestamps
                      taken on different processors and may be adversely affected
                      by non-synchronized clocks leading to inaccurate relative
                      event timings. Such inaccuracies may cause a given interval
                      to appear shorter or longer than it actually was, or
                      introduce violations of the logical event order, which
                      requires a message to be received only after it has been
                      sent. Inconsistent trace data may not only lead to false
                      conclusions, for instance, when the impact of communication
                      patterns is quantified, but may also confuse the user of
                      trace-visualization tools by causing message arrows to point
                      backward in time-line views. Even more strikingly,
                      trace-analysis tools may also cease to work in a
                      satisfactory manner if they rely on the correct order to
                      function properly. Although linear offset interpolation can
                      restore the consistency of the trace data to some degree,
                      time-dependent drifts and other inaccuracies may still
                      disarrange the original sequence of events, as shown in a
                      study conducted as a part of this Ph.D. thesis. The already
                      familiar controlled logical clock algorithm accounts for
                      such violations in point-to-point communication by shifting
                      message events in time as much as needed while trying to
                      preserve the length of local intervals. This algorithm is,
                      however, not suitable for realistic applications because (i)
                      it ignores collective and shared-memory operations and (ii)
                      as a serial algorithm it offers only limited scalability.
                      This thesis addresses these shortcomings by extending the
                      algorithm to restore event semantics related to collective
                      and shared-memory operations and by parallelizing the
                      extended version to make it suitable for large-scale systems
                      including computational grids. The basic idea behind the
                      semantic extension is to consider collective and
                      shared-memory operations as being composed of multiple
                      point-to-point messages, taking the semantics of the
                      different flavors of these operations into account. In order
                      to accomplish the correction in a scalable way, both
                      distributed memory and parallel processing capabilities are
                      exploited by processing separate local trace files in
                      parallel and replaying the original communication on as many
                      CPUs as were used to execute the target application itself.
                      To employ the replay mechanism in computational grids, this
                      work also defines the necessary infrastructure to accurately
                      measure clock offsets in distributed environments with
                      hierarchical networks. The methodology was evaluated in
                      practice by integrating the extended and parallelized
                      algorithm into the Scalasca trace-analysis framework and
                      applied to traces of realistic applications taken on single
                      cluster systems and computational grids. The thesis shows
                      that the algorithm eliminates inconsistent timings of
                      concurrent events while only marginally changing the length
                      of intervals between local events – even if wide-area
                      communication is involved. Scalability is demonstrated with
                      up to 4,096 application processes.},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {Scientific Computing (FUEK411) / 411 - Computational
                      Science and Mathematical Methods (POF2-411) / ATMLPP - ATML
                      Parallel Performance (ATMLPP)},
      pid          = {G:(DE-Juel1)FUEK411 / G:(DE-HGF)POF2-411 /
                      G:(DE-Juel-1)ATMLPP},
      typ          = {PUB:(DE-HGF)11 / PUB:(DE-HGF)3},
      url          = {https://juser.fz-juelich.de/record/10841},
}