% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference paper (Dachsel et al., 2012), cited as Dachsel:150587.
% The conference name, place and dates are stored in eventtitle / venue /
% eventdate; classic styles ignore these fields, biblatex styles can print them.
% reportid, comment, cin, cid, pnm, pid, typ and UT are not standard fields and
% are ignored by standard styles (presumably repository bookkeeping -- confirm
% before removing them).
@inproceedings{Dachsel:150587,
      author       = {Dachsel, Holger and Hofmann, Michael and Lang, Jens and
                      R{\"u}nger, Gudula},
      title        = {Automatic Tuning of the {Fast Multipole Method} Based on
                      Integrated Performance Prediction},
      booktitle    = {2012 {IEEE} 14th International Conference on High
                      Performance Computing and Communication \& 2012 {IEEE} 9th
                      International Conference on Embedded Software and Systems},
      publisher    = {IEEE},
      pages        = {617--624},
      year         = {2012},
      month        = jun,
      date         = {2012-06-25},
      eventtitle   = {2012 {IEEE} 14th Int'l Conf. on High Performance Computing
                      and Communication (HPCC) \& 2012 {IEEE} 9th Int'l Conf. on
                      Embedded Software and Systems (ICESS)},
      venue        = {Liverpool, United Kingdom},
      eventdate    = {2012-06-25/2012-06-27},
      reportid     = {FZJ-2014-00636},
      comment      = {2012 IEEE 14th International Conference on High Performance
                      Computing and Communication \& 2012 IEEE 9th International
                      Conference on Embedded Software and Systems},
      abstract     = {The Fast Multipole Method (FMM) is an efficient, widely
                      used method for the solution of N-body problems. One of the
                      main data structures is a hierarchical tree data structure
                      describing the separation into near-field and far-field
                      particle interactions. This article presents a method for
                      automatic tuning of the FMM by selecting the optimal FMM
                      tree depth based on an integrated performance prediction of
                      the FMM computations. The prediction method exploits
                      benchmarking of significant parts of the FMM implementation
                      to adapt the tuning to the specific hardware system being
                      used. Furthermore, a separate analysis phase at runtime is
                      used to predict the computational load caused by the
                      specific particle system to be computed. The tuning method
                      was integrated into an FMM implementation. Performance
                      results show that a reliable determination of the tree depth
                      is achieved, thus leading to minimal execution times of the
                      FMM algorithm.},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {411 - Computational Science and Mathematical Methods
                      (POF2-411)},
      pid          = {G:(DE-HGF)POF2-411},
      typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
      UT           = {WOS:000310377500079},
      doi          = {10.1109/HPCC.2012.88},
      url          = {https://juser.fz-juelich.de/record/150587},
}