% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference paper in the proceedings "High-Performance Computing and
% Networking" (Lecture Notes in Computer Science, volume 1225).
% The non-standard fields reportid, typ and comment are repository metadata;
% "comment" keeps the raw catalogue string (ISSN, e-ISBN, etc.) verbatim.
% NOTE(review): the catalogue string cites doi 10.1007/BFb0031573 while the
% doi field gives 10.1007/BFb0031605; the field value is kept as is -- confirm
% which of the two identifies this chapter.
% NOTE(review): "organization" holds the conference name, venue and dates
% rather than a sponsoring body; left unchanged so rendered output is stable.
@INPROCEEDINGS{Hertzberger:860345,
      author       = {Lippert, Th. and Petkov, N. and Schilling, K.},
      editor       = {Hertzberger, Bob and Sloot, Peter},
      title        = {{BLAS}-3 for the {Quadrics} parallel computer},
      volume       = {1225},
      address      = {Berlin, Heidelberg},
      publisher    = {Springer Berlin Heidelberg},
      reportid     = {FZJ-2019-01120},
      isbn         = {978-3-540-62898-9},
      series       = {Lecture Notes in Computer Science},
      pages        = {332--341},
      year         = {1997},
      comment      = {High-Performance Computing and Networking / Hertzberger,
                      Bob (Editor) ; Berlin, Heidelberg : Springer Berlin
                      Heidelberg, 1997, Chapter 32 ; ISSN: 0302-9743=1611-3349 ;
                      ISBN: 978-3-540-62898-9=978-3-540-69041-2 ;
                      doi:10.1007/BFb0031573},
      booktitle    = {High-Performance Computing and Networking},
      abstract     = {A scalable parallel algorithm for matrix multiplication on
                      SISAMD computers is presented. Our method enables us to
                      implement an efficient BLAS library on the Italian
                      APE100/Quadrics SISAMD massively parallel computer on which
                      hitherto scalable parallel BLAS-3 were not available. The
                      approach proposed is based on a one-dimensional ring
                      connectivity. The flow of data is hyper-systolic. The
                      communication overhead is competitive with that of
                      established algorithms for SIMD and MIMD machines.
                      Advantages are that (i) the layout of the matrices is
                      preserved during the computation, (ii) BLAS-2 fit well into
                      this layout and (iii) indexed addressing is avoided, which
                      renders the algorithm suitable for SISAMD machines and, in
                      this way, for all other types of parallel computers. On the
                      APE100/Quadrics, a performance of nearly 25 $\%$ of the peak
                      performance for multiplications of complex matrices is
                      achieved.},
      month        = apr,
      date         = {1997-04-28},
      organization = {International Conference on High-Performance Computing
                      and Networking, Vienna (Austria), 28 Apr 1997 - 30 Apr
                      1997},
      typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
      doi          = {10.1007/BFb0031605},
      url          = {https://juser.fz-juelich.de/record/860345},
}