% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference paper published in Lecture Notes in Computer Science, vol. 1401
% (repository record: https://juser.fz-juelich.de/record/860344).
%
% Notes for maintainers:
%  - `booktitle` holds only the proceedings title; the raw catalogue string
%    (editor, publisher, chapter, ISSN/ISBN pairs, volume DOI) is preserved
%    unchanged in the non-standard `comment` field.
%  - `doi` is the DOI of this chapter.  The different DOI quoted inside
%    `comment` is presumably the volume-level DOI -- confirm before reusing it.
%  - `editor` lists only the editor named in the catalogue string; the volume
%    may have further editors -- TODO confirm against the publisher record.
%  - `eventtitle`, `venue` and `eventdate` are biblatex fields describing the
%    conference itself; classic BibTeX styles ignore them.  `year`/`month`
%    remain the publication date.
%  - `reportid`, `typ` and `comment` are repository-specific fields that
%    standard styles ignore.
@INPROCEEDINGS{Lippert:860344,
      author       = {Lippert, Th. and Schilling, K. and Toschi, F. and
                      Trentmann, S. and Tripiccione, R.},
      title        = {Transpose algorithm for {FFT} on {APE}/{Quadrics}},
      editor       = {Sloot, Peter},
      booktitle    = {High-Performance Computing and Networking},
      series       = {Lecture Notes in Computer Science},
      volume       = {1401},
      pages        = {439--448},
      publisher    = {Springer Berlin Heidelberg},
      address      = {Berlin, Heidelberg},
      year         = {1998},
      month        = apr,
      eventtitle   = {International Conference on High-Performance Computing
                      and Networking},
      venue        = {Amsterdam, The Netherlands},
      eventdate    = {1998-04-21/1998-04-23},
      isbn         = {978-3-540-64443-9},
      doi          = {10.1007/BFb0037171},
      url          = {https://juser.fz-juelich.de/record/860344},
      reportid     = {FZJ-2019-01119},
      typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
      comment      = {High-Performance Computing and Networking / Sloot, Peter
                      (Editor) ; Berlin, Heidelberg : Springer Berlin Heidelberg,
                      1998, Chapter 46 ; ISSN: 0302-9743=1611-3349 ; ISBN:
                      978-3-540-64443-9=978-3-540-69783-1 ;
                      doi:10.1007/BFb0037125},
      abstract     = {We describe a novel practical parallel FFT scheme designed
                      for SIMD systems and/or data parallel programming. A
                      bit-exchange of elements between the processors is avoided
                      by means of the ‘Transpose Algorithm’. Our transposition
                      is based on the assignment of the data field onto a
                      1-dimensional ring of systolic cells which subsequently is
                      mapped onto a ring of processors, realized as a subset of
                      the system's connectivity. We have implemented and
                      benchmarked a 2-dimensional parallel FFT code on the
                      APE100/Quadrics parallel computer, where–due to a rigid
                      next-neighbour connectivity and lack of local
                      addressing–efficient FFT implementations could not be
                      realized so far.},
}