% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article reviewing the High-Q Club (codes scaling to all of JUQUEEN).
% The fields reportid, cin, ddc, cid, pnm, pid and typ are not standard BibTeX
% fields; they look like repository metadata (presumably from the record given
% in `url` -- confirm) and are ignored by standard bibliography styles.
% Only the proper noun "High-Q Club" is brace-protected in the title so that
% styles remain free to apply their own casing to the remaining words.
@ARTICLE{Brmmel:845503,
      author       = {Brömmel, Dirk and Frings, Wolfgang and Wylie, Brian J. N.
                      and Mohr, Bernd and Gibbon, Paul and Lippert, Thomas},
      title        = {The {High-Q Club}: Experience with
                      Extreme-scaling Application Codes},
      journal      = {Supercomputing Frontiers and Innovations},
      volume       = {5},
      number       = {1},
      issn         = {2313-8734},
      address      = {Chelyabinsk},
      publisher    = {South Ural State University},
      reportid     = {FZJ-2018-02737},
      pages        = {59--78},
      year         = {2018},
      abstract     = {Jülich Supercomputing Centre (JSC) started running
                      (extreme) scaling workshops with its first IBM Blue Gene
                      supercomputer, finally spanning three generations each
                      seeing an increase in the number of cores and available
                      threads. Over the years, this workshop series attracted
                      numerous international code teams and resulted in many
                      applications capable of running on all available cores of
                      each system. This article reviews some of the knowledge
                      gained with running and tuning highly-scalable applications,
                      focussing on JUQUEEN, the IBM Blue Gene/Q at JSC. The
                      ability to execute successfully on all 458752 cores with up
                      to 1.8 million processes or threads may qualify codes for
                      the High-Q Club, which serves as a showcase for diverse
                      codes scaling to the entire 28 racks, effectively defining a
                      collection of the highest scaling codes on JUQUEEN. The
                      intention was to encourage other developers to invest in
                      tuning and scaling their codes while identifying the
                      necessary key aspects for that goal. As this era closes, it
                      is timely to compare the characteristics of the 32 High-Q
                      Club member codes, considering their strong and/or weak
                      scaling, exploitation of hardware threading, and whether/how
                      intra-node multi-threading is employed combined with
                      message-passing. We also identify the obstacles for scaling
                      such as inefficient use of limited compute node memory and
                      file I/O as key governing factors. Overall, the analysis
                      provides guidance as to how applications may (need to) be
                      designed in future to exploit expected exa-scale computer
                      systems.},
      cin          = {JSC},
      ddc          = {004},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {511 - Computational Science and Mathematical Methods
                      (POF3-511) / 513 - Supercomputer Facility (POF3-513) /
                      ATMLPP - ATML Parallel Performance (ATMLPP) / ATMLAO - ATML
                      Application Optimization and User Service Tools (ATMLAO)},
      pid          = {G:(DE-HGF)POF3-511 / G:(DE-HGF)POF3-513 /
                      G:(DE-Juel-1)ATMLPP / G:(DE-Juel-1)ATMLAO},
      typ          = {PUB:(DE-HGF)16},
      doi          = {10.14529/jsfi180104},
      url          = {https://juser.fz-juelich.de/record/845503},
}