% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@INPROCEEDINGS{Brmmel:818244,
author = {Brömmel, Dirk and Frings, Wolfgang and Wylie, Brian J. N.},
title = {{E}xtreme-scaling applications en route to exascale},
address = {New York},
publisher = {ACM Press},
reportid = {FZJ-2016-04722},
pages = {10},
year = {2016},
comment = {Proceedings of the Exascale Applications and Software
Conference 2016},
booktitle = {Proceedings of the Exascale
Applications and Software Conference
2016},
abstract = {Feedback from the previous year's very successful workshop
motivated the organisation of a three-day workshop from 1 to
3 February 2016, during which the 28-rack JUQUEEN BlueGene/Q
system with 458 752 cores was reserved for over 50 hours.
Eight international code teams were selected to use this
opportunity to investigate and improve their application
scalability, assisted by staff from JSC Simulation
Laboratories and Cross-Sectional Teams. Ultimately seven
teams had codes successfully run on the full JUQUEEN system.
Strong scalability demonstrated by Code Saturne and
              Seven-League Hydro, both running 16 MPI processes per
              compute node with 4 OpenMP threads each for a total of
              1 835 008 threads, qualifies them for High-Q Club
              membership. Existing
members CIAO and iFETI were able to show that they had
additional solvers which also scaled acceptably.
Furthermore, large-scale in-situ interactive visualisation
was demonstrated with a CIAO simulation using 458 752 MPI
processes running on 28 racks coupled via JUSITU to VisIt.
The two adaptive mesh refinement utilities, ICI and p4est,
              showed that they could scale to run with 458 752 and
              917 504 MPI ranks respectively, but both encountered problems
loading large meshes. Parallel file I/O issues also hindered
large-scale executions of PFLOTRAN. Poor performance of a
NEST-import module which loaded and connected 1.9 TiB of
neuron and synapse data was tracked down to an internal
data-structure mismatch with the HDF5 file objects that
prevented use of MPI collective file reading; once this is
              rectified, large-scale neuronal network simulations are
              expected to become possible. A comparative analysis is
              provided with the 25 codes in the High-Q Club at the
              start of 2016, which include five codes that qualified
              from the previous workshop. Despite more mixed results,
              we learnt more about application file I/O limitations
              and inefficiencies, which continue to be the primary
              inhibitor of large-scale simulations.},
date = {2016-04-26},
organization = {Exascale Applications and Software
Conference 2016, Stockholm (Sweden), 26
Apr 2016 - 29 Apr 2016},
cin = {JSC},
cid = {I:(DE-Juel1)JSC-20090406},
pnm = {511 - Computational Science and Mathematical Methods
(POF3-511) / ATMLPP - ATML Parallel Performance (ATMLPP) /
ATMLAO - ATML Application Optimization and User Service
Tools (ATMLAO)},
pid = {G:(DE-HGF)POF3-511 / G:(DE-Juel-1)ATMLPP /
G:(DE-Juel-1)ATMLAO},
typ = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
doi = {10.1145/2938615.2938616},
url = {https://juser.fz-juelich.de/record/818244},
}