% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Doctoral thesis by Wolfgang Frings (RWTH Aachen, 2016), volume 30 of the
% IAS Series. Non-ASCII letters are written as LaTeX escapes so that the entry
% is usable with classic BibTeX as well as bibtex8 and biber.
% The fields reportid, cin, cid, pnm, pid, typ and urn look like repository
% export metadata (TODO confirm); standard styles do not print them.
@phdthesis{Frings:811621,
  author    = {Frings, Wolfgang},
  title     = {Efficient Task-Local {I/O} Operations of Massively Parallel
               Applications},
  type      = {Dissertation},
  school    = {RWTH Aachen},
  series    = {Schriften des Forschungszentrums J{\"u}lich. IAS Series},
  volume    = {30},
  publisher = {Forschungszentrum J{\"u}lich GmbH Zentralbibliothek, Verlag},
  address   = {J{\"u}lich},
  year      = {2016},
  pagetotal = {xiv, 140},
  isbn      = {978-3-95806-152-1},
  note      = {RWTH Aachen, Diss., 2016},
  url       = {https://juser.fz-juelich.de/record/811621},
  urn       = {urn:nbn:de:0001-2016062000},
  reportid  = {FZJ-2016-04033},
  cin       = {JSC},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {511 - Computational Science and Mathematical Methods
               (POF3-511) / ATMLAO - ATML Application Optimization and User
               Service Tools (ATMLAO)},
  pid       = {G:(DE-HGF)POF3-511 / G:(DE-Juel-1)ATMLAO},
  typ       = {PUB:(DE-HGF)11},
  abstract  = {Applications on current large-scale HPC systems use
               enormous numbers of processing elements for their
               computation and have access to large amounts of main memory
               for their data. Nevertheless, they still need file-system
               access to maintain program and application data
               persistently. Characteristic I/O patterns that produce a
               high load on the file system often occur during access to
               checkpoint and restart files, which have to be frequently
               stored to allow the application to be restarted after
               program termination or system failure. On large-scale HPC
               systems with distributed memory, each application task will
               often perform such I/O individually by creating task-local
               file objects on the file system. At large scale, these I/O
               patterns impose substantial stress on the metadata
               management components of the I/O subsystem. For example, the
               simultaneous creation of thousands of task-local files in
               the same directory can cause delays of several minutes. Also
               at the startup of dynamically linked applications, such
               metadata contention occurs while searching for library files
               and induces a comparably high metadata load on the file
               system. Even mid-scale applications cause in such load
               scenarios startup delays of ten minutes or more. Therefore,
               dynamic linking and loading is nowadays not applied on large
               HPC systems, although dynamic linking has many advantages
               for managing large code bases. The reason for these
               limitations is that POSIX I/O and the dynamic loader are
               implemented as serial components of the operating system and
               do not take advantage of the parallel nature of the I/O
               operations. To avoid the above bottlenecks, this work
               describes two novel approaches for the integration of
               locality awareness (e.g., through aggregation or caching)
               into the serial I/O operations of parallel applications. The
               underlying methods are implemented in two tools,
               \textit{SIONlib} and \textit{Spindle}, which exploit the
               knowledge of application parallelism to coordinate access to
               file-system objects. In addition, the applied methods also
               use knowledge of the underlying I/O subsystem structure, the
               parallel file system configuration, and the network
               between HPC-system and I/O system to optimize application
               I/O. Both tools add layers between the parallel application
               and the POSIX-based standard interfaces of the operating
               system for I/O and dynamic loading, eliminating the need for
               modifying the underlying system software. SIONlib is already
               applied in several applications, including PEPC, muphi, and
               MP2C, to implement efficient checkpointing. In addition,
               SIONlib is integrated in the performance-analysis tools
               Scalasca and Score-P to efficiently store and read trace
               data. Latest benchmarks on the Blue Gene/Q in J{\"u}lich
               demonstrate that SIONlib solves the metadata problem at
               large scale by running efficiently up to 1.8 million tasks
               while maintaining high I/O bandwidths of 60--80\% of
               file-system peak with a negligible file-creation time. The
               scalability of Spindle could be demonstrated by running the
               Pynamic benchmark, a proxy benchmark for a real application,
               on a cluster of Lawrence Livermore National Laboratory at
               large scale. The results show that the startup of
               dynamically linked applications is now feasible on more than
               15000 tasks, whereas the overhead of Spindle is nearly
               constantly low. With SIONlib and Spindle, this work
               demonstrates how scalability of operating system components
               can be improved without modifying them and without changing
               the I/O patterns of applications. In this way, SIONlib and
               Spindle represent prototype implementations of functionality
               needed by next-generation runtime systems.},
}