% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference paper (ICS '13) presenting SPINDLE, per the abstract below.
% Notes for maintainers:
%   - eventtitle / venue / eventdate are biblatex fields; classic BibTeX
%     styles ignore them.  They hold the event details that were previously
%     stored in "organization", which standard styles would have printed.
%   - "comment" keeps the venue string exactly as it was exported.
%   - reportid, cin, cid, pnm, pid and typ are non-standard fields ignored by
%     standard styles; presumably repository record metadata (see url).
@inproceedings{Frings:141704,
      author       = {Frings, Wolfgang and Ahn, Dong H. and LeGendre, Matthew and
                      Gamblin, Todd and de Supinski, Bronis R. and Wolf, Felix},
      title        = {Massively Parallel Loading},
      booktitle    = {Proceedings of the 27th {ACM} International Conference on
                      Supercomputing ({ICS} '13)},
      eventtitle   = {27th {ACM} International Conference on Supercomputing},
      venue        = {Eugene, Oregon},
      eventdate    = {2013-06-10/2013-06-14},
      publisher    = {ACM Press},
      address      = {New York, NY},
      isbn         = {978-1-4503-2130-3},
      pages        = {389--398},
      year         = {2013},
      month        = jun,
      doi          = {10.1145/2464996.2465020},
      url          = {https://juser.fz-juelich.de/record/141704},
      abstract     = {Dynamic linking has many advantages for managing large code
                      bases, but dynamically linked applications have not
                      typically scaled well on high performance computing systems.
                      Splitting a monolithic executable into many dynamic shared
                      object (DSO) files decreases compile time for large codes,
                      reduces runtime memory requirements by allowing modules to
                      be loaded and unloaded as needed, and allows common DSOs to
                      be shared among many executables. However, launching an
                      executable that depends on many DSOs causes a flood of file
                      system operations at program start-up, when each process in
                      the parallel application loads its dependencies. At large
                      scales, this operation has an effect similar to a site-wide
                      denial-of-service attack, as even large parallel file
                      systems struggle to service so many simultaneous requests.
                      In this paper, we present SPINDLE, a novel approach to
                      parallel loading that coordinates simultaneous file system
                      operations with a scalable network of cache server
                      processes. Our approach is transparent to user applications.
                      We extend the GNU loader, which is used in Linux as well as
                      proprietary operating systems, to limit the number of
                      simultaneous file system operations, quickly loading DSOs
                      without thrashing the file system. Our experiments show that
                      our prototype implementation has a low overhead and
                      increases the scalability of Pynamic, a benchmark that
                      stresses the dynamic loader, by a factor of 20.},
      comment      = {Proceedings of the 27th international ACM conference on
                      International conference on supercomputing - ICS '13},
      reportid     = {FZJ-2014-00071},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {41G - Supercomputer Facility (POF2-41G21) / ATMLAO - ATML
                      Application Optimization and User Service Tools (ATMLAO)},
      pid          = {G:(DE-HGF)POF2-41G21 / G:(DE-Juel-1)ATMLAO},
      typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
}