% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
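%
% For example, a minimal LaTeX setup using biber could look like the sketch
% below (the file name “references.bib” is an assumption, not part of this
% record):
%
%   \documentclass{article}
%   \usepackage[backend=biber]{biblatex}  % biber handles UTF-8 .bib entries
%   \addbibresource{references.bib}       % hypothetical name for this file
%   \begin{document}
%   As presented by Th\"onni{\ss}en \cite{Thnnien:1048800}, ...
%   \printbibliography
%   \end{document}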

@INPROCEEDINGS{Thnnien:1048800,
      author       = {Thönnißen, Julia},
      title        = {{B}ridging the {G}ap {B}etween {S}torage and
                      {A}pplications: {A} {M}odular {C}oncept for {L}arge {I}mage
                      {D}ata {A}ccess},
      reportid     = {FZJ-2025-04913},
      year         = {2025},
      abstract     = {Recent advances in imaging technologies—particularly
                      high-throughput methods—have led to an unprecedented
                      growth of image datasets, reaching terabytes to petabytes in
                      scale. While such massive datasets offer transformative
                      potential for scientific discovery, they also introduce
                      significant challenges for visualization and analysis due to
                      the sheer size of the data and its continuous growth.
                      Visualizing, annotating, and analyzing large-scale image
                      datasets poses a fundamental dilemma: balancing
                      computational efficiency against memory requirements. Many
                      existing tools fail to manage large datasets effectively due
                      to memory constraints, often forcing lossy methods like
                      downsampling. Conversely, solutions optimized for large data
                      volumes frequently depend on specialized or proprietary
                      formats, reducing interoperability with other ecosystems.
                      This highlights diverging requirements: storage systems
                      favour compression for compactness, analysis tools require
                      fast data access, and visualization tools benefit from
                      tiled, multi-resolution formats. Without a unified strategy,
                      institutions often resort to inefficient workflows involving
                      repeated format conversions and costly data duplication to
                      support diverse applications. Ongoing standardization
                      efforts within the bioimaging community [1–4] represent
                      important steps towards more efficient and standardized
                      use of bioimaging data. However, converting data into a
                      single (and still evolving) standard is not feasible for
                      rapidly growing large-scale datasets, especially given the
                      widely diverging requirements of parallel processing on
                      HPC systems. To address these issues, we
                      present a concept for a modular cloud-native image delivery
                      service designed to act as a flexible middleware layer
                      between large-scale image repositories and consuming
                      applications. The system supports heterogeneous input
                      formats and delivers transformed data views on demand. It
                      performs real-time operations such as coordinate
                      transformations, filtering, and multi-resolution tiling,
                      eliminating the need for pre-processing or intermediate
                      storage. The service offers an extensible set of access
                      points: RESTful APIs for web-based visualization (e.g.,
                      Neuroglancer, OpenSeadragon), virtual file system mounts for
                      file-oriented tools (e.g., OMERO, ImageJ), and programmatic
                      interfaces compatible with customizable environments (e.g.,
                      Napari, datalad). Additionally, it can dynamically present
                      standard-conformant data views—such as those aligned with
                      the Brain Imaging Data Structure (BIDS) [4]—from
                      arbitrarily organized datasets. By decoupling data access
                      from physical storage layout, the service facilitates
                      scalable, multi-tool interoperability in distributed
                      environments without data duplication. In summary, we
                      propose a flexible and extensible approach to image data
                      access that supports dynamic transformations, minimizes
                      redundancy, and bridges the gap between diverse storage
                      backends and modern, distributed applications. It aligns
                      with the FAIR data principles and builds upon community
                      standards while enabling efficient workflows for managing
                      and exploiting large-scale image datasets.
                      [1] S. Besson et al., “Bringing Open Data to Whole Slide
                      Imaging”, Digital Pathology (ECDP 2019), Lecture Notes in
                      Computer Science, vol. 11435, pp. 3–10, Jul. 2019. DOI:
                      10.1007/978-3-030-23937-4\_1.
                      [2] J. Moore et al., “OME-NGFF: A next-generation file
                      format for expanding bioimaging data-access strategies”,
                      Nature Methods, vol. 18, no. 12, pp. 1496–1498, Dec. 2021.
                      DOI: 10.1038/s41592-021-01326-w.
                      [3] C. Allan et al., “OMERO: Flexible, model-driven data
                      management for experimental biology”, Nature Methods,
                      vol. 9, no. 3, pp. 245–253, Mar. 2012. DOI:
                      10.1038/nmeth.1896.
                      [4] K. J. Gorgolewski et al., “The brain imaging data
                      structure, a format for organizing and describing outputs
                      of neuroimaging experiments”, Scientific Data, vol. 3,
                      no. 1, p. 160044, Jun. 2016. DOI: 10.1038/sdata.2016.44.},
      month        = {Oct},
      date         = {2025-10-23},
      organization = {Distribits 2025, Düsseldorf
                      (Germany), 23 Oct 2025 - 25 Oct 2025},
      subtyp       = {After Call},
      cin          = {INM-1},
      cid          = {I:(DE-Juel1)INM-1-20090406},
      pnm          = {5251 - Multilevel Brain Organization and Variability
                      (POF4-525) / 5254 - Neuroscientific Data Analytics and AI
                      (POF4-525) / HIBALL - Helmholtz International BigBrain
                      Analytics and Learning Laboratory (HIBALL) (InterLabs-0015)
                      / EBRAINS 2.0 - EBRAINS 2.0: A Research Infrastructure to
                      Advance Neuroscience and Brain Health (101147319) / DFG
                      project G:(GEPRIS)501864659 - NFDI4BIOIMAGE - Nationale
                      Forschungsdateninfrastruktur für Mikroskopie und
                      Bildanalyse (501864659)},
      pid          = {G:(DE-HGF)POF4-5251 / G:(DE-HGF)POF4-5254 /
                      G:(DE-HGF)InterLabs-0015 / G:(EU-Grant)101147319 /
                      G:(GEPRIS)501864659},
      typ          = {PUB:(DE-HGF)6},
      url          = {https://juser.fz-juelich.de/record/1048800},
}