% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@inproceedings{Thnnien:1048800,
  author       = {Thönnißen, Julia},
  title        = {Bridging the Gap Between Storage and Applications: A
                  Modular Concept for Large Image Data Access},
  reportid     = {FZJ-2025-04913},
  year         = {2025},
  abstract     = {Recent advances in imaging technologies—particularly
                  high-throughput methods—have led to an unprecedented
                  growth of image datasets, reaching Terabytes to Petabytes in
                  scale. While such massive datasets offer transformative
                  potential for scientific discovery, they also introduce
                  significant challenges for visualization and analysis due to
                  the sheer size of the data and its continuous growth.
                  Visualizing, annotating, and analyzing large-scale image
                  datasets raises a fundamental dilemma of balancing
                  computational efficiency and memory requirements. Many
                  existing tools fail to manage large datasets effectively due
                  to memory constraints, often forcing lossy methods like
                  downsampling. Conversely, solutions optimized for large data
                  volumes frequently depend on specialized or proprietary
                  formats, reducing interoperability with other ecosystems.
                  This highlights diverging requirements: storage systems
                  favour compression for compactness, analysis tools require
                  fast data access, and visualization tools benefit from
                  tiled, multi-resolution formats. Without a unified strategy,
                  institutions often resort to inefficient workflows involving
                  repeated format conversions and costly data duplication to
                  support diverse applications. Ongoing standardization
                  efforts within the bioimaging community [1-4] represent
                  important developments towards more efficient and
                  standardized use of bioimaging data. However, the conversion
                  of data into a single (and yet evolving) standard is not
                  feasible for rapidly growing large-scale datasets,
                  especially given very diverging needs for parallel
                  processing on HPC systems. To address these issues, we
                  present a concept for a modular cloud-native image delivery
                  service designed to act as a flexible middleware layer
                  between large-scale image repositories and consuming
                  applications. The system supports heterogeneous input
                  formats and delivers transformed data views on demand. It
                  performs real-time operations such as coordinate
                  transformations, filtering, and multi-resolution tiling,
                  eliminating the need for pre-processing or intermediate
                  storage. The service offers an extensible set of access
                  points: RESTful APIs for web-based visualization (e.g.,
                  Neuroglancer, OpenSeadragon), virtual file system mounts for
                  file-oriented tools (e.g., OMERO, ImageJ), and programmatic
                  interfaces compatible with customizable environments (e.g.,
                  Napari, datalad). Additionally, it can dynamically present
                  standard-conformant data views—such as those aligned with
                  the Brain Imaging Data Structure (BIDS) [4]—from
                  arbitrarily organized datasets. By decoupling data access
                  from physical storage layout, the service facilitates
                  scalable, multi-tool interoperability in distributed
                  environments without data duplication. In summary, we
                  propose a flexible and extensible approach to image data
                  access that supports dynamic transformations, minimizes
                  redundancy, and bridges the gap between diverse storage
                  backends and modern, distributed applications. It aligns
                  with the FAIR data principles and builds upon community
                  standards while enabling efficient workflows for managing
                  and exploiting large-scale image datasets.<br><br>[1] S.
                  Besson et al., “Bringing Open Data to Whole Slide
                  Imaging”, Digital Pathology ECDP 2019, Lecture Notes in
                  Computer Science, vol. 11435, pp. 3–10, Jul. 2019, DOI:
                  10.1007/978-3-030-23937-4\_1 <br>[2] J. Moore et al.,
                  “OME-NGFF: A next-generation file format for expanding
                  bioimaging data-access strategies”, Nature Methods, vol.
                  18, no. 12, pp. 1496–1498, Dec. 2021. DOI:
                  10.1038/s41592-021-01326-w.<br> [3] C. Allan et al.,
                  “OMERO: Flexible, model-driven data management for
                  experimental biology”, Nature Methods, vol. 9, no. 3, pp.
                  245–253, Mar. 2012. DOI: 10.1038/nmeth.1896. <br>[4] K. J.
                  Gorgolewski et al., “The brain imaging data structure, a
                  format for organizing and describing outputs of neuroimaging
                  experiments”, Scientific Data, vol. 3, no. 1, p. 160 044,
                  Jun. 2016. DOI: 10.1038/sdata.2016.44.<br>},
  month        = oct,
  date         = {2025-10-23},
  organization = {Distribits 2025, Düsseldorf
                  (Germany), 23 Oct 2025 - 25 Oct 2025},
  subtyp       = {After Call},
  cin          = {INM-1},
  cid          = {I:(DE-Juel1)INM-1-20090406},
  pnm          = {5251 - Multilevel Brain Organization and Variability
                  (POF4-525) / 5254 - Neuroscientific Data Analytics and AI
                  (POF4-525) / HIBALL - Helmholtz International BigBrain
                  Analytics and Learning Laboratory (HIBALL) (InterLabs-0015)
                  / EBRAINS 2.0 - EBRAINS 2.0: A Research Infrastructure to
                  Advance Neuroscience and Brain Health (101147319) / DFG
                  project G:(GEPRIS)501864659 - NFDI4BIOIMAGE - Nationale
                  Forschungsdateninfrastruktur für Mikroskopie und
                  Bildanalyse (501864659)},
  pid          = {G:(DE-HGF)POF4-5251 / G:(DE-HGF)POF4-5254 /
                  G:(DE-HGF)InterLabs-0015 / G:(EU-Grant)101147319 /
                  G:(GEPRIS)501864659},
  typ          = {PUB:(DE-HGF)6},
  url          = {https://juser.fz-juelich.de/record/1048800},
}