% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% NOTE(review): record exported from JuSER (FZJ-2025-03954). The fields
% reportid, cin, cid, pnm, pid and typ are repository-specific; standard
% BibTeX/biblatex styles silently ignore them, so they are kept for
% provenance. `booktitle` added below — it is a required field for
% @inproceedings and was previously only embedded in `organization`.
@inproceedings{Thnnien:1046787,
  author       = {Thönnißen, Julia and Oliveira, Sarah and Oberstraß,
                  Alexander and Kropp, Jan-Oliver and Gui, Xiaoyun and
                  Schiffer, Christian and Dickscheid, Timo},
  title        = {A {Perspective} on {FAIR} and {Scalable} {Access} to
                  {Large} {Image} {Data}},
  booktitle    = {2nd Conference on Research Data Infrastructure (CoRDI)},
  publisher    = {Zenodo},
  reportid     = {FZJ-2025-03954},
  pages        = {1--3},
  year         = {2025},
  month        = aug,
  date         = {2025-08-26},
  abstract     = {The rapid development of new imaging technologies across
                  scientific domains–especially high-throughput
                  technologies–results in a growing volume of image datasets
                  in the Tera- to Petabyte scale. Efficient visualization and
                  analysis of such massive image resources is critical but
                  remains challenging due to the sheer size of the data, its
                  continuous growth, and the limitations of conventional
                  software tools to address these problems. Tools for
                  visualization, annotation and analysis of large image data
                  are confronted with the fundamental dilemma of balancing
                  computational efficiency and memory requirements. Many tools
                  are unable to process large datasets due to memory
                  constraints, requiring workarounds like downsampling. On the
                  other hand, solutions that can handle large data efficiently
                  often rely on specialized or even proprietary file formats,
                  limiting interoperability with other software. This reflects
                  diverging requirements: storage favours compression for
                  efficiency, analysis demands fast data access, and
                  visualization requires tiled, multi-resolution
                  representations. Lacking a unified approach for these
                  conflicting needs, the operation of large and dynamically
                  evolving image repositories in practice often requires
                  undesirable data conversions and costly data duplication. In
                  addressing these challenges, the bioimaging community
                  increasingly adheres to the FAIR principles [1] through
                  national and international initiatives [2], [3], [4]. For
                  example, the Open Microscopy Environment (OME) fosters
                  standards such as OME-TIFF [5] and its cloud-native
                  successor OME-NGFF [6]; BioFormats [7] and OMERO [8]
                  facilitate metadata-rich data handling across diverse
                  platforms; and BrAinPI [9] provides web-based visualization
                  of images via Neuroglancer [10]. These tools represent
                  important developments towards more efficient and
                  standardized use of bioimaging data. However, for very large
                  and dynamically growing repositories, it is still not
                  feasible to settle on a single standard for a subset of
                  these tools, in particular in the light of very diverging
                  needs for massively parallel processing on HPC systems.
                  Therefore, converting data to a single target format is
                  often not a practical solution. We propose a concept for a
                  modular image delivery service which acts as a middleware
                  between large image data resources and applications, serving
                  image data from a cloud resource in multiple requested
                  representations on demand. The service allows reading data
                  stored in different input file formats, applying coordinate
                  transformations and filtering operations on-the-fly, and
                  serving the results in a range of different output formats
                  and layouts. Building upon a common framework for reading
                  and transforming data, an extensible set of access points
                  connects the service to client applications: Lightweight
                  REST APIs allow web-based multi-resolution access (e.g., in
                  common formats such as used in Neuroglancer and
                  OpenSeadragon base viewers); mountable filesystem interfaces
                  enable linking the repository to file-oriented solutions
                  (e.g., OMERO, ImageJ); and programmatic access from
                  customizable software tools (e.g., Napari). To provide
                  compatibility with upcoming image data standards like BIDS
                  [11] and minimize conversion efforts, the service is able to
                  dynamically expose standard-conform views into arbitrarily
                  organized datasets. The proposed approach for reading and
                  transforming data on-the-fly eliminates the need for
                  redundant storage and application-specific conversions of
                  datasets, improving workflow efficiency and sustainability.
                  In summary, we advocate for the development of a flexible
                  and extensible image data service that supports large-scale
                  analysis, dynamic transformations, multi-tool
                  interoperability, and compatibility with community standards
                  for large image datasets. This way it supports the FAIR
                  principles, reduces integration barriers, meets the
                  performance demands of modern imaging research, and still
                  fosters the use of existing community developments.},
  organization = {2nd Conference on Research Data Infrastructure (CoRDI),
                  Aachen (Germany), 26 Aug 2025 - 28 Aug 2025},
  keywords     = {data access (Other) / visualization (Other) /
                  interoperability (Other) / bioimaging (Other)},
  cin          = {INM-1},
  cid          = {I:(DE-Juel1)INM-1-20090406},
  pnm          = {5254 - Neuroscientific Data Analytics and AI (POF4-525) /
                  DFG project G:(GEPRIS)501864659 - NFDI4BIOIMAGE - Nationale
                  Forschungsdateninfrastruktur für Mikroskopie und
                  Bildanalyse (501864659) / EBRAINS 2.0 - EBRAINS 2.0: A
                  Research Infrastructure to Advance Neuroscience and Brain
                  Health (101147319) / HIBALL - Helmholtz International
                  BigBrain Analytics and Learning Laboratory (HIBALL)
                  (InterLabs-0015) / JL SMHB - Joint Lab Supercomputing and
                  Modeling for the Human Brain (JL SMHB-2021-2027) / X-BRAIN
                  (ZT-I-PF-4-061)},
  pid          = {G:(DE-HGF)POF4-5254 / G:(GEPRIS)501864659 /
                  G:(EU-Grant)101147319 / G:(DE-HGF)InterLabs-0015 /
                  G:(DE-Juel1)JL SMHB-2021-2027 / G:(DE-HGF)ZT-I-PF-4-061},
  typ          = {PUB:(DE-HGF)8},
  doi          = {10.5281/zenodo.16736220},
  url          = {https://juser.fz-juelich.de/record/1046787},
}