% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article record for "Polytope" (Journal of Big Data, vol. 12, no. 1).
% - pages holds a single number (243), presumably the article number rather
%   than a page range -- confirm against the publisher record.
% - reportid, cin, ddc, cid, pnm, pid and typ are not standard BibTeX fields;
%   standard styles ignore them. They look like repository-export metadata
%   and are retained so the record can still be matched to its source.
% - Only the whole word "Polytope" is brace-protected in the title; the rest
%   is left for the bibliography style to case.
@article{Leuridan:1052199,
  author    = {Leuridan, Mathilde and Hawkes, James and Smart, Simon and
               Danovaro, Emanuele and Schultz, Martin and Quintino, Tiago},
  title     = {{Polytope}: an algorithm for efficient feature extraction
               on hypercubes},
  journal   = {Journal of Big Data},
  volume    = {12},
  number    = {1},
  pages     = {243},
  year      = {2025},
  publisher = {SpringerOpen},
  address   = {Heidelberg [u.a.]},
  issn      = {2196-1115},
  doi       = {10.1186/s40537-025-01306-3},
  url       = {https://juser.fz-juelich.de/record/1052199},
  abstract  = {Data extraction algorithms on data hypercubes, or
               datacubes, are traditionally only capable of cutting boxes
               of data along the datacube axes. For many use cases however,
               this returns much more data than users actually need,
               leading to an unnecessary consumption of I/O resources. In
               this paper, we propose an alternative feature extraction
               technique, which carefully computes the indices of data
               points contained within user-requested shapes. This enables
               data storage systems to only read and return bytes useful to
               user applications from the datacube. Our main algorithm is
               based on high-dimensional computational geometry concepts
               and operates by successively reducing polytopes down to the
               points contained within them. We analyse this algorithm in
               detail before providing results about its performance and
               scalability. In particular, we show it is possible to
               achieve data reductions of up to $99\%$ using this algorithm
               instead of current state of practice data extraction
               methods, such as meteorological field extractions from
               ECMWF’s FDB data store, where feature shapes are extracted
               a posteriori as a post-processing step. As we discuss later
               on, this novel extraction method will considerably help
               scale access to large petabyte size data hypercubes in a
               variety of scientific fields.},
  reportid  = {FZJ-2026-00834},
  cin       = {JSC},
  ddc       = {004},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
               (SDLs) and Research Groups (POF4-511) / Earth System Data
               Exploration (ESDE)},
  pid       = {G:(DE-HGF)POF4-5111 / G:(DE-Juel-1)ESDE},
  typ       = {PUB:(DE-HGF)16},
}