% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article record (Journal of Big Data, vol. 12, no. 1, 2025).
% `pages` holds a single number, presumably the article number (TODO confirm).
% The fields reportid, cin, ddc, cid, pnm, pid, typ and ut are not standard
% BibTeX fields; they look like repository-export metadata (TODO confirm) and
% are skipped by standard bibliography styles.
% Values contain UTF-8 characters, so process this entry with bibtex8 or biber.
@article{Sasse:1043079,
  author    = {Sasse, L. and Nicolaisen, Eliana and Dukart, Jürgen and
               Eickhoff, S. B. and Götz, M. and Hamdan, S. and
               Komeyer, V. and Kulkarni, A. and Lahnakoski, J. M. and
               Love, B. C. and Raimondo, F. and Patil, Kaustubh R.},
  title     = {Overview of leakage scenarios in supervised machine
               learning},
  journal   = {Journal of Big Data},
  volume    = {12},
  number    = {1},
  pages     = {135},
  year      = {2025},
  publisher = {SpringerOpen},
  address   = {Heidelberg [u.a.]},
  issn      = {2196-1115},
  doi       = {10.1186/s40537-025-01193-8},
  url       = {https://juser.fz-juelich.de/record/1043079},
  abstract  = {Machine learning (ML) provides powerful tools for
               predictive modeling. ML’s popularity stems from the
               promise of sample-level prediction with applications
               across a variety of fields from physics and marketing to
               healthcare. However, if not properly implemented and
               evaluated, ML pipelines may contain leakage typically
               resulting in overoptimistic performance estimates and
               failure to generalize to new data. This can have severe
               negative financial and societal implications. Our aim is
               to expand understanding associated with causes leading to
               leakage when designing, implementing, and evaluating ML
               pipelines. Illustrated by concrete examples, we provide a
               comprehensive overview and discussion of various types of
               leakage that may arise in ML pipelines.},
  reportid  = {FZJ-2025-02765},
  cin       = {INM-7},
  ddc       = {004},
  cid       = {I:(DE-Juel1)INM-7-20090406},
  pnm       = {5254 - Neuroscientific Data Analytics and AI (POF4-525)},
  pid       = {G:(DE-HGF)POF4-5254},
  typ       = {PUB:(DE-HGF)16},
  ut        = {WOS:001498691400001},
}