% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Preprint "On Leakage in Machine Learning Pipelines" (Sasse et al., 2023).
% Typed as misc rather than article: the record has no journal, which is a
% required field for the article type and triggers an "empty journal" warning.
% The eprint / archiveprefix / primaryclass fields expose the arXiv identifier
% (taken from the DOI suffix) to eprint-aware styles. primaryclass is assumed
% from the first-listed keyword (cs.LG) -- TODO confirm against the arXiv record.
% The title carries no brace protection because it contains no proper nouns or
% acronyms; the bibliography style decides the casing.
% reportid, cin, cid, pnm, pid and typ are not standard BibTeX/biblatex fields;
% they are kept verbatim from the export and are ignored by standard styles.
@misc{Sasse:1018240,
  author        = {Sasse, Leonard and Nicolaisen-Sobesky, Eliana and Dukart,
                   Jürgen and Eickhoff, Simon B. and Götz, Michael and
                   Hamdan, Sami and Komeyer, Vera and Kulkarni, Abhijit and
                   Lahnakoski, Juha and Love, Bradley C. and Raimondo, Federico
                   and Patil, Kaustubh R.},
  title         = {On Leakage in Machine Learning Pipelines},
  publisher     = {arXiv},
  year          = {2023},
  eprint        = {2311.04179},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  doi           = {10.48550/ARXIV.2311.04179},
  url           = {https://juser.fz-juelich.de/record/1018240},
  abstract      = {Machine learning (ML) provides powerful tools for
                   predictive modeling. ML's popularity stems from the promise
                   of sample-level prediction with applications across a
                   variety of fields from physics and marketing to healthcare.
                   However, if not properly implemented and evaluated, ML
                   pipelines may contain leakage typically resulting in
                   overoptimistic performance estimates and failure to
                   generalize to new data. This can have severe negative
                   financial and societal implications. Our aim is to expand
                   understanding associated with causes leading to leakage when
                   designing, implementing, and evaluating ML pipelines.
                   Illustrated by concrete examples, we provide a comprehensive
                   overview and discussion of various types of leakage that may
                   arise in ML pipelines.},
  keywords      = {Machine Learning (cs.LG) (Other) / Artificial Intelligence
                   (cs.AI) (Other) / FOS: Computer and information sciences
                   (Other)},
  reportid      = {FZJ-2023-04636},
  cin           = {INM-7},
  cid           = {I:(DE-Juel1)INM-7-20090406},
  pnm           = {5254 - Neuroscientific Data Analytics and AI (POF4-525)},
  pid           = {G:(DE-HGF)POF4-5254},
  typ           = {PUB:(DE-HGF)25},
}