% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article: Hamdan et al., GigaScience 12 (2023), article no. giad071.
% The journal identifies this paper by article number, so "pages" holds that
% identifier rather than a page range.
% The fields reportid, cin, ddc, cid, pnm, pid, typ, pubmed and UT are not
% standard BibTeX fields; they appear to be repository bookkeeping metadata
% (confirm against the exporting system) and are ignored by standard styles.
@ARTICLE{Hamdan:1010545,
  author    = {Hamdan, Sami and Love, Bradley C and von Polier, Georg and
               Weis, Susanne and Schwender, Holger and Eickhoff, Simon and
               Patil, Kaustubh},
  title     = {Confound-leakage: Confound Removal in Machine Learning
               Leads to Leakage},
  journal   = {GigaScience},
  volume    = {12},
  issn      = {2047-217X},
  address   = {Oxford},
  publisher = {Oxford University Press},
  reportid  = {FZJ-2023-03119},
  pages     = {giad071},
  year      = {2023},
  note      = {This work was partly supported by the Helmholtz-AI project
               DeGen (ZT-I-PF-5-078), the Helmholtz Portfolio Theme
               “Supercomputing and Modeling for the Human Brain,” and
               Deutsche Forschungsgemeinschaft (DFG, German Research
               Foundation), project-ID 431549029–SFB 1451 project B05.},
  abstract  = {Background: Machine learning (ML) approaches are a crucial
               component of modern data analysis in many fields, including
               epidemiology and medicine. Nonlinear ML methods often
               achieve accurate predictions, for instance, in personalized
               medicine, as they are capable of modeling complex
               relationships between features and the target.
               Problematically, ML models and their predictions can be
               biased by confounding information present in the features.
               To remove this spurious signal, researchers often employ
               featurewise linear confound regression (CR). While this is
               considered a standard approach for dealing with confounding,
               possible pitfalls of using CR in ML pipelines are not fully
               understood. Results: We provide new evidence that, contrary
               to general expectations, linear confound regression can
               increase the risk of confounding when combined with
               nonlinear ML approaches. Using a simple framework that uses
               the target as a confound, we show that information leaked
               via CR can increase null or moderate effects to near-perfect
               prediction. By shuffling the features, we provide evidence
               that this increase is indeed due to confound-leakage and not
               due to revealing of information. We then demonstrate the
               danger of confound-leakage in a real-world clinical
               application where the accuracy of predicting
               attention-deficit/hyperactivity disorder is overestimated
               using speech-derived features when using depression as a
               confound. Conclusions: Mishandling or even amplifying
               confounding effects when building ML models due to
               confound-leakage, as shown, can lead to untrustworthy,
               biased, and unfair predictions. Our expose of the
               confound-leakage pitfall and provided guidelines for dealing
               with it can help create more robust and trustworthy ML
               models.},
  cin       = {INM-7},
  ddc       = {610},
  cid       = {I:(DE-Juel1)INM-7-20090406},
  pnm       = {5252 - Brain Dysfunction and Plasticity (POF4-525) / SFB
               1451 B05 - Einzelfallvorhersagen der motorischen
               Fähigkeiten bei Gesunden und Patienten mit motorischen
               Störungen (B05) (458640473)},
  pid       = {G:(DE-HGF)POF4-5252 / G:(GEPRIS)458640473},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {37776368},
  UT        = {WOS:001189196000001},
  doi       = {10.1093/gigascience/giad071},
  url       = {https://juser.fz-juelich.de/record/1010545},
}