% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@INPROCEEDINGS{Komeyer:1034781,
author = {Komeyer, Vera and Patil, Kaustubh and Reuter, Martin and
Wolfers, Thomas and Li, Jingwei},
title = {{D}ata leakage in machine learning: {A} conceptual take},
reportid = {FZJ-2024-07535},
year = {2024},
abstract = {Symposium: Machine learning (ML) and artificial intelligence
(AI) are increasingly being applied to study how individual
differences in the brain can manifest as distinct
psychiatric illnesses. These models can help us establish
the neural correlates of mental distress and predict
individual-level diagnosis, symptoms, trajectories, and
treatment responses. To realize the full potential of these
models, it is important to recognize their data requirements,
as well as biases in data and modeling choices that can limit
their applicability and the insights they provide.
Biases in these models and data can lead to inaccurate and
unfair predictions that overlook individual variation, with
serious consequences for patients, and can perpetuate and
amplify existing health disparities and inequalities. These
biases may arise from methodological
choices including the neuroimaging modality and state,
behavioral phenotypes, data transformation, sample size,
population, and modeling pipelines. It is crucial to
carefully evaluate the risks associated with AI/ML-based
modeling, such as bias, and to develop strategies to identify
and mitigate them. In doing so, we can improve the accuracy,
fairness, and reliability of the predictions and ensure that
they benefit all patients equally. This symposium will
discuss opportunities and challenges related to the
application of AI/ML to neuroimaging data from both applied
and conceptual perspectives. Data Leakage Talk: ML's
popularity stems from the promise of sample-level prediction
using high-dimensional data. However, if not properly
implemented and evaluated, ML pipelines can suffer from data
leakage, resulting in overoptimistic performance estimates
and models that fail to generalize to new data. In this talk
I will discuss challenges associated with data leakage and
their remedies.},
month = {Mar},
date = {2024-03-06},
organization = {DGKN, Frankfurt am Main (Germany), 6
Mar 2024 - 9 Mar 2024},
subtyp = {Invited},
cin = {INM-7},
cid = {I:(DE-Juel1)INM-7-20090406},
pnm = {5254 - Neuroscientific Data Analytics and AI (POF4-525) /
DFG project G:(GEPRIS)431549029 - SFB 1451:
Schlüsselmechanismen normaler und krankheitsbedingt
gestörter motorischer Kontrolle (431549029)},
pid = {G:(DE-HGF)POF4-5254 / G:(GEPRIS)431549029},
typ = {PUB:(DE-HGF)6},
url = {https://juser.fz-juelich.de/record/1034781},
}
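
% Illustrative sketch (not part of the record above, and not taken from the
% talk): a minimal, hypothetical scikit-learn example of the leakage pattern
% the abstract warns about, assuming feature selection as the preprocessing
% step. Fitting the selector on the full dataset before cross-validation lets
% test-fold labels influence training and yields overoptimistic accuracy even
% on pure noise; moving the selector inside a Pipeline removes the leak.
% BibTeX/biber ignore text outside @-entries, so this note does not affect
% the record.

import numpy as np
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline

rng = np.random.RandomState(0)
X = rng.randn(100, 5000)    # pure-noise features: there is nothing to learn
y = rng.randint(0, 2, 100)  # random binary labels

cv = KFold(n_splits=5, shuffle=True, random_state=0)

# Leaky: feature selection sees every label before cross-validation,
# so information from each test fold leaks into the training folds.
X_leaky = SelectKBest(f_classif, k=20).fit_transform(X, y)
leaky = cross_val_score(LogisticRegression(), X_leaky, y, cv=cv).mean()

# Leakage-free: the selector is refit on each training fold only.
pipe = make_pipeline(SelectKBest(f_classif, k=20), LogisticRegression())
honest = cross_val_score(pipe, X, y, cv=cv).mean()

print(f"leaky CV accuracy:  {leaky:.2f}")   # overoptimistic, well above chance
print(f"honest CV accuracy: {honest:.2f}")  # near chance (~0.5)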