% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Invited contribution to the 28th Annual Meeting of the Association for the
% Scientific Study of Consciousness (see the organization and subtyp fields).
% reportid, subtyp, cin, cid, pnm, pid and typ are not standard BibTeX fields;
% they look like repository bookkeeping (TODO confirm) and are ignored by
% standard styles, so they are kept verbatim.
% year and month serve classic BibTeX styles; date gives the full ISO date for
% biblatex.
@inproceedings{Raimondo:1043691,
      author       = {Raimondo, Federico and Komeyer, Vera and Nieto, Nicolas and
                      Wu, Jianxiao and Patil, Kaustubh},
      title        = {Caveats and Guidelines to Safely Apply Machine
                      Learning in Consciousness Research},
      reportid     = {FZJ-2025-02989},
      year         = {2025},
      abstract     = {Summary: Machine Learning (ML) is getting widely used within
                      the field of consciousness research. For example, it allowed
                      classification of clinical cases, variant mental states,
                      unconscious states during anesthesia, and diverse brain
                      processes on transition to sleep. Yet, specific challenges
                      emerge when adopting ML in consciousness studies, due to the
                      nature of data, experimental design and sample size.
                      Improperly addressing these issues can lead to
                      overestimation, misinterpretation and invalidation of
                      findings. The goal of the tutorial is to provide
                      explanations and techniques to mitigate these challenges,
                      around three main axes: 1) Data-leakage: ML aims to predict
                      outcomes in unseen data. If one evaluates models on the same
                      data that the algorithm was trained on, this constitutes
                      data-leakage. Practices which are common in cognitive
                      science can lead to data-leakage, like whole-data processing
                      (e.g. ICA during EEG cleaning) or choosing the wrong metric
                      and performance estimation method (e.g. ROC-AUC using
                      leave-one-out cross-validation). 2) Imbalanced Learning: ML
                      relies on datapoint examples that span the entire outcome
                      distribution. However, sometimes, different outcomes are
                      unequally represented (e.g. decoding a rare mental state),
                      leading to an imbalanced number of examples across
                      outcomes. 3) Confounding: ML models can reveal associations
                      between predictors and an outcome variable. Such
                      associations are often attributed to the data’s nature and
                      origin. However, there may be confounding variables (i.e.
                      disregarded variables that correlate with both predictors
                      and the outcome) that can drive the
                      associations. Additionally, we plan to offer guidelines to
                      correctly interpret scientific findings obtained from ML
                      models in consciousness research. Rationale on speaker
                      selection and proof of their expertise: The speakers were
                      selected to comprise experts varying from cognitive
                      neuroscience to computer science. Dr. Raimondo is a computer
                      scientist with published work in consciousness science using
                      ML methods. Particularly, on the diagnosis of patients with
                      Disorders of Consciousness from brain (Engemann et al. 2018)
                      and bodily signals (Raimondo et al. 2017), responsiveness
                      and sleep (Strauss et al. 2022), decoding of mental states
                      during mind blanking (Mortaheb et al. 2022) and the
                      characterization of its physiological states (Boulakis et
                      al. 2024). Dr. Nieto is a biomedical engineer with a PhD in
                      neurophysiology. He has published on biases in ML models due
                      to gender imbalance (Larrazabal et al. 2020) as well as
                      leakage on data harmonization, imbalanced data and ML models
                      (Nieto et al. 2024). Dr. Wu is an electrical engineer with a
                      PhD in medical sciences, centered in the study of
                      brain-behavior relationships through neuroimaging and
                      machine learning. She published on the challenges in
                      brain-based prediction of behavior (Wu et al. 2023). Dr.
                      Patil leads the Applied Machine Learning group at the
                      Institute of Neuroscience and Medicine-7: Brain and
                      Behavior, where he bridges domain-agnostic ML models with
                      cognitive and neuroimaging research, including publications
                      on data-leakage (Sasse et al. 2024) and confounding (Hamdan
                      et al. 2023). MSc. Komeyer is a PhD candidate focusing on
                      confounding variables in neuroimaging-based predictive
                      models. Relevant work for this tutorial includes conceptual
                      considerations regarding confounding variables in
                      biomedicine (Komeyer, et al. 2024a) and causal inference
                      using ML in precision medicine (Komeyer, et al.
                      2024b). Desired educational expectations: This tutorial is
                      intended for any researcher who uses (or is planning to use)
                      ML in their research. There is no specific background and
                      knowledge required, though in their own interest, it will be
                      better if they understand what machine learning is and how
                      it is typically applied. For this matter, prior to the
                      tutorial, we will send reading material that could help
                      attendees to get introduced to some of the key concepts.
                      Nevertheless, we will also briefly introduce them in the
                      first talk. Proposed audience engagement: After the
                      presentations, there will be a 1-hour slot in which the
                      attendees will be able to present their projects and have a
                      Q\&A/discussion with the panel. During this section,
                      participants will be able to introduce their projects,
                      including presenting a few slides if needed, and proceed to
                      discuss possible manners to overcome the project-specific
                      challenges. The main goal is that attendees obtain
                      applicable knowledge for their specific projects. The
                      structure of tutorial will be communicated to the attendees
                      in advance, requesting the attendees to manifest their
                      intentions and clearly define an agenda. At the end of the
                      tutorial all materials and codes will be made publicly
                      available and shared with the attendees, so they can use
                      them in their research. Planned structure: The tutorial will
                      have two stages. The first stage consists of 4 presentations
                      of 25 minutes each (including 5 minutes for questions), with
                      two brief 10 minutes pauses after the second talk and the
                      last talk. The first talk will introduce general ML concepts
                      that are required to understand and follow the rest of the
                      tutorial. We will first introduce some examples of ML in
                      consciousness science as well as key concepts and specific
                      terminology used in the ML field. The three following talks
                      will address each one of the three challenges: Data-Leakage,
                      Imbalanced Learning and Confounding. We will first introduce
                      the problematics and challenges, including, when possible,
                      examples on applications of ML to consciousness science.
                      Each talk will also provide methods and tools that could be
                      used to address these challenges and safely use ML within
                      consciousness research. In a second stage, we will hold a
                      1-hour interactive session in which the attendees, if they
                      desire, can present their research projects or ask questions
                      to the panel. Rationale on panel inclusivity: This group of
                      speakers is diverse in several factors. Regarding
                      nationality, it is comprised by two South Americans
                      (Argentina), two Asians (China and India) and a European
                      (Germany). With respect to gender expression, two females
                      and three males. In terms of career stage, by one PhD
                      candidate, two post-doctoral researchers, a recently
                      appointed team leader and an established group leader.},
      month        = jul,
      date         = {2025-07-06},
      organization = {28th Annual Meeting of the Association
                      for the Scientific Study of
                      Consciousness, Heraklion (Greece), 6
                      Jul 2025 - 9 Jul 2025},
      subtyp       = {Invited},
      cin          = {INM-7},
      cid          = {I:(DE-Juel1)INM-7-20090406},
      pnm          = {5254 - Neuroscientific Data Analytics and AI (POF4-525) /
                      5251 - Multilevel Brain Organization and Variability
                      (POF4-525)},
      pid          = {G:(DE-HGF)POF4-5254 / G:(DE-HGF)POF4-5251},
      typ          = {PUB:(DE-HGF)6},
      url          = {https://juser.fz-juelich.de/record/1043691},
}