% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% arXiv preprint (arXiv:2401.01874): Brozos et al., "Graph Neural Networks for
% Surfactant Multi-Property Prediction", 2024.
% Typed as "misc" because there is no journal; the arXiv identifier is carried
% by eprint/archiveprefix/primaryclass so arXiv-aware styles can print and link it.
% The fields reportid, cin, cid, pnm, pid and typ come from the repository
% export (see url) and are not standard BibTeX fields; they are kept verbatim
% for round-tripping.
@misc{Brozos:1025679,
      author        = {Brozos, Christoforos and Rittig, Jan G. and Bhattacharya,
                       Sandip and Akanny, Elie and Kohlmann, Christina and Mitsos,
                       Alexander},
      title         = {Graph Neural Networks for Surfactant Multi-Property
                       Prediction},
      publisher     = {arXiv},
      reportid      = {FZJ-2024-03068},
      year          = {2024},
      eprint        = {2401.01874},
      archiveprefix = {arXiv},
      primaryclass  = {physics.chem-ph},
      abstract      = {Surfactants are of high importance in different industrial
                       sectors such as cosmetics, detergents, oil recovery and drug
                       delivery systems. Therefore, many quantitative
                       structure-property relationship (QSPR) models have been
                       developed for surfactants. Each predictive model typically
                       focuses on one surfactant class, mostly nonionics. Graph
                       Neural Networks (GNNs) have exhibited a great predictive
                       performance for property prediction of ionic liquids,
                       polymers and drugs in general. Specifically for surfactants,
                       GNNs can successfully predict critical micelle concentration
                       (CMC), a key surfactant property associated with
                       micellization. A key factor in the predictive ability of
                       QSPR and GNN models is the data available for training.
                       Based on extensive literature search, we create the largest
                       available CMC database with 429 molecules and the first
                       large data collection for surface excess concentration
                       ($\Gamma_{m}$), another surfactant property associated with
                       foaming, with 164 molecules. Then, we develop GNN models to
                       predict the CMC and $\Gamma_{m}$ and we explore different
                       learning approaches, i.e., single- and multi-task learning,
                       as well as different training strategies, namely ensemble
                       and transfer learning. We find that a multi-task GNN with
                       ensemble learning trained on all $\Gamma_{m}$ and CMC data
                       performs best. Finally, we test the ability of our CMC model
                       to generalize on industrial grade pure component
                       surfactants. The GNN yields highly accurate predictions for
                       CMC, showing great potential for future industrial
                       applications.},
      keywords      = {Chemical Physics (physics.chem-ph) (Other) / Machine
                       Learning (cs.LG) (Other) / FOS: Physical sciences (Other) /
                       FOS: Computer and information sciences (Other)},
      cin           = {IEK-10},
      cid           = {I:(DE-Juel1)IEK-10-20170217},
      pnm           = {899 - ohne Topic (POF4-899)},
      pid           = {G:(DE-HGF)POF4-899},
      typ           = {PUB:(DE-HGF)25},
      doi           = {10.48550/arXiv.2401.01874},
      url           = {https://juser.fz-juelich.de/record/1025679},
}