% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Dataset record: replication data and code for the manuscript
% `Machine learning isotropic g values of radical polymers'.
%
% Notes for maintainers:
%  - reportid, cin, cid, pnm, pid and typ are not fields of the standard
%    BibTeX data model; standard styles silently ignore unknown fields.
%    They look like institutional-repository export metadata -- confirm
%    with the exporting system before renaming or removing them.
%  - In the abstract, file and folder names are written as \texttt{...}
%    with underscores escaped as \_, and the placeholders Y' and Y'' are
%    set in math mode, so the field typesets without errors if a style
%    prints it.
%  - In the title, braces protect whole words from style recasing.
@misc{Daniel:1028902,
      author       = {Daniel, Davis Thomas and Mitra, Souvik and Eichel,
                      Rüdiger-A. and Diddens, Diddo and Granwehr, Josef},
      title        = {{Replication} {Data} and {Code} for: `{Machine} learning
                      isotropic g values of radical polymers'},
      publisher    = {Jülich DATA},
      reportid     = {FZJ-2024-04862},
      year         = {2024},
      abstract     = {This data repository contains the data sets and python
                      scripts associated with the manuscript `Machine learning
                      isotropic g values of radical polymers'. Electron
                      paramagnetic resonance measurements allow for obtaining
                      experimental g values of radical polymers. Analogous to
                      chemical shifts, g values give insight into the identity and
                      environment of the paramagnetic center. In this work,
                      Machine learning based prediction of g values is explored as
                      a viable alternative to computationally expensive density
                      functional theory (DFT) methods. Description of folder
                      contents (switch to tree view): Datasets : Contains PTMA
                      polymer structures from TR, TE-1, and TE-2 data sets
                      transformed using a molecular descriptor (SOAP, MBTR or DAD)
                      and corresponding DFT-calculated g values. Filenames contain
                      \texttt{PTMA\_X} where X denotes the number of monomers
                      which are radicals. Structure data sets have
                      \texttt{structure\_data} in the title, DFT calculated g
                      values have \texttt{giso\_DFT\_data} in the title. The
                      files are in .npy (NumPy) format. Models : ERT
                      models trained on SOAP, MBTR and DAD feature vectors.
                      Scripts : Contains scripts which can be used to predict g
                      values from XYZ files of PTMA structures with 6 monomer
                      units and varying radical density. The script
                      \texttt{prediction\_functions.py} contains the functions
                      which transform the XYZ coordinates into an appropriate
                      feature vector which the trained model uses to predict.
                      Description of individual functions are also given as
                      docstrings (python documentation strings) in the code. The
                      folder also contains additional files needed for the
                      ERT-DAD model in .pkl format.
                      \texttt{XYZ\_files} : Contains atomic coordinates of PTMA
                      structures in XYZ format. Two subfolders : WSD and TE-2
                      correspond to structures present in the whole structure data
                      set and TE-2 test data set (see main text in the manuscript
                      for details). Filenames in the folder
                      \texttt{XYZ\_files/TE-2/PTMA-X/} are of the type
                      \texttt{chainlength\_6ptma\_$Y'$\_$Y''$.xyz} where
                      \texttt{chainlength\_6ptma} denotes the length of polymer
                      chain (6 monomers), $Y'$ denotes the proportion of monomers
                      which are radicals (for instance, $Y'$ = 50 means 3 out of
                      6 monomers are radicals) and $Y''$ denotes the order of the
                      MD time frame. Actual time frame values of $Y''$ in ps is
                      given in the manuscript.
                      PTMA-ML.ipynb : Jupyter notebook detailing the workflow of
                      generating the trained model. The file includes steps to
                      load data sets, transform xyz files using molecular
                      descriptors, optimise hyperparameters, train the model,
                      cross validate using the training data set and evaluate the
                      model. PTMA-ML.pdf : PTMA-ML.ipynb in PDF format. List of
                      abbreviations : PTMA :
                      poly(2,2,6,6-tetramethyl-1-piperidinyloxy-4-yl methacrylate)
                      TR : Training data set TE-1 : Test data set 1 TE-2 : Test
                      data set 2 ERT : Extremely randomized trees WSD : Whole
                      structure data set SOAP : Smooth overlap of atomic orbitals
                      MBTR : Many-body tensor representation DAD :
                      Distances-Angles-Dihedrals},
      cin          = {IET-1},
      cid          = {I:(DE-Juel1)IET-1-20110218},
      pnm          = {1223 - Batteries in Application (POF4-122) / DFG project
                      422726248 - SPP 2248: Polymer-basierte Batterien (422726248)
                      / HITEC - Helmholtz Interdisciplinary Doctoral Training in
                      Energy and Climate Research (HITEC) (HITEC-20170406)},
      pid          = {G:(DE-HGF)POF4-1223 / G:(GEPRIS)422726248 /
                      G:(DE-Juel1)HITEC-20170406},
      typ          = {PUB:(DE-HGF)32},
      doi          = {10.26165/JUELICH-DATA/TOBXWP},
      url          = {https://juser.fz-juelich.de/record/1028902},
}