% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Dataset record: replication data and code for the manuscript
% `Machine learning isotropic g values of radical polymers'.
% The fields reportid, cin, cid, pnm, pid and typ are not standard BibTeX
% fields (presumably repository-export metadata -- TODO confirm); standard
% styles silently ignore them.
% File and folder names in the abstract are typeset with \texttt and escaped
% underscores so that the field compiles cleanly if a style prints it.
@misc{Daniel:1028902,
  author    = {Daniel, Davis Thomas and Mitra, Souvik and Eichel,
               Rüdiger-A. and Diddens, Diddo and Granwehr, Josef},
  title     = {{Replication} {Data} and {Code} for: `{Machine} learning
               isotropic g values of radical polymers'},
  publisher = {Jülich DATA},
  reportid  = {FZJ-2024-04862},
  year      = {2024},
  abstract  = {This data repository contains the data sets and python
               scripts associated with the manuscript `Machine learning
               isotropic g values of radical polymers'. Electron
               paramagnetic resonance measurements allow for obtaining
               experimental g values of radical polymers. Analogous to
               chemical shifts, g values give insight into the identity and
               environment of the paramagnetic center. In this work,
               Machine learning based prediction of g values is explored as
               a viable alternative to computationally expensive density
               functional theory (DFT) methods. Description of folder
               contents (switch to tree view): Datasets : Contains PTMA
               polymer structures from TR, TE-1, and TE-2 data sets
               transformed using a molecular descriptor (SOAP, MBTR or DAD)
               and corresponding DFT-calculated g values. Filenames contain
               \texttt{PTMA\_X} where X denotes the number of monomers which
               are radicals. Structure data sets have
               \texttt{structure\_data} in the title, DFT calculated g
               values have \texttt{giso\_DFT\_data} in the title. The files
               are in .npy (NumPy) format. Models : ERT models trained on
               SOAP, MBTR and DAD feature vectors. Scripts : Contains
               scripts which can be used to predict g values from XYZ files
               of PTMA structures with 6 monomer units and varying radical
               density. The script \texttt{prediction\_functions.py}
               contains the functions which transform the XYZ coordinates
               into an appropriate feature vector which the trained model
               uses to predict. Description of individual functions are
               also given as docstrings (python documentation strings) in
               the code. The folder also contains additional files needed
               for the ERT-DAD model in .pkl format. \texttt{XYZ\_files} :
               Contains atomic coordinates of PTMA structures in XYZ
               format. Two subfolders : WSD and TE-2 correspond to
               structures present in the whole structure data set and TE-2
               test data set (see main text in the manuscript for details).
               Filenames in the folder \texttt{XYZ\_files/TE-2/PTMA-X/} are
               of the type \texttt{chainlength\_6ptma\_Y'\_Y''.xyz} where
               \texttt{chainlength\_6ptma} denotes the length of polymer
               chain (6 monomers), $Y'$ denotes the proportion of monomers
               which are radicals (for instance, $Y' = 50$ means 3 out of 6
               monomers are radicals) and $Y''$ denotes the order of the MD
               time frame. Actual time frame values of $Y''$ in ps is given
               in the manuscript. PTMA-ML.ipynb : Jupyter notebook
               detailing the workflow of generating the trained model. The
               file includes steps to load data sets, transform xyz files
               using molecular descriptors, optimise hyperparameters, train
               the model, cross validate using the training data set and
               evaluate the model. PTMA-ML.pdf : PTMA-ML.ipynb in PDF
               format. List of abbreviations : PTMA :
               poly(2,2,6,6-tetramethyl-1-piperidinyloxy-4-yl methacrylate)
               TR : Training data set TE-1 : Test data set 1 TE-2 : Test
               data set 2 ERT : Extremely randomized trees WSD : Whole
               structure data set SOAP : Smooth overlap of atomic orbitals
               MBTR : Many-body tensor representation DAD :
               Distances-Angles-Dihedrals},
  cin       = {IET-1},
  cid       = {I:(DE-Juel1)IET-1-20110218},
  pnm       = {1223 - Batteries in Application (POF4-122) / DFG project
               422726248 - SPP 2248: Polymer-basierte Batterien (422726248)
               / HITEC - Helmholtz Interdisciplinary Doctoral Training in
               Energy and Climate Research (HITEC) (HITEC-20170406)},
  pid       = {G:(DE-HGF)POF4-1223 / G:(GEPRIS)422726248 /
               G:(DE-Juel1)HITEC-20170406},
  typ       = {PUB:(DE-HGF)32},
  doi       = {10.26165/JUELICH-DATA/TOBXWP},
  url       = {https://juser.fz-juelich.de/record/1028902},
}