% IMPORTANT: This file is UTF-8 encoded. In the presence of non-ASCII
% characters it will not work with BibTeX 0.99 or older; use an up-to-date
% BibTeX implementation such as “bibtex8” or “biber” instead.
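%
% A minimal classic-BibTeX usage sketch, shown here as a comment so the file
% stays valid. The file name “references.bib”, the driver file, and the
% “plain” style are assumptions, not part of this record; the cite key comes
% from the entry below:
%
%   \documentclass{article}
%   \begin{document}
%   Koopman-based (e)NMPC is studied in \cite{Mayfrank:1021653}.
%   \bibliographystyle{plain}
%   \bibliography{references}
%   \end{document}
%
% Run latex, then “bibtex8” on the document's base name (instead of plain
% bibtex 0.99, which would mangle the non-ASCII characters), then latex twice
% so the citation resolves.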

@MISC{Mayfrank:1021653,
      author       = {Mayfrank, Daniel and Mitsos, Alexander and Dahmen, Manuel},
      title        = {{End-to-End Reinforcement Learning of Koopman Models
                      for Economic Nonlinear Model Predictive Control}},
      publisher    = {arXiv},
      reportid     = {FZJ-2024-00909},
      year         = {2023},
      eprint       = {2308.01674},
      archiveprefix= {arXiv},
      abstract     = {(Economic) nonlinear model predictive control ((e)NMPC)
                      requires dynamic system models that are sufficiently
                      accurate in all relevant state-space regions. These models
                      must also be computationally cheap enough to ensure
                      real-time tractability. Data-driven surrogate models for
                      mechanistic models can be used to reduce the computational
                      burden of (e)NMPC; however, such models are typically
                      trained by system identification for maximum average
                      prediction accuracy on simulation samples and perform
                      suboptimally as part of actual (e)NMPC. We present a method
                      for end-to-end reinforcement learning of dynamic surrogate
                      models for optimal performance in (e)NMPC applications,
                      resulting in predictive controllers that strike a favorable
                      balance between control performance and computational
                      demand. We validate our method on two applications derived
                      from an established nonlinear continuous stirred-tank
                      reactor model. We compare the controller performance to
                      that of MPCs utilizing models trained by the prevailing
                      maximum prediction accuracy paradigm and to that of
                      model-free neural network controllers trained using
                      reinforcement learning. We show that our method matches
                      the performance of the model-free neural network
                      controllers while consistently outperforming MPCs whose
                      models are derived from system identification.
                      Additionally, we show that the MPC policies can react to
                      changes in the control setting without retraining.},
      keywords     = {Machine Learning (cs.LG) / Systems and Control
                      (eess.SY) / FOS: Computer and information sciences /
                      FOS: Electrical engineering, electronic engineering,
                      information engineering},
      cin          = {IEK-10},
      cid          = {I:(DE-Juel1)IEK-10-20170217},
      pnm          = {1121 - Digitalization and Systems Technology for
                      Flexibility Solutions (POF4-112) / HDS LEE - Helmholtz
                      School for Data Science in Life, Earth and Energy (HDS LEE)
                      (HDS-LEE-20190612)},
      pid          = {G:(DE-HGF)POF4-1121 / G:(DE-Juel1)HDS-LEE-20190612},
      typ          = {PUB:(DE-HGF)25},
      doi          = {10.48550/ARXIV.2308.01674},
      url          = {https://juser.fz-juelich.de/record/1021653},
}
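%
% A biblatex/biber alternative, as recommended in the header comment; a
% minimal sketch assuming this file is saved as “references.bib”, with the
% cite key taken from the entry above:
%
%   \documentclass{article}
%   \usepackage[backend=biber]{biblatex}
%   \addbibresource{references.bib}
%   \begin{document}
%   \cite{Mayfrank:1021653}
%   \printbibliography
%   \end{document}
%
% Compile with latex, run “biber” on the document's base name, then latex
% again; biber handles the UTF-8 content natively.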