% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Mayfrank, Mitsos, Dahmen (2023): arXiv preprint, arXiv identifier 2308.01674
% (the identifier is the suffix of the DOI below).
% Typed as misc rather than article: no journal is recorded for this work, and
% an article entry without a journal field triggers a missing-field warning.
% Only the proper noun "Koopman" is brace-protected in the title so that the
% bibliography style stays in control of the casing of every other word.
% primaryclass is taken from the first arXiv category listed in keywords;
% TODO confirm against the arXiv listing.
% reportid, cin, cid, pnm, pid and typ are non-standard fields; unknown fields
% are ignored by bibliography styles and are kept here unchanged.
@misc{Mayfrank:1021653,
  author        = {Mayfrank, Daniel and Mitsos, Alexander and Dahmen, Manuel},
  title         = {End-to-End Reinforcement Learning of {Koopman}
                   Models for Economic Nonlinear Model Predictive
                   Control},
  publisher     = {arXiv},
  reportid      = {FZJ-2024-00909},
  year          = {2023},
  eprint        = {2308.01674},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  abstract      = {(Economic) nonlinear model predictive control ((e)NMPC)
                   requires dynamic system models that are sufficiently
                   accurate in all relevant state-space regions. These models
                   must also be computationally cheap enough to ensure
                   real-time tractability. Data-driven surrogate models for
                   mechanistic models can be used to reduce the computational
                   burden of (e)NMPC; however, such models are typically
                   trained by system identification for maximum average
                   prediction accuracy on simulation samples and perform
                   suboptimally as part of actual (e)NMPC. We present a method
                   for end-to-end reinforcement learning of dynamic surrogate
                   models for optimal performance in (e)NMPC applications,
                   resulting in predictive controllers that strike a favorable
                   balance between control performance and computational
                   demand. We validate our method on two applications derived
                   from an established nonlinear continuous stirred-tank
                   reactor model. We compare the controller performance to that
                   of MPCs utilizing models trained by the prevailing maximum
                   prediction accuracy paradigm, and model-free neural network
                   controllers trained using reinforcement learning. We show
                   that our method matches the performance of the model-free
                   neural network controllers while consistently outperforming
                   models derived from system identification. Additionally, we
                   show that the MPC policies can react to changes in the
                   control setting without retraining.},
  keywords      = {Machine Learning (cs.LG) (Other) / Systems and Control
                   (eess.SY) (Other) / FOS: Computer and information sciences
                   (Other) / FOS: Electrical engineering, electronic
                   engineering, information engineering (Other)},
  cin           = {IEK-10},
  cid           = {I:(DE-Juel1)IEK-10-20170217},
  pnm           = {1121 - Digitalization and Systems Technology for
                   Flexibility Solutions (POF4-112) / HDS LEE - Helmholtz
                   School for Data Science in Life, Earth and Energy (HDS LEE)
                   (HDS-LEE-20190612)},
  pid           = {G:(DE-HGF)POF4-1121 / G:(DE-Juel1)HDS-LEE-20190612},
  typ           = {PUB:(DE-HGF)25},
  doi           = {10.48550/ARXIV.2308.01674},
  url           = {https://juser.fz-juelich.de/record/1021653},
}