% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@ARTICLE{Doncevic:917550,
author = {Doncevic, Danimir and Mitsos, Alexander and Guo, Yue and
Li, Qianxiao and Dietrich, Felix and Dahmen, Manuel and
Kevrekidis, Ioannis G.},
title = {{A Recursively Recurrent Neural Network (R2N2) Architecture for Learning Iterative Algorithms}},
publisher = {arXiv},
reportid = {FZJ-2023-00752},
year = {2022},
abstract = {Meta-learning of numerical algorithms for a given task
consists of the data-driven identification and adaptation of
an algorithmic structure and the associated hyperparameters.
To limit the complexity of the meta-learning problem, neural
architectures with a certain inductive bias towards
favorable algorithmic structures can, and should, be used.
We generalize our previously introduced Runge-Kutta neural
network to a recursively recurrent neural network (R2N2)
superstructure for the design of customized iterative
algorithms. In contrast to off-the-shelf deep learning
approaches, it features a distinct division into modules for
generation of information and for the subsequent assembly of
this information towards a solution. Local information in
the form of a subspace is generated by subordinate, inner,
iterations of recurrent function evaluations starting at the
current outer iterate. The update to the next outer iterate
is computed as a linear combination of these evaluations,
reducing the residual in this space, and constitutes the
output of the network. We demonstrate that regular training
of the weight parameters inside the proposed superstructure
on input/output data of various computational problem
classes yields iterations similar to Krylov solvers for
linear equation systems, Newton-Krylov solvers for nonlinear
equation systems, and Runge-Kutta integrators for ordinary
differential equations. Due to its modularity, the
superstructure can be readily extended with functionalities
needed to represent more general classes of iterative
algorithms traditionally based on Taylor series expansions.},
keywords = {Machine Learning (cs.LG) / Numerical Analysis (math.NA) /
FOS: Computer and information sciences / FOS: Mathematics},
cin = {IEK-10},
cid = {I:(DE-Juel1)IEK-10-20170217},
pnm = {1121 - Digitalization and Systems Technology for
Flexibility Solutions (POF4-112) / HDS LEE - Helmholtz
School for Data Science in Life, Earth and Energy (HDS LEE)
(HDS-LEE-20190612)},
pid = {G:(DE-HGF)POF4-1121 / G:(DE-Juel1)HDS-LEE-20190612},
typ = {PUB:(DE-HGF)25},
doi = {10.48550/ARXIV.2211.12386},
url = {https://juser.fz-juelich.de/record/917550},
}
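% The abstract above describes the R2N2 update structure: inner recurrent
% function evaluations starting at the current outer iterate generate a local
% subspace, and the next outer iterate is a linear combination of those
% evaluations that reduces the residual. Below is a minimal NumPy sketch of
% that outer/inner structure, not the authors' implementation: the function
% r2n2_step, the residual map f, and the fixed weights standing in for trained
% network parameters are all illustrative assumptions.
import numpy as np

def r2n2_step(f, x, weights):
    """One outer update assembled from inner recurrent evaluations of f."""
    # Inner iterations: recurrent function evaluations starting at the
    # current outer iterate x span a local subspace of directions.
    v = f(x)
    directions = [v]
    for _ in range(len(weights) - 1):
        v = f(x + v)                      # recurrent evaluation (illustrative)
        directions.append(v)
    # Outer update: a linear combination of the evaluations; in the paper the
    # combination weights come from the trained network, here they are fixed.
    return x + sum(w * d for w, d in zip(weights, directions))

# Toy usage (assumption, for illustration only): a linear system A x = b with
# residual map f(x) = b - A x, so the inner evaluations resemble a Krylov basis.
A = np.array([[4.0, 1.0], [1.0, 3.0]])
b = np.array([1.0, 2.0])
f = lambda x: b - A.dot(x)
x = np.zeros(2)
weights = np.array([0.55, 0.09])          # stand-ins for learned parameters
for _ in range(20):
    x = r2n2_step(f, x, weights)
print(x, np.linalg.norm(b - A.dot(x)))    # x ends up close to the true solution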