% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article: Morita, Jitsev and Morrison (2016), Behavioural Brain
% Research 311, pp. 110--121.
%
% Notes for maintainers:
%   * The citation key is referenced from documents; do not rename it.
%   * The title is stored in sentence case. Only the first word of the
%     subtitle is brace-protected (as a whole word) so that styles which
%     down-case titles keep its capital after the colon.
%   * The abstract contains UTF-8 punctuation (curly quotes); process this
%     file with an 8-bit/UTF-8 capable tool such as bibtex8 or biber.
%   * reportid, cin, ddc, cid, pnm, pid, typ, UT and pubmed are not standard
%     BibTeX fields; standard styles ignore unknown fields. They appear to be
%     repository-export metadata (institute, funding programme, record type,
%     external database identifiers) -- confirm against the exporting system
%     before relying on their exact meaning.
@article{Morita:826548,
  author    = {Morita, Kenji and Jitsev, Jenia and Morrison, Abigail},
  title     = {Corticostriatal circuit mechanisms of value-based action
               selection: {Implementation} of reinforcement learning
               algorithms and beyond},
  journal   = {Behavioural Brain Research},
  volume    = {311},
  issn      = {0166-4328},
  address   = {Amsterdam},
  publisher = {Elsevier},
  reportid  = {FZJ-2017-00771},
  pages     = {110--121},
  year      = {2016},
  abstract  = {Value-based action selection has been suggested to be
               realized in the corticostriatal local circuits through
               competition among neural populations. In this article, we
               review theoretical and experimental studies that have
               constructed and verified this notion, and provide new
               perspectives on how the local-circuit selection mechanisms
               implement reinforcement learning (RL) algorithms and
               computations beyond them. The striatal neurons are mostly
               inhibitory, and lateral inhibition among them has been
               classically proposed to realize “Winner-Take-All (WTA)”
               selection of the maximum-valued action (i.e., ‘max’
               operation). Although this view has been challenged by the
               revealed weakness, sparseness, and asymmetry of lateral
               inhibition, which suggest more complex dynamics, WTA-like
               competition could still occur on short time scales. Unlike
               the striatal circuit, the cortical circuit contains
               recurrent excitation, which may enable retention or temporal
               integration of information and probabilistic “soft-max”
               selection. The striatal “max” circuit and the cortical
               “soft-max” circuit might co-implement an RL algorithm
               called Q-learning; the cortical circuit might also similarly
               serve for other algorithms such as SARSA. In these
               implementations, the cortical circuit presumably sustains
               activity representing the executed action, which negatively
               impacts dopamine neurons so that they can calculate
               reward-prediction-error. Regarding the suggested more
               complex dynamics of striatal, as well as cortical, circuits
               on long time scales, which could be viewed as a sequence of
               short WTA fragments, computational roles remain open: such a
               sequence might represent (1) sequential state-action-state
               transitions, constituting replay or simulation of the
               internal model, (2) a single state/action by the whole
               trajectory, or (3) probabilistic sampling of state/action.},
  cin       = {INM-6 / IAS-6},
  ddc       = {610},
  cid       = {I:(DE-Juel1)INM-6-20090406 / I:(DE-Juel1)IAS-6-20130828},
  pnm       = {574 - Theory, modelling and simulation (POF3-574) / 571 -
               Connectivity and Activity (POF3-571) / SMHB - Supercomputing
               and Modelling for the Human Brain (HGF-SMHB-2013-2017) /
               RL-BRD-J - Neural network mechanisms of reinforcement
               learning (BMBF-01GQ1343) / W2Morrison - W2/W3 Professorinnen
               Programm der Helmholtzgemeinschaft (B1175.01.12)},
  pid       = {G:(DE-HGF)POF3-574 / G:(DE-HGF)POF3-571 /
               G:(DE-Juel1)HGF-SMHB-2013-2017 / G:(DE-Juel1)BMBF-01GQ1343 /
               G:(DE-HGF)B1175.01.12},
  typ       = {PUB:(DE-HGF)16},
  UT        = {WOS:000380418200012},
  pubmed    = {pmid:27173430},
  doi       = {10.1016/j.bbr.2016.05.017},
  url       = {https://juser.fz-juelich.de/record/826548},
}