% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference contribution to Computational Psychiatry 2013; repository record
% 141532 (see the url field). The conference name is stored in booktitle, which
% this entry type requires; venue and eventdate carry the location and the
% meeting dates for biblatex and are ignored by classic BibTeX styles.
% reportid, subtyp, cin, cid, pnm, pid and typ are non-standard fields kept
% verbatim from the export (presumably repository bookkeeping); standard
% styles ignore unknown fields.
@inproceedings{Jitsev:141532,
  author    = {Jitsev, Jenia and Abraham, Nobi and Tittgemeyer, Marc and
               Morrison, Abigail},
  title     = {Functional role of opponent, dopamine modulated {D1/D2}
               plasticity in prediction error-driven reinforcement learning
               in the basal ganglia},
  booktitle = {Computational Psychiatry 2013},
  venue     = {Miami, USA},
  eventdate = {2013-10-22/2013-10-23},
  reportid  = {FZJ-2013-06700},
  year      = {2013},
  month     = oct,
  date      = {2013-10-22},
  abstract  = {In this work, we introduce a spiking actor-critic network
               model of learning from both reward and punishment in the
               basal ganglia. Both the dorsal (actor) and ventral (critic)
               striatum are assumed to contain populations of D1 and D2
               medium spiny neurons (MSNs). In the ventral striatum, this
               allows separate representation of both positive and negative
               expected outcomes by respective D1/D2 MSN populations, which
               we hypothesize to reside in the shell part of the Nucleus
               Accumbens. The positive and negative outcome expectations
               are fed to dopamine (DA) neurons in VTA region, which
               compute and signal total prediction error by DA release.
               Based on recent experimental work [1], DA level is assumed
               to modulate plasticity of D1 and D2 synapses in opposing
               way, inducing LTP on D1 and LTD on D2 synapses if being high
               and vice versa if being low. Crucially, this form of
               opponent plasticity implements temporal-difference (TD)-like
               update of both positive and negative outcome expectations
               and performs appropriate adaptation of action preferences. We
               implemented the network in the NEST simulator [2] using
               leaky integrate-and-fire spiking neurons, and designed a
               battery of experiments in various grid world tasks. Across
               the tasks the network can learn both to approach the delayed
               rewards while consequently avoiding punishments, which posed
               severe difficulties for the previous model without D1/D2
               segregation [3]. The model highlights thus the functional
               role of D1/D2 MSN segregation within the striatum in
               implementing appropriate TD-like learning from both reward
               and punishment and explains necessity for opponent direction
               of DA-dependent plasticity found at synapses converging on
               distinct striatal MSN types. The approach can be further
               extended to study how abnormal D1/D2 plasticity may lead to
               a reorganization of the basal ganglia network towards
               pathological, dysfunctional states, like for instance those
               observed in Parkinson disease under condition of progressive
               dopamine depletion. [1] Shen, W., Flajolet, M., Greengard, P.
               and Surmeier, D. J. Dichotomous dopaminergic control of
               striatal synaptic plasticity. Science, 2008, 321, 848-851 [2]
               Gewaltig M-O and Diesmann M (2007). NEST, Scholarpedia
               2(4):1430 [3] Potjans, W., Diesmann, M. and Morrison, A. An
               imperfect dopaminergic error signal can drive
               temporal-difference learning. PLoS Comput. Biol., 2011, 7},
  subtyp    = {Other},
  cin       = {INM-6 / IAS-6},
  cid       = {I:(DE-Juel1)INM-6-20090406 / I:(DE-Juel1)IAS-6-20130828},
  pnm       = {311 - Signaling pathways, cell and tumor biology (POF2-311)
               / HASB - Helmholtz Alliance on Systems Biology
               (HGF-SystemsBiology) / SMHB - Supercomputing and Modelling
               for the Human Brain (HGF-SMHB-2013-2017) / W2Morrison -
               W2/W3 Professorinnen Programm der Helmholtzgemeinschaft
               (B1175.01.12)},
  pid       = {G:(DE-HGF)POF2-311 / G:(DE-Juel1)HGF-SystemsBiology /
               G:(DE-Juel1)HGF-SMHB-2013-2017 / G:(DE-HGF)B1175.01.12},
  typ       = {PUB:(DE-HGF)24},
  url       = {https://juser.fz-juelich.de/record/141532},
}