% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Dagaev et al. (2023), "A too-good-to-be-true prior to reduce shortcut
% reliance", Pattern Recognition Letters 166, pp. 164--171.
% The fields reportid, cin, ddc, cid, pnm, pid, typ, pubmed and UT are not
% standard BibTeX fields; standard styles silently ignore unknown fields, so
% they are kept purely as record metadata.
% All field values are plain ASCII (LaTeX quotes and dashes in the abstract),
% so this entry does not depend on a UTF-8-aware BibTeX implementation.
@article{Dagaev:1024830,
  author    = {Dagaev, Nikolay and Roads, Brett D. and Luo, Xiaoliang and
               Barry, Daniel N. and Patil, Kaustubh R. and Love, Bradley C.},
  title     = {A too-good-to-be-true prior to reduce shortcut reliance},
  journal   = {Pattern Recognition Letters},
  volume    = {166},
  pages     = {164--171},
  year      = {2023},
  issn      = {0167-8655},
  address   = {Amsterdam},
  publisher = {Elsevier},
  reportid  = {FZJ-2024-02496},
  abstract  = {Despite their impressive performance in object recognition
               and other tasks under standard testing conditions, deep
               networks often fail to generalize to out-of-distribution
               (o.o.d.) samples. One cause for this shortcoming is that
               modern architectures tend to rely on ``shortcuts'' --
               superficial features that correlate with categories without
               capturing deeper invariants that hold across contexts.
               Real-world concepts often possess a complex structure that
               can vary superficially across contexts, which can make the
               most intuitive and promising solutions in one context not
               generalize to others. One potential way to improve o.o.d.
               generalization is to assume simple solutions are unlikely to
               be valid across contexts and avoid them, which we refer to
               as the too-good-to-be-true prior. A low-capacity network
               (LCN) with a shallow architecture should only be able to
               learn surface relationships, including shortcuts. We find
               that LCNs can serve as shortcut detectors. Furthermore, an
               LCN's predictions can be used in a two-stage approach to
               encourage a high-capacity network (HCN) to rely on deeper
               invariant features that should generalize broadly. In
               particular, items that the LCN can master are downweighted
               when training the HCN. Using a modified version of the
               CIFAR-10 dataset in which we introduced shortcuts, we found
               that the two-stage LCN-HCN approach reduced reliance on
               shortcuts and facilitated o.o.d. generalization.},
  cin       = {INM-7},
  ddc       = {004},
  cid       = {I:(DE-Juel1)INM-7-20090406},
  pnm       = {5251 - Multilevel Brain Organization and Variability
               (POF4-525) / 5254 - Neuroscientific Data Analytics and AI
               (POF4-525)},
  pid       = {G:(DE-HGF)POF4-5251 / G:(DE-HGF)POF4-5254},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {37915616},
  UT        = {WOS:000935348300001},
  doi       = {10.1016/j.patrec.2022.12.010},
  url       = {https://juser.fz-juelich.de/record/1024830},
}