% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@ARTICLE{Wang:1048850,
author = {Wang, Qin and Bruns, Benjamin and Scharr, Hanno and
Krajsek, Kai},
title = {{S}elf-{S}upervised {L}earning based on {T}ransformed
{I}mage {R}econstruction for {E}quivariance-{C}oherent
{F}eature {R}epresentation},
reportid = {FZJ-2025-04955},
year = {2025},
abstract = {The equivariant behaviour of features is essential in many
computer vision tasks, yet popular self-supervised learning
(SSL) methods tend to constrain equivariance by design. We
propose a self-supervised learning approach where the system
learns transformations independently by reconstructing
images that have undergone previously unseen
transformations. Specifically, the model is tasked to
reconstruct intermediate transformed images, e.g. translated
or rotated images, without prior knowledge of these
transformations. This auxiliary task encourages the model to
develop equivariance-coherent features without relying on
predefined transformation rules. To this end, we apply
transformations to the input image, generating an image
pair, and then split the extracted features into two sets
per image. One set is used with a standard SSL loss that
encourages invariance; the other is used with our loss based
on the auxiliary task of reconstructing the intermediate
transformed images. Our
loss and the SSL loss are linearly combined with weighted
terms. Evaluated on synthetic tasks with natural images,
our proposed method strongly outperforms all competitors,
regardless of whether they are designed to learn
equivariance. Furthermore, when trained alongside
augmentation-based methods such as iBOT or DINOv2 as the
invariance task, our method successfully learns a balanced
combination of invariant and equivariant features. Our
approach performs strongly on a rich set of realistic
computer vision downstream tasks, almost always improving
over all baselines.},
cin = {IAS-8 / JSC},
cid = {I:(DE-Juel1)IAS-8-20210421 / I:(DE-Juel1)JSC-20090406},
pnm = {5111 - Domain-Specific Simulation $\&$ Data Life Cycle Labs
(SDLs) and Research Groups (POF4-511) / 5112 - Cross-Domain
Algorithms, Tools, Methods Labs (ATMLs) and Research Groups
(POF4-511) / SLNS - SimLab Neuroscience (Helmholtz-SLNS)},
pid = {G:(DE-HGF)POF4-5111 / G:(DE-HGF)POF4-5112 /
G:(DE-Juel1)Helmholtz-SLNS},
typ = {PUB:(DE-HGF)25},
doi = {10.34734/FZJ-2025-04955},
url = {https://juser.fz-juelich.de/record/1048850},
}
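% A minimal sketch, in notation of our own choosing, of the weighted loss
% combination described in the abstract (the symbols below are assumptions,
% not taken from the record):
%   $\mathcal{L}_{\mathrm{total}} = \alpha\,\mathcal{L}_{\mathrm{SSL}} + \beta\,\mathcal{L}_{\mathrm{recon}}$
% Here $\mathcal{L}_{\mathrm{SSL}}$ is the invariance-encouraging loss of the base
% method (e.g. iBOT or DINOv2) applied to one feature subset,
% $\mathcal{L}_{\mathrm{recon}}$ is the loss for reconstructing the intermediate
% transformed images from the other subset, and $\alpha$, $\beta$ are the
% weighting terms of the linear combination.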