% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@inproceedings{Quercia:1034963,
  author       = {Quercia, Alessio and Yildiz, Erenus and Cao, Zhuo and
                  Morrison, Abigail and Krajsek, Kai and Assent, Ira and
                  Scharr, Hanno},
  title        = {{Multi-Source} {Auxiliary} {Tasks} supported {Monocular}
                  {Depth} {Estimation}},
  booktitle    = {Helmholtz AI Conference},
  reportid     = {FZJ-2025-00071},
  year         = {2024},
  note         = {The original abstract contains figures that cannot be shown
                  here.},
  abstract     = {Monocular depth estimation (MDE) is a challenging task in
                  computer vision, often hindered by the cost and scarcity of
                  high-quality labeled datasets. We tackle this challenge
                  using auxiliary datasets from related vision tasks for joint
                  training of a shared decoder on top of a pre-trained vision
                  foundation model, while giving a higher weight to MDE. In
                  particular, we leverage a frozen DINOv2 ViT Giant model as a
                  feature extractor, bypassing the need for fine-tuning, and
                  jointly train a shared DPT decoder with auxiliary datasets
                  from related tasks to improve MDE. We illustrate the
                  qualitative and quantitative improvements of our method over
                  the DINOv2 MDE baseline in Figures 1 and 2,
                  respectively. Notably, compared to the recent Depth Anything,
                  which reports no improvements using a jointly fine-tuned
                  DINOv2 ViT Large and task-specific decoders, our method
                  successfully leverages auxiliary tasks. Through extensive
                  experiments we demonstrate the benefits of incorporating
                  various auxiliary datasets and tasks to improve MDE quality
                  on average by $\sim 11\%$ for related datasets. Our experimental
                  analysis shows that auxiliary tasks have different impacts,
                  confirming the importance of task selection, highlighting
                  that quality gains are not achieved by merely adding data.
                  Remarkably, our study reveals that using semantic
                  segmentation datasets as multi-label dense classification
                  often results in additional quality gains.},
  month        = jun,
  date         = {2024-06-12},
  organization = {Helmholtz AI Conference, Düsseldorf
                  (Germany), 12 Jun 2024 - 14 Jun 2024},
  subtyp       = {After Call},
  cin          = {IAS-8 / IAS-6 / JSC},
  cid          = {I:(DE-Juel1)IAS-8-20210421 / I:(DE-Juel1)IAS-6-20130828 /
                  I:(DE-Juel1)JSC-20090406},
  pnm          = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
                  (SDLs) and Research Groups (POF4-511) / 5112 - Cross-Domain
                  Algorithms, Tools, Methods Labs (ATMLs) and Research Groups
                  (POF4-511) / SLNS - SimLab Neuroscience (Helmholtz-SLNS)},
  pid          = {G:(DE-HGF)POF4-5111 / G:(DE-HGF)POF4-5112 /
                  G:(DE-Juel1)Helmholtz-SLNS},
  typ          = {PUB:(DE-HGF)6},
  url          = {https://juser.fz-juelich.de/record/1034963},
}