% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@INPROCEEDINGS{Upschulte:1033584,
author = {Upschulte, Eric and Harmeling, Stefan and Amunts, Katrin
and Dickscheid, Timo},
title = {{T}owards {U}niversal {I}nstance {S}egmentation {M}odels in
{B}iomedical {I}maging},
reportid = {FZJ-2024-06464},
year = {2024},
abstract = {Precise instance segmentation is crucial in many biomedical
research fields. One key challenge is applying models to new
data domains, which typically involves pre-training on a
larger corpus of data and fine-tuning with new annotations
for each specific domain. This process is labor-intensive and
requires creating and maintaining multiple branched versions
of the model. Working towards universal instance
segmentation models in biomedical imaging, we propose to
unify domain-adapted model branches into a single
multi-expert model, following a foundation model paradigm. Our
goal is to replace most existing fine-tuning scenarios with
prompt-based user instructions, allowing the user to clearly
state the task and object classes of interest. We
hypothesize that such a combined approach improves
generalization, as the base model can benefit from datasets
that were previously only used for fine-tuning. A key
challenge in the creation of such models is to resolve
training conflicts and ambiguity in a pragmatic fashion when
combining different segmentation tasks, datasets, and data
domains. Such conflicts can occur if datasets focus on
different classes in the same domain. For example, some
datasets annotate all cells, while others focus on specific
cell types. A naïve combination of such sets would create
an ill-posed learning problem for most models, requiring
them to infer their task from their input, which is
undesirable in a universal setting. Models like SAM and
MedSAM highlight the potential of prompting, but often
require external detectors and fine-tuning. Here, we propose
to leverage prompt-based task descriptions as a tool to
manipulate general model behavior, such that user
instructions yield domain-expert models. We test our
approach by training a Contour Proposal Network (CPN) on a
multi-modal data collection, including the TissueNet
dataset. Prompts, such as “cell segmentation” or simply
“nuclei”, modify the CPN to focus on segmenting the
respective object classes, achieving a mean F1 score of
0.90 on TissueNet (0.88 for cells, 0.92 for nuclei). This
is on par with specialized models and surpasses the naïve
combination without prompting, which reaches 0.84 (0.81, 0.87).
Overall, the proposed approach introduces an interactive
linguistic component that enables the conflict-free
composition of various segmentation datasets, thus making
it possible to unify previously separate segmentation
tasks. As such, we consider it an important step towards
universal models.},
month = {Nov},
date = {2024-11-19},
organization = {INM Retreat 2024, Jülich (Germany),
19 Nov 2024 - 20 Nov 2024},
subtyp = {After Call},
cin = {INM-1},
cid = {I:(DE-Juel1)INM-1-20090406},
pnm = {5251 - Multilevel Brain Organization and Variability
(POF4-525) / 5254 - Neuroscientific Data Analytics and AI
(POF4-525) / HIBALL - Helmholtz International BigBrain
Analytics and Learning Laboratory (HIBALL) (InterLabs-0015)
/ Helmholtz AI - Helmholtz Artificial Intelligence
Coordination Unit – Local Unit FZJ (E.40401.62) / EBRAINS
2.0 - EBRAINS 2.0: A Research Infrastructure to Advance
Neuroscience and Brain Health (101147319)},
pid = {G:(DE-HGF)POF4-5251 / G:(DE-HGF)POF4-5254 /
G:(DE-HGF)InterLabs-0015 / G:(DE-Juel-1)E.40401.62 /
G:(EU-Grant)101147319},
typ = {PUB:(DE-HGF)24},
url = {https://juser.fz-juelich.de/record/1033584},
}