% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
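%
% As a sketch of how this file can be consumed (the file name "references.bib"
% and the surrounding document are assumptions, not part of this record), a
% minimal biblatex setup using the biber backend:
%
%   \documentclass{article}
%   \usepackage[backend=biber]{biblatex}
%   \addbibresource{references.bib}
%   \begin{document}
%   Cited as \cite{Effen:1051601}.
%   \printbibliography
%   \end{document}
%
% Run pdflatex, then biber, then pdflatex again to resolve the citation.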
@MASTERSTHESIS{Effen:1051601,
author = {Effen, Moritz},
title = {{A}n {I}nvestigation of a {M}ultimodal {V}ariational
{A}utoencoder {F}ramework for {P}hysics {D}ata},
school = {RWTH Aachen},
type = {Masterarbeit},
reportid = {FZJ-2026-00524},
pages = {75 p.},
year = {2025},
note = {Masterarbeit, RWTH Aachen, 2025},
abstract = {Many scientific domains, such as physics, produce
multimodal data when observing complex phenomena or when
conducting experiments. Understanding the individual contribution of
each modality can help to optimise experimental setups and
sensors, thereby potentially increasing accuracy on
domain-specific tasks that rely on such data. This thesis
examines the role of multimodal data in (downstream)
prediction tasks, with a focus on the unique and shared
contributions of the respective modalities. Disentangled
representation learning is a paradigm that aims to extract
the independent, underlying factors from data. We employ
this approach for multimodal data, proposing an extension to
the disentangled multimodal variational autoencoder (DMVAE)
by incorporating an additional optimisation objective to
enforce minimal redundancy between shared and unique latent
representations extracted by the DMVAE. Based on these
representations, we train and evaluate several downstream
tasks to study their contributions to the task. We compare
this method to the traditional DMVAE and VAE across
multimodal and single-modal configurations and also compare
it directly to regression models. In our experiments, this
approach is applied to the Multimodal Universe (MMU)
astronomical dataset, which includes both imagery and
spectral data. We also evaluate the impact of a
physics-based, differentiable image decoder for
extracting meaningful parameters into the latent space.
Additionally, the setup is applied to HyPlant hyperspectral
remote sensing data, which consists of airborne measurements
of Earth’s surface, to study it as a source of multimodal
data and to test how much information images and spectra
contain about the hyperspectral data.},
cin = {IAS-8},
cid = {I:(DE-Juel1)IAS-8-20210421},
pnm = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
and Research Groups (POF4-511)},
pid = {G:(DE-HGF)POF4-5112},
typ = {PUB:(DE-HGF)19},
doi = {10.34734/FZJ-2026-00524},
url = {https://juser.fz-juelich.de/record/1051601},
}
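% The abstract above mentions an added optimisation objective that enforces
% minimal redundancy between shared and unique latent representations. As an
% illustrative sketch only (an assumed, common formulation, not necessarily the
% thesis's actual loss), such an objective is often written as a
% mutual-information penalty between the shared code z_s and the
% modality-specific code z_u:
%
%   \mathcal{L} = \mathcal{L}_{\mathrm{DMVAE}} + \lambda \, I(z_s; z_u)
%
% where \lambda weights the redundancy penalty and I(\cdot\,;\cdot) would be
% estimated in practice, e.g. via a variational bound.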