% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Master's thesis record for the key Kromm:1046025.
% Standard bibliographic fields come first (author, title, school, type,
% address, year, note). The remaining fields (othercontributors, reportid,
% extent, abstract, cin, cid, pnm, pid, typ) are not standard BibTeX fields
% and appear to be repository bookkeeping; standard styles ignore unknown
% fields, so they are retained as-is.
% The title is stored in plain Title Case without per-letter braces so that
% the bibliography style decides the final casing. The page count is stored
% in pagetotal because the thesis has no page range; the original free-text
% description is preserved in the extent field.
@MASTERSTHESIS{Kromm:1046025,
author = {Kromm, Edward},
othercontributors = {Benassou, Sabrina and Kesselheim, Stefan},
title = {Data Fusion for Scene Graph Generation:
Bridging Simulated and Real-World Datasets},
school = {Hochschule Coburg},
type = {Masterarbeit},
address = {Jülich},
reportid = {FZJ-2025-03665},
pagetotal = {102},
extent = {102 pages: Figures, Tables},
year = {2025},
note = {Masterarbeit, Hochschule Coburg, 2025},
abstract = {Scene graph generation has emerged as a powerful tool for
AI-driven visual understanding of images by not only
detecting objects in an image but also predicting the
relationships between them, such as car–stops at–traffic
light or pedestrian–crosses–street. This capability is
particularly important for autonomous driving, where
relational context between road users and infrastructure
plays a critical role. However, the application of scene
graph generation in this domain is hindered by the scarcity
of annotated datasets. Driving simulators such as CARLA
provide a scalable alternative, enabling efficient data
generation compared to manual annotation. Yet models trained
exclusively on simulated data often fail to generalize to
real-world data due to the substantial domain gap between
the two. This thesis addresses this challenge by proposing a
novel data fusion framework that combines simulated and real
datasets to construct autonomous driving–specific
relationship annotations and subsequently bridge the domain
gap for real-world prediction. The work presents the complete
pipeline, including dataset generation in simulation,
adaptation of publicly available resources, and augmentation
strategies. The Relation Transformer model is analyzed in
depth, and particular attention is given to interpreting its
internal mechanisms by visualizing the learned attention maps
as heatmaps. This analysis provides insights into whether the
model focuses on semantically meaningful regions when
predicting relationships. Building on this understanding, two
new approaches are introduced to enable inference on real
data while transferring relational knowledge acquired in
simulation. An ablation study further quantifies the impact
of the domain gap on model performance and highlights the
strengths and limitations of the proposed methods. Results
demonstrate that one of the developed approaches effectively
mitigates the simulation-to-reality gap and concrete
suggestions for advancing this technique toward further uses
for AI-driven visual understanding of images in the
automotive context are provided.},
cin = {JSC},
cid = {I:(DE-Juel1)JSC-20090406},
pnm = {5112 - Cross-Domain Algorithms, Tools, Methods Labs (ATMLs)
and Research Groups (POF4-511) / nxtAIM - nxtAIM – NXT GEN
AI Methods (19A23014l)},
pid = {G:(DE-HGF)POF4-5112 / G:(BMWK)19A23014l},
typ = {PUB:(DE-HGF)19},
doi = {10.34734/FZJ-2025-03665},
url = {https://juser.fz-juelich.de/record/1046025},
}