% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@article{Patnala:1037598,
  author    = {Patnala, Ankit and Stadtler, Scarlet and Schultz, Martin G.
               and Gall, Juergen},
  title     = {{Bi-modal} Contrastive Learning for Crop Classification
               Using {Sentinel-2} and {Planetscope}},
  journal   = {Frontiers in Remote Sensing},
  volume    = {5},
  pages     = {1480101},
  year      = {2024},
  issn      = {2673-6187},
  publisher = {Frontiers Media},
  address   = {Lausanne},
  doi       = {10.3389/frsen.2024.1480101},
  url       = {https://juser.fz-juelich.de/record/1037598},
  abstract  = {Remote sensing has enabled large-scale crop classification
               for understanding agricultural ecosystems and estimating
               production yields. In recent years, machine learning has
               become increasingly relevant for automated crop
               classification. However, the existing algorithms require a
               huge amount of annotated data. Self-supervised learning,
               which enables training on unlabeled data, has great
               potential to overcome the problem of annotation. Contrastive
               learning, a self-supervised approach based on instance
               discrimination, has shown promising results in the field of
               natural as well as remote sensing images. Crop data often
               consists of field parcels or sets of pixels from small
               spatial regions. Additionally, one needs to account for
               temporal patterns to correctly label crops. Hence, the
               standard approaches for landcover classification cannot be
               applied. In this work, we propose two contrastive
               self-supervised learning approaches to obtain a pre-trained
               model for crop classification without the need for labeled
               data. First, we adopt the uni-modal contrastive method
               (SCARF) and, second, we use a bi-modal approach based on
               Sentinel-2 and Planetscope data instead of standard
               transformations developed for natural images to accommodate
               the spectral characteristics of crop pixels. Evaluation in
               three regions of Germany and France shows that crop
               classification with the pre-trained multi-modal model is
               superior to the pre-trained uni-modal method as well as the
               supervised baseline models in the majority of test cases.},
  reportid  = {FZJ-2025-00769},
  cin       = {JSC},
  ddc       = {600},
  cid       = {I:(DE-Juel1)JSC-20090406},
  pnm       = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
               (SDLs) and Research Groups (POF4-511)},
  pid       = {G:(DE-HGF)POF4-5111},
  typ       = {PUB:(DE-HGF)16},
  UT        = {WOS:001380631400001},
}