% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference contribution to Dynamics Days Europe 2025 (report id
% FZJ-2025-03482).  The event name is stored in booktitle, which the
% inproceedings entry type requires; venue and eventdate are biblatex fields
% and are ignored by classic BibTeX styles.  year and month are kept next to
% date so that both classic BibTeX and biber see a publication date.  The
% fields reportid, cin, cid, pnm, pid and typ are non-standard and are left
% exactly as exported; styles ignore field names they do not know.
@inproceedings{Semcheddine:1045000,
      author       = {Semcheddine, Asma and Lessig, Christian and Luise, Ilaria
                      and Schultz, Martin and Langguth, Michael and Melidonis,
                      Savvas},
      title        = {The Effect of {BERT} Training on Atmospheric Data
                      Interpolation},
      booktitle    = {Dynamics Days Europe 2025},
      venue        = {Thessaloniki, Greece},
      eventdate    = {2025-06-23/2025-06-27},
      year         = {2025},
      month        = jun,
      date         = {2025-06-23},
      reportid     = {FZJ-2025-03482},
      abstract     = {Atmospheric science has witnessed a breakthrough in recent
                      years by harnessing deep learning models to understand and
                      replicate the complex relationships within and between
                      different atmospheric variables. AtmoRep [1], a foundational
                      model of atmospheric dynamics, was developed as a
                      task-agnostic model, trained on 40 years of hourly data in a
                      BERT-style manner, with up to $90\%$ of the data being
                      masked, in order to provide a plethora of downstream
                      applications. To further assess the model's ability to learn
                      a comprehensive abstract representation of atmospheric data,
                      we tested several systematic token-masking strategies
                      (geographical masking, temporal masking, a hybrid pattern
                      combining both, and masking along model levels) and examined
                      their effects on its data interpolation performance. Our
                      preliminary results indicate that the coupled-fields
                      transformer slightly outperforms the single-field
                      transformer, reinforcing the correlation between different
                      atmospheric fields. At a $75\%$ compression ratio, AtmoRep
                      achieves good reconstruction for the temperature field and
                      all three wind components. Additionally, AtmoRep appears to
                      benefit from the hybrid masking pattern, offering further
                      insights into large-scale representation learning and
                      enhancing our understanding of data-driven atmospheric
                      modeling.},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {5111 - Domain-Specific Simulation $\&$ Data Life Cycle Labs
                      (SDLs) and Research Groups (POF4-511) / Earth System Data
                      Exploration (ESDE) / BMFTR 01LK2316A - Warmworld Smarter
                      (IconRep) (-01LK2316A)},
      pid          = {G:(DE-HGF)POF4-5111 / G:(DE-Juel-1)ESDE /
                      G:(BMFTR)-01LK2316A},
      typ          = {PUB:(DE-HGF)1},
      url          = {https://juser.fz-juelich.de/record/1045000},
}