% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference paper in the IGARSS 2023 proceedings; the repository record it
% was exported from is linked in the url field.
% - year/month give the publication date in the form classic BibTeX styles
%   expect; the conference itself (name, place, 16-21 Jul 2023) is described
%   by eventtitle/venue/eventdate, which biblatex reads and classic styles
%   ignore.
% - booktitle holds only the proceedings title; publisher, ISBN and DOI live
%   in their own fields so that the style decides how to print them.
% - reportid, comment, cin, cid, pnm, pid, typ and UT are repository
%   bookkeeping fields, not standard bibliography fields.
@inproceedings{Tian:1017950,
      author       = {Tian, Liang and Sedona, Rocco and Mozaffari, Amirpasha and
                      Kreshpa, Enxhi and Paris, Claudia and Riedel, Morris and
                      Schultz, Martin G. and Cavallaro, Gabriele},
      title        = {{E}nd-to-{E}nd {P}rocess {O}rchestration of {E}arth
                      {O}bservation {D}ata {W}orkflows with {A}pache {A}irflow on
                      {H}igh {P}erformance {C}omputing},
      booktitle    = {{IGARSS} 2023 - 2023 {IEEE} International Geoscience and
                      Remote Sensing Symposium},
      eventtitle   = {{IEEE} International Geoscience and Remote Sensing
                      Symposium ({IGARSS})},
      venue        = {Pasadena, CA},
      eventdate    = {2023-07-16/2023-07-21},
      publisher    = {IEEE},
      pages        = {711--714},
      year         = {2023},
      month        = jul,
      isbn         = {979-8-3503-2010-7},
      doi          = {10.1109/IGARSS52108.2023.10283416},
      url          = {https://juser.fz-juelich.de/record/1017950},
      abstract     = {Earth Observation (EO) data processing faces challenges due
                      to large volumes, multiple sources, and diverse formats. To
                      address this issue, this paper presents a scalable and
                      parallelizable workflow using Apache Airflow, capable of
                      integrating Machine Learning (ML) and Deep Learning (DL)
                      models with Modular Supercomputing Architecture (MSA)
                      systems. To test the workflow, we considered the production
                      of large-scale Land-Cover (LC) maps as a case study. The
                      workflow manager, Airflow, offers scalability,
                      extensibility, and programmable task definition in Python.
                      It allows us to execute different steps of the workflow in
                      different High-Performance Computing (HPC) systems. The
                      workflow is demonstrated on the Dynamical Exascale Entry
                      Platform (DEEP) and Jülich Research on Exascale Cluster
                      Architectures (JURECA) hosted at the Jülich Supercomputing
                      Centre (JSC), a platform that incorporates heterogeneous JSC
                      systems.},
      reportid     = {FZJ-2023-04455},
      comment      = {IGARSS 2023 - 2023 IEEE International Geoscience and Remote
                      Sensing Symposium : [Proceedings] - IEEE, 2023. - ISBN
                      979-8-3503-2010-7 - doi:10.1109/IGARSS52108.2023.10283416},
      cin          = {JSC},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {5111 - Domain-Specific Simulation $\&$ Data Life Cycle Labs
                      (SDLs) and Research Groups (POF4-511) / 5112 - Cross-Domain
                      Algorithms, Tools, Methods Labs (ATMLs) and Research Groups
                      (POF4-511) / RAISE - Research on AI- and Simulation-Based
                      Engineering at Exascale (951733) / EUROCC-2 (DEA02266)},
      pid          = {G:(DE-HGF)POF4-5111 / G:(DE-HGF)POF4-5112 /
                      G:(EU-Grant)951733 / G:(DE-Juel-1)DEA02266},
      typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
      UT           = {WOS:001098971601004},
}