% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

@ARTICLE{Zhang:1052751,
      author       = {Zhang, Hongzhen and Kerkenhoff, Tim and Kichler, Neil and
                      Dahmen, Manuel and Mitsos, Alexander and Naumann, Uwe and
                      Bongartz, Dominik},
      title        = {{A}ccelerating {D}eterministic {G}lobal {O}ptimization via
                      {GPU}-parallel {I}nterval {A}rithmetic},
      publisher    = {arXiv},
      reportid     = {FZJ-2026-01108},
      year         = {2025},
      abstract     = {Spatial Branch and Bound $(B\&B)$ algorithms are widely
                      used for solving nonconvex problems to global optimality,
                      yet they remain computationally expensive. Though some works
                      have been carried out to speed up $B\&B$ via CPU
                      parallelization, GPU parallelization is much less explored.
                      In this work, we investigate the design of a spatial $B\&B$
                      algorithm that involves an interval-based GPU-parallel lower
                      bounding solver: The domain of each $B\&B$ node is
                      temporarily partitioned into numerous subdomains, then
                      massive GPU parallelism is leveraged to compute interval
                      bounds of the objective function and constraints on each
                      subdomain, using the Mean Value Form. The resulting bounds
                      are tighter than those achieved via regular interval
                      arithmetic without partitioning, but they remain fast to
                      compute. We implement the method into our open-source solver
                      MAiNGO via CUDA in two manners: wrapping all GPU tasks
                      within one kernel function, or distributing the GPU tasks
                      onto a CUDA graph. Numerical experiments show that using
                      more subdomains leads to significantly tighter lower bounds
                      and thus less $B\&B$ iterations. Regarding wall clock time,
                      the proposed spatial $B\&B$ framework achieves a speedup of
                      three orders of magnitude compared to applying interval
                      arithmetic on the CPU without domain partitioning. Among the
                      two implementations, the one developed with CUDA graph
                      enables higher efficiency. Moreover, in some case studies,
                      the proposed method delivers competitive or better
                      performance compared to MAiNGO's default solver which is
                      based on McCormick relaxations. These results highlight the
                      potential of GPU-accelerated bounding techniques to
                      accelerate $B\&B$ algorithms.},
      keywords     = {Optimization and Control (math.OC) (Other) / Distributed,
                      Parallel, and Cluster Computing (cs.DC) (Other) / FOS:
                      Mathematics (Other) / FOS: Computer and information sciences
                      (Other) / 90C26, 90C30, 90-04, 90-08 (Other)},
      cin          = {ICE-1},
      cid          = {I:(DE-Juel1)ICE-1-20170217},
      pnm          = {1121 - Digitalization and Systems Technology for
                      Flexibility Solutions (POF4-112)},
      pid          = {G:(DE-HGF)POF4-1121},
      typ          = {PUB:(DE-HGF)25},
      doi          = {10.48550/ARXIV.2507.20769},
      url          = {https://juser.fz-juelich.de/record/1052751},
}