% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article: Sherratt et al. (2023), eLife 12, e81916.
% Fields outside the standard BibTeX set (reportid, cin, ddc, cid, pnm, pid,
% typ, pubmed, UT) are unknown to standard bibliography styles and are ignored
% by them; they look like repository bookkeeping metadata -- confirm with the
% exporting system before relying on their meaning.
% NOTE(review): the authors Bertsimas, Soni, Kirsten and Biecek are entered as
% "Family, Given" here; the upstream record lists them with given and family
% name inverted. Verify against the publisher's author list.
@ARTICLE{Sherratt:1009058,
      author       = {Sherratt, Katharine and Gruson, Hugo and Grah, Rok and
                      Johnson, Helen and Niehus, Rene and Prasse, Bastian and
                      Sandmann, Frank and Deuschel, Jannik and Wolffram, Daniel
                      and Abbott, Sam and Ullrich, Alexander and Gibson, Graham
                      and Ray, Evan L and Reich, Nicholas G and Sheldon, Daniel
                      and Wang, Yijin and Wattanachit, Nutcha and Wang, Lijing and
                      Trnka, Jan and Obozinski, Guillaume and Sun, Tao and Thanou,
                      Dorina and Pottier, Loic and Krymova, Ekaterina and Meinke,
                      Jan H and Barbarossa, Maria Vittoria and Leithauser, Neele
                      and Mohring, Jan and Schneider, Johanna and Wlazlo, Jaroslaw
                      and Fuhrmann, Jan and Lange, Berit and Rodiah, Isti and
                      Baccam, Prasith and Gurung, Heidi and Stage, Steven and
                      Suchoski, Bradley and Budzinski, Jozef and Walraven, Robert
                      and Villanueva, Inmaculada and Tucek, Vit and Smid, Martin
                      and Zajicek, Milan and Perez Alvarez, Cesar and Reina, Borja
                      and Bosse, Nikos I and Meakin, Sophie R and Castro, Lauren
                      and Fairchild, Geoffrey and Michaud, Isaac and Osthus, Dave
                      and Alaimo Di Loro, Pierfrancesco and Maruotti, Antonello
                      and Eclerova, Veronika and Kraus, Andrea and Kraus, David
                      and Pribylova, Lenka and Bertsimas, Dimitris and Li, Michael
                      Lingzhi and Soni, Saksham and Dehning, Jonas and Mohr,
                      Sebastian and Priesemann, Viola and Redlarski, Grzegorz and
                      Bejar, Benjamin and Ardenghi, Giovanni and Parolini, Nicola
                      and Ziarelli, Giovanni and Bock, Wolfgang and Heyder, Stefan
                      and Hotz, Thomas and Singh, David E and Guzman-Merino,
                      Miguel and Aznarte, Jose L and Morina, David and Alonso,
                      Sergio and Alvarez, Enric and Lopez, Daniel and Prats, Clara
                      and Burgard, Jan Pablo and Rodloff, Arne and Zimmermann, Tom
                      and Kuhlmann, Alexander and Zibert, Janez and Pennoni,
                      Fulvia and Divino, Fabio and Catala, Marti and Lovison,
                      Gianfranco and Giudici, Paolo and Tarantino, Barbara and
                      Bartolucci, Francesco and Jona Lasinio, Giovanna and
                      Mingione, Marco and Farcomeni, Alessio and Srivastava,
                      Ajitesh and Montero-Manso, Pablo and Adiga, Aniruddha and
                      Hurt, Benjamin and Lewis, Bryan and Marathe, Madhav and
                      Porebski, Przemyslaw and Venkatramanan, Srinivasan and
                      Bartczuk, Rafal P and Dreger, Filip and Gambin, Anna and
                      Gogolewski, Krzysztof and Gruziel-Slomka, Magdalena and
                      Krupa, Bartosz and Moszyński, Antoni and Niedzielewski,
                      Karol and Nowosielski, Jedrzej and Radwan, Maciej and
                      Rakowski, Franciszek and Semeniuk, Marcin and Szczurek, Ewa
                      and Zielinski, Jakub and Kisielewski, Jan and Pabjan,
                      Barbara and Kirsten, Holger and Kheifetz, Yuri and Scholz,
                      Markus and Biecek, Przemyslaw and Bodych, Marcin and
                      Filinski, Maciej and Idzikowski, Radoslaw and Krueger, Tyll
                      and Ozanski, Tomasz and Bracher, Johannes and Funk,
                      Sebastian},
      title        = {Predictive performance of multi-model ensemble forecasts
                      of {COVID-19} across {European} nations},
      journal      = {eLife},
      volume       = {12},
      issn         = {2050-084X},
      address      = {Cambridge},
      publisher    = {eLife Sciences Publications},
      reportid     = {FZJ-2023-02606},
      pages        = {e81916},
      year         = {2023},
      abstract     = {Background: Short-term forecasts of infectious disease
                      burden can contribute to situational awareness and aid
                      capacity planning. Based on best practice in other fields
                      and recent insights in infectious disease epidemiology, one
                      can maximise the predictive performance of such forecasts if
                      multiple models are combined into an ensemble. Here, we
                      report on the performance of ensembles in predicting
                      COVID-19 cases and deaths across Europe between 08 March
                      2021 and 07 March 2022. Methods: We used open-source tools
                      to develop a public European COVID-19 Forecast Hub. We invited
                      groups globally to contribute weekly forecasts for COVID-19
                      cases and deaths reported by a standardised source for 32
                      countries over the next 1–4 weeks. Teams submitted
                      forecasts from March 2021 using standardised quantiles of
                      the predictive distribution. Each week we created an
                      ensemble forecast, where each predictive quantile was
                      calculated as the equally-weighted average (initially the
                      mean and then from 26th July the median) of all individual
                      models’ predictive quantiles. We measured the performance
                      of each model using the relative Weighted Interval Score
                      (WIS), comparing models’ forecast accuracy relative to all
                      other models. We retrospectively explored alternative
                      methods for ensemble forecasts, including weighted averages
                      based on models’ past predictive performance. Results: Over
                      52 weeks, we collected forecasts from 48 unique models. We
                      evaluated 29 models’ forecast scores in comparison to the
                      ensemble model. We found a weekly ensemble had a
                      consistently strong performance across countries over time.
                      Across all horizons and locations, the ensemble performed
                      better on relative WIS than 83\% of participating
                      models’ forecasts of incident cases (with a total N=886
                      predictions from 23 unique models), and 91\% of
                      participating models’ forecasts of deaths (N=763
                      predictions from 20 models). Across a 1–4 week time
                      horizon, ensemble performance declined with longer forecast
                      periods when forecasting cases, but remained stable over 4
                      weeks for incident death forecasts. In every forecast across
                      32 countries, the ensemble outperformed most contributing
                      models when forecasting either cases or deaths, frequently
                      outperforming all of its individual component models. Among
                      several choices of ensemble methods we found that the most
                      influential and best choice was to use a median average of
                      models instead of using the mean, regardless of methods of
                      weighting component forecast models. Conclusions: Our
                      results support the use of combining forecasts from individual
                      models into an ensemble in order to improve predictive
                      performance across epidemiological targets and populations
                      during infectious disease epidemics. Our findings further
                      suggest that median ensemble methods yield better predictive
                      performance more than ones based on means. Our findings also
                      highlight that forecast consumers should place more weight
                      on incident death forecasts than incident case forecasts at
                      forecast horizons greater than 2 weeks.},
      cin          = {JSC},
      ddc          = {600},
      cid          = {I:(DE-Juel1)JSC-20090406},
      pnm          = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
                      (SDLs) and Research Groups (POF4-511)},
      pid          = {G:(DE-HGF)POF4-5111},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {37083521},
      UT           = {WOS:001009734700001},
      doi          = {10.7554/eLife.81916},
      url          = {https://juser.fz-juelich.de/record/1009058},
}