% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
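%
% As a minimal sketch of how this file might be consumed (assuming biblatex is
% available and the file is saved under the hypothetical name "references.bib"),
% a LaTeX document using the biber backend could look like this:
%
%   \documentclass{article}
%   \usepackage[backend=biber]{biblatex}  % biber handles UTF-8 natively
%   \addbibresource{references.bib}
%   \begin{document}
%   Head detection benchmark~\cite{Abubaker:1034454}.
%   \printbibliography
%   \end{document}
%
% A typical compile sequence would then be:
%   pdflatex main && biber main && pdflatex main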

@ARTICLE{Abubaker:1034454,
      author       = {Abubaker, Mohammed and Alsadder, Zubayda and Abdelhaq,
                      Hamed and Boltes, Maik and Alia, Ahmed},
      title        = {{RPEE-Heads}: A Novel Benchmark for Pedestrian Head
                      Detection in Crowd Videos},
      publisher    = {arXiv},
      reportid     = {FZJ-2024-07220, https://doi.org/10.48550/arXiv.2411.18164},
      year         = {2024},
      abstract     = {The automatic detection of pedestrian heads in crowded
                      environments is essential for crowd analysis and management
                      tasks, particularly in high-risk settings such as railway
                      platforms and event entrances. These environments,
                      characterized by dense crowds and dynamic movements, are
                      underrepresented in public datasets, posing challenges for
                      existing deep learning models. To address this gap, we
                      introduce the Railway Platforms and Event Entrances-Heads
                      (RPEE-Heads) dataset, a novel, diverse, high-resolution, and
                      accurately annotated resource. It includes 109,913 annotated
                      pedestrian heads across 1,886 images from 66 video
                      recordings, with an average of 56.2 heads per image.
                      Annotations include bounding boxes for visible head regions.
                      In addition to introducing the RPEE-Heads dataset, this
                      paper evaluates eight state-of-the-art object detection
                      algorithms on it and analyzes the
                      impact of head size on detection accuracy. The experimental
                      results show that You Only Look Once v9 and Real-Time
                      Detection Transformer outperform the other algorithms,
                      achieving mean average precisions of 90.7\% and 90.8\%,
                      with inference times of 11 and 14 milliseconds,
                      respectively. Moreover, the findings underscore the need for
                      specialized datasets like RPEE-Heads for training and
                      evaluating accurate models for head detection in railway
                      platforms and event entrances. The dataset and pretrained
                      models are available at
                      https://doi.org/10.34735/ped.2024.2.},
      keywords     = {Computer Vision and Pattern Recognition (cs.CV) / Machine
                      Learning (cs.LG) / FOS: Computer and information sciences},
      cin          = {IAS-7},
      cid          = {I:(DE-Juel1)IAS-7-20180321},
      pnm          = {5111 - Domain-Specific Simulation \& Data Life Cycle Labs
                      (SDLs) and Research Groups (POF4-511) / Pilot project for
                      the development of a Palestinian-German research and
                      doctoral programme 'Palestinian-German Science Bridge'
                      (01DH16027)},
      pid          = {G:(DE-HGF)POF4-5111 / G:(BMBF)01DH16027},
      typ          = {PUB:(DE-HGF)25},
      doi          = {10.48550/arXiv.2411.18164},
      url          = {https://juser.fz-juelich.de/record/1034454},
}