% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference paper entry for the HPDBSCAN workshop paper (MLHPC '15).
% The fields reportid, cin, cid, pnm, pid, typ, and comment are not standard
% BibTeX fields; standard bibliography styles ignore them.
@inproceedings{Gtz:276347,
  author       = {Götz, Markus and Bodenstein, Christian and Riedel, Morris},
  title        = {{HPDBSCAN} - {Highly} parallel {DBSCAN}},
  booktitle    = {Proceedings of the Workshop on Machine
                  Learning in High-Performance Computing
                  Environments - {MLHPC} '15},
  organization = {Workshop on Machine Learning
                  in High-Performance Computing
                  Environments, subworkshop to
                  Supercomputing 2015, Austin (Texas), 15
                  Nov 2015},
  publisher    = {ACM Press},
  address      = {New York, New York, USA},
  pages        = {2},
  year         = {2015},
  month        = nov,
  date         = {2015-11-15},
  isbn         = {978-1-4503-4006-9},
  doi          = {10.1145/2834892.2834894},
  url          = {https://juser.fz-juelich.de/record/276347},
  reportid     = {FZJ-2015-06807},
  comment      = {Proceedings of the Workshop on Machine Learning in
                  High-Performance Computing Environments - MLHPC '15},
  abstract     = {Clustering algorithms in the field of data-mining are used
                  to aggregate similar objects into common groups. One of the
                  best-known of these algorithms is called DBSCAN. Its
                  distinct design enables the search for an apriori unknown
                  number of arbitrarily shaped clusters, and at the same time
                  allows to filter out noise. Due to its sequential
                  formulation, the parallelization of DBSCAN renders a
                  challenge. In this paper we present a new parallel approach
                  which we call HPDBSCAN. It employs three major techniques in
                  order to break the sequentiality, empower workload-balancing
                  as well as speed up neighborhood searches in distributed
                  parallel processing environments i) a computation split
                  heuristic for domain decomposition, ii) a data index
                  preprocessing step and iii) a rule-based cluster merging
                  scheme. As a proof-of-concept we implemented HPDBSCAN as an
                  OpenMP/MPI hybrid application. Using real-world data sets,
                  such as a point cloud from the old town of Bremen, Germany,
                  we demonstrate that our implementation is able to achieve a
                  significant speed-up and scale-up in common HPC setups.
                  Moreover, we compare our approach with previous attempts to
                  parallelize DBSCAN showing an order of magnitude improvement
                  in terms of computation time and memory consumption.},
  cin          = {JSC},
  cid          = {I:(DE-Juel1)JSC-20090406},
  pnm          = {512 - Data-Intensive Science and Federated Computing
                  (POF3-512)},
  pid          = {G:(DE-HGF)POF3-512},
  typ          = {PUB:(DE-HGF)8},
}