#ifndef TTmplDensityCluster_seen #define TTmplDensityCluster_seen #include #include #include #include namespace { template struct decreasingClusterSize : public std::binary_function { bool operator() (const T& lhs, const T& rhs) { return lhs.size() > rhs.size(); } }; } namespace COMET { template /// For a detailed description of the density-based clustering, Google keyword: /// density-based clustering. /// /// A template class that performs density-based clustering using a customized /// metric. Users have to define a class that calculates the 'distance' /// between two points. For example, a template class for clustering hits /// using time information: /// \code /// template /// class TimeDistance { /// public: /// double operator(T lhs, T rhs) { /// return std::abs(lhs->GetTime() - rhs->GetTime()); /// } /// }; /// \endcode /// Template class for clustering hits using position. /// \code /// template /// class ISpatialDistance { /// public: /// double operator(T lhs, T rhs) { /// double x1 = lhs->GetPosition().Z(); /// double y1 = 0.0; /// if (lhs->IsXHit()) y1 = lhs->GetPosition().X(); /// if (lhs->IsYHit()) y1 = lhs->GetPosition().Y(); /// /// double x2 = rhs->GetPosition().Z(); /// double y2 = 0.0; /// if (rhs->IsXHit()) y2 = rhs->GetPosition().X(); /// if (rhs->IsYHit()) y2 = rhs->GetPosition().Y(); /// /// return std::sqrt(pow(x1-x2, 2) + pow(y1-y2, 2)); /// } /// }; /// \endcode /// To use this class /// \code /// // Work around template parsing bug in some GCC versions... /// typedef COMET::IHandle Arg; /// /// // Make a typedef for the ClusterAlgorithm... /// typedef ITmplDensityCluster ClusterAlgorithm; /// /// const double epsilon = 20*unit::cm; /// const unsigned int minPoints = 4; /// /// std::auto_ptr /// xcluster(new ClusterAlgorithm(minPoints, epsilon)); /// xcluster->Cluster(xHits); /// /// std::auto_ptr /// ycluster(new ClusterAlgorithm(minPoints, epsilon)); /// ycluster->Cluster(yHits); /// \endcode /// /// If you know the exact type that will be passed to the MetricModel, your /// metric model class doesn't need to be a template. For instance, if you /// want to cluster a vector of times that is defined as /// \code /// std::vector timeVector /// \endcode /// you could use a very simple metric model /// \code /// class TimeMetricModel { /// public: /// double operator() (double lhs, double rhs) { /// return std::abs(lhs-rhs); /// } /// } /// \endcode /// This will be used in code as follows: /// \code /// typedef ITmplDensityCluster TimeCluster; /// /// TimeCluster timeCluster(4,40*unit::ns); /// /// timeCluster.Cluster(timeVector); /// \endcode /// The clustering algorithm can also be used directly with iterators, so that /// the last example would be: /// \code /// typedef ITmplDensityCluster TimeCluster; /// /// TimeCluster timeCluster(4,40*unit::ns); /// /// timeCluster.Cluster(timeVector.begin(), timeVector.end()); /// \endcode /// /// Sometimes you will have a metric model that takes arguments in the /// constructor. An example of this might be a clustering where the X and Y /// dimensions have a different scale factor such as /// \code /// class ScaledMetric { /// private: /// double fXScale; /// double fYScale; /// double fZScale; /// public: /// ScaledMetric(double xScale, double yScale, zScale) /// : fXScale(xScale), fYScale(yScale), {} /// /// operator () (const TVector3& lhs, const TVector3& rhs) { /// double x = fXScale*(lhs.X()-rhs.X()); /// double y = fYScale*(lhs.Y()-rhs.Y()); /// double z = fZScale*(lhs.Z()-rhs.Z()); /// return std::sqrt(x*x + y*y + z*z); /// } /// } /// \endcode /// /// In this case, the clustering class would be used as follows: /// /// \code /// std::vector positionVector; /// /// typedef ITmplDensityCluster ScaledCluster; /// /// ScaledCluster ScaledCluster(4,40*unit::cm, ScaledMetric(10,5,10)); /// /// scaledCluster.Cluster(positionVector.begin(), positionVector.end()); /// // or /// scaledCluster.Cluster(positionVector); /// \endcode class ITmplDensityCluster { public: /// A collection of points for use in clustering. A Points collection is /// returned by GetCluster(). typedef std::list Points; /// An iterator to move through the Points collection (mostly for internal /// use). typedef typename Points::iterator Iterator; /// A constant iterator to move through the Points collection (mostly for /// internal use). typedef typename Points::const_iterator ConstIterator; /// Create a density clustering class that requires at least minPts within /// a distance of epsilon. The distance is defined by the return value of /// the MetricModel::operator() method (for example, TimeDistance or /// SpacialDistance described in the class documentation). The /// MetricModel class will be constructed using the default constructor /// (e.g. MetricModel()) explicit ITmplDensityCluster(unsigned int minPts, double epsilon); /// Create a density clustering class that requires at least minPts within /// a distance of epsilon. The distance is defined by the return value of /// the MetricModel::operator() method (for example, TimeDistance or /// SpacialDistance described in the class documentation). This form /// takes an explicitly constructed MetricModel class, so that code can /// pass arguments to the MetricModel constructor. explicit ITmplDensityCluster(unsigned int minPts, double epsilon, MetricModel metric); virtual ~ITmplDensityCluster() {} /// Cluster a vector of objects. The results are accessed using /// GetCluster(). void Cluster(const std::vector&); /// Cluster a group of objects between the begin and end iterator. The /// results are accessed using GetCluster(). template void Cluster(InputIterator begin, InputIterator end); /// Return the number of clusters found by the density clustering. This /// is only valid after the Cluster() method has been used. unsigned int GetClusterCount() { return fClusters.size(); } /// Get the i-th cluster. This is only valid after the cluster method has /// been used. If the index is equal to the number of found clusters, /// then the return value will be the list of unclustered points. const Points& GetCluster(unsigned int i) const { if (i == fClusters.size()) return fRemainingPoints; return fClusters.at(i); } /// A convenient member that returns the points in a cluster. This is a /// relatively expensive method since the points are returned as a vector /// (by value). If the index is greater than the number of found clusters, /// then the return value will be the list of unclustered points. std::vector GetPoints(unsigned int) const; /// Validate that the all of the points are either part of a cluster, or /// part of the unclustered points. void Check(); protected: /// Find the set of points with highest density in input. If the density /// is greater that fMinPoints, then return the set of points in output. void FindSeeds(const Points& input, Points& output); /// Find the neighbors for a point in the input points. A point is not a /// neighbor to itself. Neighbors are defined as all points for which the /// fMetricModel returns a value less than fMaxDistance. Any neighbors /// that are found will be returned in output. std::size_t GetNeighbors(T point, const Points& input, Points& output); /// Count the neighbors for a point in the input points, but do not return /// a copy of the neighbors. The only difference between this and /// GetNeighbors is that the neighboring points are not returned by /// CountNeighbors. std::size_t CountNeighbors(T point, const Points& input); /// Remove the seeds from the inputs. This changes the seeds since they /// are sorted as part of the algorithm. The inputs must already be /// sorted. void RemoveSeeds(Points& inputs, Points& seeds); private: /// The minimum number of points that must be within the fMaxDistance /// radius of the current point. If there are at least fMinPoints with in /// the fMaxDistance of the current point, the cluster will be expanded by /// the neighbors. The distance between points is defined by /// fMetricModel. unsigned int fMinPoints; /// The maximum distance between points for which points are defined as /// being neighbors. The distance between the points is defined by /// fMetricModel. double fMaxDistance; /// A class that calculates the distance between points. The MetricModel /// class must define (at least) an method equivalent to "double /// operator() (T lhs, T rhs)". Examples of possible operators are /// \code /// double operator() (T lhs, T rhs); /// double operator() (const T& lhs, const T& rhs); /// \endcode MetricModel fMetricModel; /// An internal vector of clusters used to cache results. std::vector fClusters; /// An internal collection of Points holding the points that have not yet /// been added to a cluster. Points fRemainingPoints; }; //////////////////////////////////////////////////////////////// // Define the ITmplDensityCluster class methods. //////////////////////////////////////////////////////////////// template ITmplDensityCluster::ITmplDensityCluster(unsigned int MinPts, double epsilon, MetricModel metric) : fMinPoints(MinPts), fMaxDistance(epsilon), fMetricModel(metric) { } template ITmplDensityCluster::ITmplDensityCluster(unsigned int MinPts, double epsilon) : fMinPoints(MinPts), fMaxDistance(epsilon), fMetricModel(MetricModel()) { } template void ITmplDensityCluster::Cluster(const std::vector& pnts) { Cluster(pnts.begin(), pnts.end()); } template template void ITmplDensityCluster::Cluster(InputIterator begin, InputIterator end) { Points seeds; // Clear out the internal data structures. fClusters.clear(); fRemainingPoints.clear(); // Move the input points into the fRemainingPoints and the sort them so // they are in a defined order. std::copy(begin, end, std::back_inserter(fRemainingPoints)); fRemainingPoints.sort(); // Now continue removing points until there aren't any more points, or a // seed isn't found. while (!fRemainingPoints.empty()) { FindSeeds(fRemainingPoints,seeds); if (seeds.size() < fMinPoints) break; // Remove the seeds from the remaining points. RemoveSeeds(fRemainingPoints,seeds); // Add the seeds to the cluster that's being built. Points cluster; std::copy(seeds.begin(), seeds.end(), std::back_inserter(cluster)); Points tmp; while (!seeds.empty()) { T currentP = seeds.front(); seeds.erase(seeds.begin()); tmp.clear(); std::size_t i = GetNeighbors(currentP, fRemainingPoints, tmp); i += CountNeighbors(currentP, seeds); if (i < fMinPoints) continue; std::copy(tmp.begin(),tmp.end(),std::back_inserter(seeds)); std::copy(tmp.begin(),tmp.end(),std::back_inserter(cluster)); RemoveSeeds(fRemainingPoints,tmp); } fClusters.push_back(cluster); } std::sort(fClusters.begin(), fClusters.end(), decreasingClusterSize()); } template void ITmplDensityCluster::Check() { typedef typename std::vector::const_iterator Iterator; std::size_t clusterPointCount = 0; for (Iterator c = fClusters.begin(); c != fClusters.end(); ++c) { std::size_t clusterSize = c->size(); clusterPointCount += clusterSize; } std::size_t totalPointCount = clusterPointCount + fRemainingPoints.size(); } template void ITmplDensityCluster::FindSeeds( const Points& in, Points& out) { out.clear(); Points seeds; for (ConstIterator h = in.begin(); h != in.end(); ++h) { seeds.clear(); std::size_t i = GetNeighbors(*h, in, seeds); if (i std::size_t ITmplDensityCluster::GetNeighbors( T pnt, const Points& in, Points& out) { std::size_t size = 0; for (ConstIterator h = in.begin(); h != in.end(); ++h) { if (pnt == (*h)) continue; double distance = fMetricModel(pnt, *h); if (distance < fMaxDistance) { out.push_back(*h); ++size; } } return size; } template std::size_t ITmplDensityCluster::CountNeighbors(T pnt, const Points& in) { std::size_t size = 0; for (ConstIterator h = in.begin(); h != in.end(); ++h) { if (pnt == (*h)) continue; double distance = fMetricModel(pnt, *h); if (distance < fMaxDistance) { ++size; } } return size; } template void ITmplDensityCluster::RemoveSeeds(Points& input, Points& seeds) { Iterator target = input.begin(); seeds.sort(); for (ConstIterator h = seeds.begin(); h != seeds.end(); ++h) { while (target!=input.end()) { if (*target < *h) ++target; else if (*h == *target) { target = input.erase(target); } else break; } } } template std::vector ITmplDensityCluster::GetPoints(unsigned int index) const { std::vector points; const Points& cluster = GetCluster(index); std::copy(cluster.begin(), cluster.end(), std::back_inserter(points)); return points; } }; // End of "namespace COMET" from around line 25. #endif