1 #ifndef vtkKMeansDistanceFunctor_h
2 #define vtkKMeansDistanceFunctor_h
3 
4 /**
5  * @class   vtkKMeansDistanceFunctor
6  * @brief   measure distance from k-means cluster centers
7  *
8  * This is an abstract class (with a default concrete subclass) that implements
9  * algorithms used by the vtkKMeansStatistics filter that rely on a distance metric.
10  * If you wish to use a non-Euclidean distance metric (this could include
11  * working with strings that do not have a Euclidean distance metric, implementing
12  * k-mediods, or trying distance metrics in norms other than L2), you
13  * should subclass vtkKMeansDistanceFunctor.
14 */
15 
16 #include "vtkFiltersStatisticsModule.h" // For export macro
17 #include "vtkObject.h"
18 
19 class vtkVariantArray;
20 class vtkAbstractArray;
21 class vtkTable;
22 
23 class VTKFILTERSSTATISTICS_EXPORT vtkKMeansDistanceFunctor : public vtkObject
24 {
25 public:
26   static vtkKMeansDistanceFunctor* New();
27   vtkTypeMacro(vtkKMeansDistanceFunctor,vtkObject);
28   void PrintSelf( ostream& os, vtkIndent indent ) override;
29 
30   /**
31    * Return an empty tuple. These values are used as cluster center coordinates
32    * when no initial cluster centers are specified.
33    */
34   virtual vtkVariantArray* GetEmptyTuple( vtkIdType dimension );
35 
36   /**
37    * Compute the distance from one observation to another, returning the distance
38    * in the first argument.
39    */
40   virtual void operator() ( double&, vtkVariantArray*, vtkVariantArray * );
41 
42   /**
43    * This is called once per observation per run per iteration in order to assign the
44    * observation to its nearest cluster center after the distance functor has been
45    * evaluated for all the cluster centers.
46 
47    * The distance functor is responsible for incrementally updating the cluster centers
48    * to account for the assignment.
49    */
50   virtual void PairwiseUpdate( vtkTable* clusterCenters, vtkIdType row, vtkVariantArray* data, vtkIdType dataCardinality, vtkIdType totalCardinality );
51 
52   /**
53    * When a cluster center (1) has no observations that are closer to it than other cluster centers
54    * or (2) has exactly the same coordinates as another cluster center, its coordinates should be
55    * perturbed. This function should perform that perturbation.
56 
57    * Since perturbation relies on a distance metric, this function is the responsibility of the
58    * distance functor.
59    */
60   virtual void PerturbElement( vtkTable*, vtkTable*, vtkIdType, vtkIdType, vtkIdType, double );
61 
62   /**
63    * Allocate an array large enough to hold \a size coordinates and return a void pointer to this array.
64    * This is used by vtkPKMeansStatistics to send (receive) cluster center coordinates to (from) other processes.
65    */
66   virtual void* AllocateElementArray( vtkIdType size );
67 
68   /**
69    * Free an array allocated with AllocateElementArray.
70    */
71   virtual void DeallocateElementArray( void* );
72 
73   /**
74    * Return a vtkAbstractArray capable of holding cluster center coordinates.
75    * This is used by vtkPKMeansStatistics to hold cluster center coordinates sent to (received from) other processes.
76    */
77   virtual vtkAbstractArray*  CreateCoordinateArray();
78 
79   /**
80    * Pack the cluster center coordinates in \a vElements into columns of \a curTable.
81    * This code may assume that the columns in \a curTable are all of the type returned by \a GetNewVTKArray().
82    */
83   virtual void PackElements( vtkTable* curTable, void* vElements );
84 
85   //@{
86   /**
87    * Unpack the cluster center coordinates in \a vElements into columns of \a curTable.
88    * This code may assume that the columns in \a curTable are all of the type returned by \a GetNewVTKArray().
89    */
90   virtual void UnPackElements( vtkTable* curTable, vtkTable* newTable, void* vLocalElements, void* vGlobalElements, int np );
91   virtual void UnPackElements( vtkTable* curTable, void* vLocalElements, vtkIdType numRows, vtkIdType numCols );
92   //@}
93 
94   /**
95    * Return the data type used to store cluster center coordinates.
96    */
97   virtual int GetDataType();
98 
99 protected:
100   vtkKMeansDistanceFunctor();
101   ~vtkKMeansDistanceFunctor() override;
102 
103   vtkVariantArray* EmptyTuple; // Used to quickly initialize Tuple for each datum
104   vtkTable* CenterUpdates; // Used to hold online computation of next iteration's cluster center coords.
105 
106 private:
107   vtkKMeansDistanceFunctor( const vtkKMeansDistanceFunctor& ) = delete;
108   void operator = ( const vtkKMeansDistanceFunctor& ) = delete;
109 };
110 
111 #endif // vtkKMeansDistanceFunctor_h
112