1 #ifndef vtkKMeansDistanceFunctor_h
2 #define vtkKMeansDistanceFunctor_h
3 
4 // .NAME vtkKMeansDistanceFunctor - measure distance from k-means cluster centers
5 // .SECTION Description
6 // This is an abstract class (with a default concrete subclass) that implements
7 // algorithms used by the vtkKMeansStatistics filter that rely on a distance metric.
8 // If you wish to use a non-Euclidean distance metric (this could include
9 // working with strings that do not have a Euclidean distance metric, implementing
// k-medoids, or trying distance metrics in norms other than L2), you
11 // should subclass vtkKMeansDistanceFunctor.
12 
13 #include "vtkFiltersStatisticsModule.h" // For export macro
14 #include "vtkObject.h"
15 
16 class vtkVariantArray;
17 class vtkAbstractArray;
18 class vtkTable;
19 
20 class VTKFILTERSSTATISTICS_EXPORT vtkKMeansDistanceFunctor : public vtkObject
21 {
22 public:
23   static vtkKMeansDistanceFunctor* New();
24   vtkTypeMacro(vtkKMeansDistanceFunctor,vtkObject);
25   virtual void PrintSelf( ostream& os, vtkIndent indent );
26 
27   // Description:
28   // Return an empty tuple. These values are used as cluster center coordinates
29   // when no initial cluster centers are specified.
30   virtual vtkVariantArray* GetEmptyTuple( vtkIdType dimension );
31 
32   // Description:
33   // Compute the distance from one observation to another, returning the distance
34   // in the first argument.
35   virtual void operator() ( double&, vtkVariantArray*, vtkVariantArray * );
36 
37   // Description:
38   // This is called once per observation per run per iteration in order to assign the
39   // observation to its nearest cluster center after the distance functor has been
40   // evaluated for all the cluster centers.
41   //
42   // The distance functor is responsible for incrementally updating the cluster centers
43   // to account for the assignment.
44   virtual void PairwiseUpdate( vtkTable* clusterCenters, vtkIdType row, vtkVariantArray* data, vtkIdType dataCardinality, vtkIdType totalCardinality );
45 
46   // Description:
47   // When a cluster center (1) has no observations that are closer to it than other cluster centers
48   // or (2) has exactly the same coordinates as another cluster center, its coordinates should be
49   // perturbed. This function should perform that perturbation.
50   //
51   // Since perturbation relies on a distance metric, this function is the responsibility of the
52   // distance functor.
53   virtual void PerturbElement( vtkTable*, vtkTable*, vtkIdType, vtkIdType, vtkIdType, double );
54 
55   // Description:
56   // Allocate an array large enough to hold \a size coordinates and return a void pointer to this array.
57   // This is used by vtkPKMeansStatistics to send (receive) cluster center coordinates to (from) other processes.
58   virtual void* AllocateElementArray( vtkIdType size );
59 
60   // Description:
61   // Free an array allocated with AllocateElementArray.
62   virtual void DeallocateElementArray( void* );
63 
64   // Description:
65   // Return a vtkAbstractArray capable of holding cluster center coordinates.
66   // This is used by vtkPKMeansStatistics to hold cluster center coordinates sent to (received from) other processes.
67   virtual vtkAbstractArray*  CreateCoordinateArray();
68 
69   // Description:
70   // Pack the cluster center coordinates in \a vElements into columns of \a curTable.
71   // This code may assume that the columns in \a curTable are all of the type returned by \a GetNewVTKArray().
72   virtual void PackElements( vtkTable* curTable, void* vElements );
73 
74   // Description:
75   // Unpack the cluster center coordinates in \a vElements into columns of \a curTable.
76   // This code may assume that the columns in \a curTable are all of the type returned by \a GetNewVTKArray().
77   virtual void UnPackElements( vtkTable* curTable, vtkTable* newTable, void* vLocalElements, void* vGlobalElements, int np );
78   virtual void UnPackElements( vtkTable* curTable, void* vLocalElements, vtkIdType numRows, vtkIdType numCols );
79 
80   // Description:
81   // Return the data type used to store cluster center coordinates.
82   virtual int GetDataType();
83 
84 protected:
85   vtkKMeansDistanceFunctor();
86   virtual ~vtkKMeansDistanceFunctor();
87 
88   vtkVariantArray* EmptyTuple; // Used to quickly initialize Tuple for each datum
89   vtkTable* CenterUpdates; // Used to hold online computation of next iteration's cluster center coords.
90 
91 private:
92   vtkKMeansDistanceFunctor( const vtkKMeansDistanceFunctor& ); // Not implemented.
93   void operator = ( const vtkKMeansDistanceFunctor& ); // Not implemented.
94 };
95 
96 #endif // vtkKMeansDistanceFunctor_h
97