1"""
2Some methods which are useful but didn't quite fit into the graph classes.
3
4"""
5import logging
6import numpy
7import scipy.sparse
8
9class GraphUtils(object):
10    def __init__(self):
11        pass
12
    @staticmethod
    def vertexLabelPairs(graph, edgeArray):
        """
        Create an array of pairs of vertex labels for each edge in the given edge
        index array (as returned by getAllEdges, for example). Row i of the
        returned array is the concatenation of the labels of the two vertices of
        edge i.

        :param graph: A graph whose vertices are labelled with feature vectors.

        :param edgeArray: A numpy array with 2 columns, and values corresponding to vertex indices.
        :type edgeArray: :class:`numpy.ndarray`
        """
        #Parameter.checkList(edgeArray, Parameter.checkIndex, (0, self.getNumVertices()))

        numFeatures = graph.getVertexList().getNumFeatures()
        X = numpy.zeros((edgeArray.shape[0], numFeatures*2))

        for i in range(edgeArray.shape[0]):
            X[i, 0:numFeatures] = graph.getVertex(edgeArray[i, 0])
            X[i, numFeatures:numFeatures*2] = graph.getVertex(edgeArray[i, 1])

        return X

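    # Illustrative usage sketch (not part of the original module).  It assumes a
    # graph object from this library whose VertexList stores one feature vector
    # per vertex; building such a graph is done elsewhere.
    #
    #   edges = graph.getAllEdges()                      # shape (numEdges, 2)
    #   X = GraphUtils.vertexLabelPairs(graph, edges)
    #   # Row i of X is the concatenation of the labels of the two endpoints of
    #   # edge i: columns [0:numFeatures] and [numFeatures:2*numFeatures].
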
    @staticmethod
    def vertexLabelExamples(graph):
        """
        Return a set of examples with pairs of vertex labels connected by an
        edge. For undirected graphs there exists an example (v_i, v_j) for
        every (v_j, v_i). Also, there is an equal-sized set of negative examples,
        drawn from pairs of vertices for which no edge exists.
        """
        numFeatures = graph.getVertexList().getNumFeatures()
        numEdges = graph.getNumEdges()

        #Also add non-edges, sampled from the complement graph
        logging.info("Computing graph complement")
        cGraph = graph.complement()
        logging.info("Done with " + str(cGraph.getNumEdges()) + " edges.")
        perm = numpy.random.permutation(cGraph.getNumEdges())[0:numEdges]

        X = GraphUtils.vertexLabelPairs(graph, graph.getAllEdges())
        Xc = GraphUtils.vertexLabelPairs(cGraph, cGraph.getAllEdges()[perm, :])
        X = numpy.r_[X, Xc]

        y = numpy.ones(numEdges*2)
        y[numEdges:numEdges*2] = -1
        logging.debug(y)

        #If the graph is undirected add reverse edges
        if graph.isUndirected():
            X2 = numpy.zeros((numEdges*2, numFeatures*2))
            X2[:, 0:numFeatures] = X[:, numFeatures:numFeatures*2]
            X2[:, numFeatures:numFeatures*2] = X[:, 0:numFeatures]
            X = numpy.r_[X, X2]
            y = numpy.r_[y, y]

        return X, y

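    # Illustrative usage sketch (not part of the original module): the output is
    # a standard (X, y) dataset for binary link prediction, where constructing
    # and labelling the graph is assumed to happen elsewhere.
    #
    #   X, y = GraphUtils.vertexLabelExamples(graph)
    #   # y[i] == +1 for examples built from existing edges, -1 for examples
    #   # sampled from the complement graph (non-edges).
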
    @staticmethod
    def treeRoot(treeGraph):
        """
        Find the root of the given tree, i.e. the first vertex with in-degree zero.
        """
        inDegSeq = treeGraph.inDegreeSequence()
        root = numpy.nonzero(inDegSeq==0)[0][0]
        return root

    @staticmethod
    def treeDepth(treeGraph):
        """
        Find the depth of the given tree, i.e. the greatest distance from the
        root to any reachable vertex.
        """
        if treeGraph.getNumVertices()==0:
            return 0

        if not treeGraph.isTree():
            raise ValueError("Input graph is not a tree")

        root = GraphUtils.treeRoot(treeGraph)
        distances = treeGraph.dijkstrasAlgorithm(root)
        return numpy.max(distances[distances!=float('inf')])

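    # Illustrative usage sketch (not part of the original module): treeRoot and
    # treeDepth rely on inDegreeSequence, so they assume a directed tree with
    # edges oriented away from the root (exactly one vertex of in-degree zero).
    #
    #   root = GraphUtils.treeRoot(treeGraph)
    #   depth = GraphUtils.treeDepth(treeGraph)    # 0 for an empty tree
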
    @staticmethod
    def modularity(W, clustering):
        """
        Given a symmetric weight matrix W and a clustering array "clustering",
        compute the modularity of Newman and Girvan. The input matrix W can be
        either an ndarray or a scipy.sparse matrix.
        """
        numVertices = W.shape[0]
        clusterIds = numpy.unique(clustering)

        if type(W) == numpy.ndarray:
            degSequence = numpy.sum(W, 0)
        else:
            degSequence = numpy.array(W.sum(0)).ravel()

        numEdges = numpy.sum(degSequence)/2.0
        Q = 0

        for i in clusterIds:
            inds = numpy.arange(numVertices)[i==clustering]
            subW = W[inds, :][:, inds]

            #Within-cluster weight minus the expected weight under the null model
            Q += subW.sum()
            Q -= degSequence[inds].sum()**2/(2.0*numEdges)

        Q = Q/(2*numEdges)
        return Q

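    # Illustrative sketch (not part of the original module): two disjoint edges
    # placed in separate clusters give a modularity of 0.5.
    #
    #   W = numpy.array([[0, 1, 0, 0],
    #                    [1, 0, 0, 0],
    #                    [0, 0, 0, 1],
    #                    [0, 0, 1, 0]], dtype=float)
    #   clustering = numpy.array([0, 0, 1, 1])
    #   Q = GraphUtils.modularity(W, clustering)    # Q == 0.5
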
    @staticmethod
    def kwayNormalisedCut(W, clustering):
        """
        Compute the k-way normalised cut of W for the given clustering. Each
        cluster should have at least 1 edge. The input matrix W can be either an
        ndarray or a scipy.sparse matrix.
        """
        numVertices = W.shape[0]
        clusterIds = numpy.unique(clustering)

        Q = 0
        for i in clusterIds:
            inds = numpy.arange(numVertices)[i==clustering]
            invInds = numpy.arange(numVertices)[i!=clustering]
            numClustEdges = float((W[inds, :]).sum())
            if (len(invInds) != 0) and (numClustEdges != 0):
                #Weight leaving the cluster divided by the cluster's total weight
                Q += (W[inds, :][:, invInds]).sum()/numClustEdges

        Q = Q/clusterIds.shape[0]
        return Q

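    # Illustrative sketch (not part of the original module): for the same two
    # disjoint edges and clustering as in the modularity sketch above, no weight
    # crosses a cluster boundary, so the k-way normalised cut is zero.
    #
    #   cut = GraphUtils.kwayNormalisedCut(W, clustering)    # cut == 0.0
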
    @staticmethod
    def shiftLaplacian(W):
        """
        Given a scipy sparse csr matrix W, compute the shifted Laplacian matrix,
        which is defined as I + D^-0.5 W D^-0.5 where D is a diagonal matrix of
        degrees. For vertices of degree zero, the corresponding row/col of the
        Laplacian is zero with a 0 at the diagonal. The eigenvalues of the shifted
        Laplacian are between 0 and 2.
        """
        if not scipy.sparse.isspmatrix_csr(W):
            raise ValueError("W is not a csr matrix")

        W = scipy.sparse.csr_matrix(W, dtype=numpy.float64)

        #Inverse square roots of the degrees, leaving zero-degree entries at zero
        d = numpy.array(W.sum(0)).ravel()
        d[d!=0] = d[d!=0]**-0.5
        D = scipy.sparse.spdiags(d, 0, d.shape[0], d.shape[0], format='csr')

        #Identity restricted to vertices of non-zero degree
        i = numpy.zeros(W.shape[0])
        i[d!=0] = 1
        I = scipy.sparse.spdiags(i, 0, i.shape[0], i.shape[0], format='csr')

        Lhat = I + D.dot(W).dot(D)
        return Lhat

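    # Illustrative sketch (not part of the original module): the shifted
    # Laplacian of a single unit-weight edge, assuming a csr weight matrix.
    #
    #   W = scipy.sparse.csr_matrix(numpy.array([[0.0, 1.0], [1.0, 0.0]]))
    #   Lhat = GraphUtils.shiftLaplacian(W)
    #   # Lhat.todense() == [[1, 1], [1, 1]] since D^-0.5 W D^-0.5 == W here.
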
    @staticmethod
    def normalisedLaplacianSym(W):
        """
        Given a scipy sparse csr matrix W, compute the normalised Laplacian matrix,
        which is defined as I - D^-0.5 W D^-0.5 where D is a diagonal matrix of
        degrees. For vertices of degree zero, the corresponding row/col of the
        Laplacian is zero with a 0 at the diagonal. The eigenvalues of the
        Laplacian are between 0 and 2.
        """
        if not scipy.sparse.isspmatrix_csr(W):
            raise ValueError("W is not a csr matrix")

        W = scipy.sparse.csr_matrix(W, dtype=numpy.float64)

        d = numpy.array(W.sum(0)).ravel()
        d[d!=0] = d[d!=0]**-0.5
        D = scipy.sparse.spdiags(d, 0, d.shape[0], d.shape[0], format='csr')

        i = numpy.zeros(W.shape[0])
        i[d!=0] = 1
        I = scipy.sparse.spdiags(i, 0, i.shape[0], i.shape[0], format='csr')

        Lhat = I - D.dot(W).dot(D)
        return Lhat

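    # Illustrative sketch (not part of the original module): for the same
    # single-edge matrix W = csr_matrix([[0, 1], [1, 0]]), the symmetric
    # normalised Laplacian is [[1, -1], [-1, 1]], with eigenvalues 0 and 2.
    #
    #   Lsym = GraphUtils.normalisedLaplacianSym(W)
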
    @staticmethod
    def normalisedLaplacianRw(W):
        """
        Compute the random walk Laplacian matrix given by D^-1 L = I - D^-1 W,
        where L is the unnormalised Laplacian and D is the diagonal matrix of
        degrees. Vertices of degree zero give a zero row/col with 0 at the diagonal.
        """
        if not scipy.sparse.isspmatrix_csr(W):
            raise ValueError("W is not a csr matrix")

        #Cast to float as in the other Laplacian helpers, so that the inverse
        #degrees are well-defined for integer weight matrices
        W = scipy.sparse.csr_matrix(W, dtype=numpy.float64)

        d = numpy.array(W.sum(0)).ravel()
        d[d!=0] = d[d!=0]**-1
        D = scipy.sparse.spdiags(d, 0, d.shape[0], d.shape[0], format='csr')

        i = numpy.zeros(W.shape[0])
        i[d!=0] = 1
        I = scipy.sparse.spdiags(i, 0, i.shape[0], i.shape[0], format='csr')

        Lhat = I - D.dot(W)
        return Lhat

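    # Illustrative sketch (not part of the original module): for a single
    # unit-weight edge the random walk Laplacian coincides with the symmetric
    # one, [[1, -1], [-1, 1]], because every vertex has degree 1.
    #
    #   Lrw = GraphUtils.normalisedLaplacianRw(W)
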
    @staticmethod
    def modularityMatrix(W):
        """
        Compute the modularity matrix B = W - d d^T/(2m) from a weight matrix W,
        where d is the degree sequence and m is the number of edges.
        """
        if not scipy.sparse.isspmatrix_csr(W):
            raise ValueError("W is not a csr matrix")

        d = numpy.array(W.sum(0)).ravel()
        m = W.getnnz()/2.0

        B = W - numpy.outer(d, d)/(2*m)

        return B

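    # Illustrative sketch (not part of the original module): the modularity
    # matrix of a single unweighted edge; the result is dense because of the
    # outer-product term.
    #
    #   W = scipy.sparse.csr_matrix(numpy.array([[0.0, 1.0], [1.0, 0.0]]))
    #   B = GraphUtils.modularityMatrix(W)
    #   # B == [[-0.5, 0.5], [0.5, -0.5]] since d = [1, 1] and m = 1.
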
    @staticmethod
    def randIndex(clustering1, clustering2):
        """
        Compute a Rand index based error for the 2 clusterings given in arrays
        clustering1 and clustering2: the proportion of vertex pairs on which the
        clusterings disagree, i.e. 1 minus the Rand index.
        """
        numVertices = clustering1.shape[0]
        error = 0

        for i in range(numVertices):
            #Vertices which share a cluster with vertex i, in each clustering
            same_cl = clustering1[i] == clustering1
            same_learned_cl = clustering2[i] == clustering2
            error += (same_cl != same_learned_cl).sum()

        return float(error)/(numVertices*(numVertices-1))
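
# Illustrative sketch (not part of the original module): a minimal smoke test of
# the matrix-based utilities, assuming only numpy and scipy are available.
if __name__ == "__main__":
    W = scipy.sparse.csr_matrix(numpy.array([[0.0, 1.0, 0.0, 0.0],
                                             [1.0, 0.0, 0.0, 0.0],
                                             [0.0, 0.0, 0.0, 1.0],
                                             [0.0, 0.0, 1.0, 0.0]]))
    clustering = numpy.array([0, 0, 1, 1])

    print(GraphUtils.modularity(W, clustering))           # 0.5
    print(GraphUtils.kwayNormalisedCut(W, clustering))    # 0.0
    print(GraphUtils.randIndex(clustering, clustering))   # 0.0 for identical clusterings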