1""" 2Some methods which are useful but didn't quite fit into the graph classes. 3 4""" 5import logging 6import numpy 7import scipy.sparse 8 9class GraphUtils(object): 10 def __init__(self): 11 pass 12 13 @staticmethod 14 def vertexLabelPairs(graph, edgeArray): 15 """ 16 Create an array of pairs of vertex labels for each edge in the graph. Returns 17 a tuple of this array and a vector of corresponding edge weights, for the 18 indices of the full edge index array as given by getAllEdges. 19 20 :param edgeArray: A numpy array with 2 columns, and values corresponding to vertex indices. 21 :type edgeArray: :class:`numpy.ndarray` 22 23 """ 24 #Parameter.checkList(edgeArray, Parameter.checkIndex, (0, self.getNumVertices())) 25 26 numFeatures = graph.getVertexList().getNumFeatures() 27 X = numpy.zeros((edgeArray.shape[0], numFeatures*2)) 28 29 for i in range(edgeArray.shape[0]): 30 X[i, 0:numFeatures] = graph.getVertex(edgeArray[i, 0]) 31 X[i, numFeatures:numFeatures*2] = graph.getVertex(edgeArray[i, 1]) 32 33 return X 34 35 @staticmethod 36 def vertexLabelExamples(graph): 37 """ 38 Return a set of examples with pairs of vertex labels connected by an 39 edge. For undircted graphs there exists an example (v_i, v_j) for 40 every (v_j, v_i). Also, there is a set of negative examples where the 41 edge does not exist. 42 """ 43 numFeatures = graph.getVertexList().getNumFeatures() 44 numEdges = graph.getNumEdges() 45 46 #Also add non-edges 47 logging.info("Computing graph complement") 48 cGraph = graph.complement() 49 logging.info("Done with " + str(cGraph.getNumEdges()) + " edges.") 50 perm = numpy.random.permutation(cGraph.getNumEdges())[0:numEdges] 51 52 X = GraphUtils.vertexLabelPairs(graph, graph.getAllEdges()) 53 Xc = GraphUtils.vertexLabelPairs(cGraph, cGraph.getAllEdges()[perm, :]) 54 X = numpy.r_[X, Xc] 55 56 y = numpy.ones(numEdges*2) 57 y[numEdges:numEdges*2] = -1 58 logging.debug(y) 59 60 #If the graph is undirected add reverse edges 61 if graph.isUndirected(): 62 X2 = numpy.zeros((numEdges*2, numFeatures*2)) 63 X2[:, 0:numFeatures] = X[:, numFeatures:numFeatures*2] 64 X2[:, numFeatures:numFeatures*2] = X[:, 0:numFeatures] 65 X = numpy.r_[X, X2] 66 y = numpy.r_[y, y] 67 68 return X, y 69 70 @staticmethod 71 def treeRoot(treeGraph): 72 """ 73 Find the root of the given tree 74 """ 75 inDegSeq = treeGraph.inDegreeSequence() 76 root = numpy.nonzero(inDegSeq==0)[0][0] 77 return root 78 79 @staticmethod 80 def treeDepth(treeGraph): 81 """ 82 Find the depth of the given tree. 83 """ 84 if treeGraph.getNumVertices()==0: 85 return 0 86 87 if not treeGraph.isTree(): 88 raise ValueError("Input graph is not a tree") 89 90 root = GraphUtils.treeRoot(treeGraph) 91 distances = treeGraph.dijkstrasAlgorithm(root) 92 return numpy.max(distances[distances!=float('inf')]) 93 94 95 @staticmethod 96 def modularity(W, clustering): 97 """ 98 Give a symmetric weight matrix W and a clustering array "clustering", compute the 99 modularity of Newman and Girvan. The input matrix W can be either an 100 ndarray or a scipy.sparse matrix. 101 """ 102 numVertices = W.shape[0] 103 clusterIds = numpy.unique(clustering) 104 105 if type(W) == numpy.ndarray: 106 degSequence = numpy.sum(W, 0) 107 else: 108 degSequence = numpy.array(W.sum(0)).ravel() 109 110 numEdges = numpy.sum(degSequence)/2.0 111 Q = 0 112 113 for i in clusterIds: 114 inds = numpy.arange(numVertices)[i==clustering] 115 subW = W[inds, :][:, inds] 116 117 Q += subW.sum() 118 Q -= degSequence[inds].sum()**2/(2.0*numEdges) 119 120 Q = Q/(2*numEdges) 121 return Q 122 123 124 @staticmethod 125 def kwayNormalisedCut(W, clustering): 126 """ 127 Do k-way normalised cut. Each cluster should have at least 1 edge. The input 128 matrix W can be either an ndarray or a scipy.sparse matrix. 129 """ 130 numVertices = W.shape[0] 131 clusterIds = numpy.unique(clustering) 132 133 Q = 0 134 for i in clusterIds: 135 inds = numpy.arange(numVertices)[i==clustering] 136 invInds = numpy.arange(numVertices)[i!=clustering] 137 numClustEdges = float((W[inds, :]).sum()) 138 if (len(invInds) != 0) and (numClustEdges != 0): 139 Q += (W[inds, :][:, invInds]).sum()/numClustEdges 140 141 Q = Q/clusterIds.shape[0] 142 return Q 143 144 145 @staticmethod 146 def shiftLaplacian(W): 147 """ 148 Give a scipy sparse csr matrix W, compute the shifted Laplacian matrix, 149 which is defined as I + D^-0.5 W D^-0.5 where D is a diagonal matrix of 150 degrees. For vertices of degree zero, the corresponding row/col of the 151 Laplacian is zero with a 0 at the diagonal. The eigenvalues of the shift 152 Laplacian are between 0 and 2. 153 """ 154 if not scipy.sparse.isspmatrix_csr(W): 155 raise ValueError("W is not a csr matrix") 156 157 W = scipy.sparse.csr_matrix(W, dtype=numpy.float) 158 159 d = numpy.array(W.sum(0)).ravel() 160 d[d!=0] = d[d!=0]**-0.5 161 D = scipy.sparse.spdiags(d, 0, d.shape[0], d.shape[0], format='csr') 162 163 i = numpy.zeros(W.shape[0]) 164 i[d!=0] = 1 165 I = scipy.sparse.spdiags(i, 0, i.shape[0], i.shape[0], format='csr') 166 167 Lhat = I + D.dot(W).dot(D) 168 return Lhat 169 170 @staticmethod 171 def normalisedLaplacianSym(W): 172 """ 173 Give a scipy sparse csr matrix W, compute the normalised Laplacian matrix, 174 which is defined as I - D^-0.5 W D^-0.5 where D is a diagonal matrix of 175 degrees. For vertices of degree zero, the corresponding row/col of the 176 Laplacian is zero with a 0 at the diagonal. The eigenvalues of the 177 Laplacian are between 0 and 2. 178 """ 179 if not scipy.sparse.isspmatrix_csr(W): 180 raise ValueError("W is not a csr matrix") 181 W = scipy.sparse.csr_matrix(W, dtype=numpy.float) 182 d = numpy.array(W.sum(0)).ravel() 183 d[d!=0] = d[d!=0]**-0.5 184 D = scipy.sparse.spdiags(d, 0, d.shape[0], d.shape[0], format='csr') 185 186 i = numpy.zeros(W.shape[0]) 187 i[d!=0] = 1 188 I = scipy.sparse.spdiags(i, 0, i.shape[0], i.shape[0], format='csr') 189 190 Lhat = I - D.dot(W).dot(D) 191 return Lhat 192 193 @staticmethod 194 def normalisedLaplacianRw(W): 195 """ 196 Compute the random walk Laplacian matrix given by D^-1 L where L is the 197 unnormalised Laplacian. 198 """ 199 if not scipy.sparse.isspmatrix_csr(W): 200 raise ValueError("W is not a csr matrix") 201 202 d = numpy.array(W.sum(0)).ravel() 203 d[d!=0] = d[d!=0]**-1 204 D = scipy.sparse.spdiags(d, 0, d.shape[0], d.shape[0], format='csr') 205 206 i = numpy.zeros(W.shape[0]) 207 i[d!=0] = 1 208 I = scipy.sparse.spdiags(i, 0, i.shape[0], i.shape[0], format='csr') 209 210 Lhat = I - D.dot(W) 211 return Lhat 212 213 @staticmethod 214 def modularityMatrix(W): 215 """ 216 Compute the modularity matrix from a weight matrix W. 217 """ 218 if not scipy.sparse.isspmatrix_csr(W): 219 raise ValueError("W is not a csr matrix") 220 221 d = numpy.array(W.sum(0)).ravel() 222 m = W.getnnz()/2 223 224 B = W - numpy.outer(d, d)/(2*m) 225 226 return B 227 228 @staticmethod 229 def randIndex(clustering1, clustering2): 230 """ 231 Compute the rand index for 2 clusterings given in arrays v1 and v2. 232 """ 233 numVertices = clustering1.shape[0] 234 error = 0 235 236 for i in range(numVertices): 237 same_cl = clustering1[i] == clustering1 238 same_learned_cl = clustering2[i] == clustering2 239 error += (same_cl != same_learned_cl).sum() 240 241 return float(error)/(numVertices*(numVertices-1)) 242