1 //------------------------------------------------------------------------------
2 // SuiteSparse/GraphBLAS/Demo/Source/dpagerank2: pagerank using a real semiring
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // PageRank via EXTREME GraphBLAS-ing!
11
12 // A is a square unsymmetric binary matrix of size n-by-n, where A(i,j) is the
13 // edge (i,j). Self-edges are OK. A can be of any built-in type.
14
15 // On output, P is pointer to an array of PageRank structs. P[0] is the
16 // highest ranked page, with pagerank P[0].pagerank and the page corresponds to
17 // node number P[0].page in the graph. P[1] is the next page, and so on, to
18 // the lowest-ranked page P[n-1].page with rank P[n-1].pagerank.
19
20 // This version operates on the original matrix A, without changing it. The
21 // entire computation is done via a set of user-defined objects: a type,
22 // several operators, a monoid, and a semiring.
23
24 // Acknowledgements: this method was written with input from Richard Veras,
25 // Franz Franchetti, and Scott McMillan, Carnegie Mellon University.
26
27 #include "GraphBLAS.h"
28
29 //------------------------------------------------------------------------------
30 // helper macros
31 //------------------------------------------------------------------------------
32
33 // free all workspace
34 #define FREEWORK \
35 { \
36 GrB_Vector_free (&rdouble) ; \
37 GrB_Vector_free (&r) ; \
38 GrB_Vector_free (&rnew) ; \
39 GrB_Vector_free (&dout) ; \
40 GrB_Vector_free (&rdiff) ; \
41 GrB_Descriptor_free (&desc) ; \
42 if (I != NULL) free (I) ; \
43 if (X != NULL) free (X) ; \
44 GrB_BinaryOp_free (&PageRank_accum) ; \
45 GrB_BinaryOp_free (&PageRank_add) ; \
46 GrB_Monoid_free (&PageRank_monoid) ; \
47 GrB_BinaryOp_free (&PageRank_multiply) ; \
48 GrB_Semiring_free (&PageRank_semiring) ; \
49 GrB_BinaryOp_free (&PageRank_diff) ; \
50 GrB_Type_free (&PageRank_type) ; \
51 GrB_UnaryOp_free (&PageRank_div) ; \
52 GrB_UnaryOp_free (&PageRank_get) ; \
53 GrB_UnaryOp_free (&PageRank_init) ; \
54 }
55
56 // error handler: free output P and all workspace (used by CHECK and OK macros)
57 #define FREE_ALL \
58 { \
59 if (P != NULL) free (P) ; \
60 FREEWORK ; \
61 }
62
63 #undef GB_PUBLIC
64 #define GB_LIBRARY
65 #include "graphblas_demos.h"
66
67 //------------------------------------------------------------------------------
68 // scalar types and operators
69 //------------------------------------------------------------------------------
70
71 // each node has a rank value, and a constant which is 1/outdegree
72 typedef struct
73 {
74 double rank ;
75 double invdegree ;
76 }
77 pagerank_type ;
78
79 // probability of walking to random neighbor
80 #define PAGERANK_DAMPING 0.85
81
82 // NOTE: these operators use global values. dpagerank2 can be done in
83 // parallel, internally, but only one instance of dpagerank can be used.
84
85 // global values shared by all threads:
86 double pagerank_teleport, pagerank_init_rank, pagerank_rsum ;
87
88 // identity value for the pagerank_add monoid
89 pagerank_type pagerank_zero = { 0, 0 } ;
90
91 // unary operator to divide a double entry by the scalar pagerank_rsum
pagerank_div(double * z,const double * x)92 void pagerank_div (double *z, const double *x)
93 {
94 (*z) = (*x) / pagerank_rsum ;
95 }
96
97 // unary operator that typecasts PageRank_type to double, extracting the rank
pagerank_get_rank(double * z,const pagerank_type * x)98 void pagerank_get_rank (double *z, const pagerank_type *x)
99 {
100 (*z) = (x->rank) ;
101 }
102
103 // unary operator to initialize a node
init_page(pagerank_type * z,const double * x)104 void init_page (pagerank_type *z, const double *x)
105 {
106 z->rank = pagerank_init_rank ; // all nodes start with rank 1/n
107 z->invdegree = 1. / (*x) ; // set 1/outdegree of this node
108 }
109
110 //------------------------------------------------------------------------------
111 // PageRank semiring
112 //------------------------------------------------------------------------------
113
114 // In MATLAB notation, the new rank is computed with:
115 // newrank = PAGERANK_DAMPING * (rank * D * A) + pagerank_teleport
116
117 // where A is a square binary matrix of the original graph, and A(i,j)=1 if
118 // page i has a link to page j. rank is a row vector of size n. The matrix D
119 // is diagonal, with D(i,i)=1/outdegree(i), where outdegree(i) = the outdegree
120 // of node i, or equivalently, outdegree(i) = sum (A (i,:)).
121
122 // That is, if newrank(j) were computed with a dot product:
123 // newrank (j) = 0
124 // for all i:
125 // newrank (j) = newrank (j) + (rank (i) * D (i,i)) * A (i,j)
126
127 // To accomplish this computation in a single vector-matrix multiply, the value
128 // of D(i,i) is held as component of a combined data type, the pagerank_type,
129 // which has both the rank(i) and the entry D(i,i).
130
131 // binary multiplicative operator for the pagerank semiring
pagerank_multiply(pagerank_type * z,const pagerank_type * x,const bool * y)132 void pagerank_multiply
133 (
134 pagerank_type *z,
135 const pagerank_type *x,
136 const bool *y
137 )
138 {
139 // y is the boolean entry of the matrix, A(i,j)
140 // x->rank is the rank of node i, and x->invdegree is 1/outdegree(i)
141 // note that z->invdegree is left unchanged
142 z->rank = (*y) ? ((x->rank) * (x->invdegree)) : 0 ;
143 }
144
145 // binary additive operator for the pagerank semiring
pagerank_add(pagerank_type * z,const pagerank_type * x,const pagerank_type * y)146 void pagerank_add
147 (
148 pagerank_type *z,
149 const pagerank_type *x,
150 const pagerank_type *y
151 )
152 {
153 // note that z->invdegree is left unchanged; it is unused
154 z->rank = (x->rank) + (y->rank) ;
155 }
156
157 //------------------------------------------------------------------------------
158 // pagerank accumulator
159 //------------------------------------------------------------------------------
160
161 // The semiring computes the vector newrank = rank*D*A. To complete the page
162 // rank computation, the new rank must be scaled by the
163 // PAGERANK_DAMPING, and the pagerank_teleport must be included, which is
164 // done in the page rank accumulator:
165
166 // newrank = PAGERANK_DAMPING * newrank + pagerank_teleport
167
168 // The PageRank_semiring does not construct the entire pagerank_type of
169 // rank*D*A, since the vector that holds newrank(i) must also keep the
170 // 1/invdegree(i), unchanged. This is restored in the accumulator operator.
171
172 // binary operator to accumulate the new rank from the old
pagerank_accum(pagerank_type * z,const pagerank_type * x,const pagerank_type * y)173 void pagerank_accum
174 (
175 pagerank_type *z,
176 const pagerank_type *x,
177 const pagerank_type *y
178 )
179 {
180 // note that this formula does not use the old rank:
181 // new rank = PAGERANK_DAMPING * (rank*A ) + pagerank_teleport
182 double rnew = PAGERANK_DAMPING * (y->rank) + pagerank_teleport ;
183
184 // update the rank, and copy over the inverse degree from the old page info
185 z->rank = rnew ;
186 z->invdegree = x->invdegree ;
187 }
188
189 //------------------------------------------------------------------------------
190 // pagerank_diff: compute the change in the rank
191 //------------------------------------------------------------------------------
192
pagerank_diff(pagerank_type * z,const pagerank_type * x,const pagerank_type * y)193 void pagerank_diff
194 (
195 pagerank_type *z,
196 const pagerank_type *x,
197 const pagerank_type *y
198 )
199 {
200 double delta = (x->rank) - (y->rank) ;
201 z->rank = delta * delta ;
202 }
203
204 //------------------------------------------------------------------------------
205 // comparison function for qsort
206 //------------------------------------------------------------------------------
207
pagerank_compar(const void * x,const void * y)208 int pagerank_compar (const void *x, const void *y)
209 {
210 PageRank *a = (PageRank *) x ;
211 PageRank *b = (PageRank *) y ;
212
213 // sort by pagerank in descending order
214 if (a->pagerank > b->pagerank)
215 {
216 return (-1) ;
217 }
218 else if (a->pagerank == b->pagerank)
219 {
220 return (0) ;
221 }
222 else
223 {
224 return (1) ;
225 }
226 }
227
228 //------------------------------------------------------------------------------
229 // dpagerank2: compute the PageRank of all nodes in a graph
230 //------------------------------------------------------------------------------
231
232 GB_PUBLIC
dpagerank2(PageRank ** Phandle,GrB_Matrix A,int itermax,double tol,int * iters,GrB_Desc_Value method)233 GrB_Info dpagerank2 // GrB_SUCCESS or error condition
234 (
235 PageRank **Phandle, // output: pointer to array of PageRank structs
236 GrB_Matrix A, // input graph, not modified
237 int itermax, // max number of iterations
238 double tol, // stop when norm (r-rnew,2) < tol
239 int *iters, // number of iterations taken
240 GrB_Desc_Value method // method to use for GrB_vxm (for testing only)
241 )
242 {
243
244 GrB_Info info ;
245 double *X = NULL ;
246 GrB_Index n, *I = NULL ;
247 PageRank *P = NULL ;
248 GrB_Descriptor desc = NULL ;
249 GrB_Vector r = NULL, dout = NULL, rdouble = NULL, rnew = NULL, rdiff = NULL;
250
251 //--------------------------------------------------------------------------
252 // create the new type, operators, monoid, and semiring
253 //--------------------------------------------------------------------------
254
255 GrB_Type PageRank_type = NULL ;
256 GrB_UnaryOp PageRank_div = NULL, PageRank_get = NULL, PageRank_init = NULL ;
257 GrB_BinaryOp PageRank_accum = NULL, PageRank_add = NULL,
258 PageRank_multiply = NULL, PageRank_diff = NULL ;
259 GrB_Monoid PageRank_monoid = NULL ;
260 GrB_Semiring PageRank_semiring = NULL ;
261
262 // create the new Page type
263 OK (GrB_Type_new (&PageRank_type, sizeof (pagerank_type))) ;
264
265 #define U (GxB_unary_function)
266 #define B (GxB_binary_function)
267
268 // create the unary operator to initialize the PageRank_type of each node
269 OK (GrB_UnaryOp_new (&PageRank_init, U init_page, PageRank_type, GrB_FP64));
270
271 // create PageRank_accum
272 OK (GrB_BinaryOp_new (&PageRank_accum, B pagerank_accum,
273 PageRank_type, PageRank_type, PageRank_type)) ;
274
275 // create PageRank_add operator and monoid
276 OK (GrB_BinaryOp_new (&PageRank_add, B pagerank_add,
277 PageRank_type, PageRank_type, PageRank_type)) ;
278 OK (GrB_Monoid_new_UDT (&PageRank_monoid, PageRank_add, &pagerank_zero)) ;
279
280 // create PageRank_multiply operator
281 OK (GrB_BinaryOp_new (&PageRank_multiply, B pagerank_multiply,
282 PageRank_type, PageRank_type, GrB_BOOL)) ;
283
284 // create PageRank_semiring
285 OK (GrB_Semiring_new (&PageRank_semiring, PageRank_monoid,
286 PageRank_multiply)) ;
287
288 // create unary operator that typecasts the PageRank_type to double
289 OK (GrB_UnaryOp_new (&PageRank_get, U pagerank_get_rank, GrB_FP64,
290 PageRank_type)) ;
291
292 // create unary operator that scales the rank by pagerank_rsum
293 OK (GrB_UnaryOp_new (&PageRank_div, U pagerank_div, GrB_FP64, GrB_FP64)) ;
294
295 // create PageRank_diff operator
296 OK (GrB_BinaryOp_new (&PageRank_diff, B pagerank_diff,
297 PageRank_type, PageRank_type, PageRank_type)) ;
298
299 //--------------------------------------------------------------------------
300 // initializations
301 //--------------------------------------------------------------------------
302
303 (*Phandle) = NULL ;
304
305 // n = size (A,1) ; // number of nodes
306 OK (GrB_Matrix_nrows (&n, A)) ;
307
308 // dout = sum (A,2) ; // dout(i) is the out-degree of node i
309 OK (GrB_Vector_new (&dout, GrB_FP64, n)) ;
310 OK (GrB_Matrix_reduce_Monoid (dout, NULL, NULL, GrB_PLUS_MONOID_FP64,
311 A, NULL)) ;
312
313 // all nodes start with rank 1/n
314 pagerank_init_rank = 1.0 / ((double) n) ;
315
316 // initialize the page rank and inverse degree of each node
317 OK (GrB_Vector_new (&r, PageRank_type, n)) ;
318 OK (GrB_Vector_apply (r, NULL, NULL, PageRank_init, dout, NULL)) ;
319
320 // dout vector no longer needed
321 OK (GrB_Vector_free (&dout)) ;
322
323 // to jump to any random node in entire graph:
324 pagerank_teleport = (1-PAGERANK_DAMPING) / n ;
325
326 tol = tol*tol ; // use tol^2 so sqrt(...) not needed
327 double pagerank_rdiff = 1 ; // so first iteration is always done
328
329 // create rdouble, a double vector of size n
330 OK (GrB_Vector_new (&rdouble, GrB_FP64, n)) ;
331
332 // Note that dup is needed, since the invdegree is copied by the
333 // PageRank_accum.
334 OK (GrB_Vector_dup (&rnew, r)) ;
335 OK (GrB_Vector_new (&rdiff, PageRank_type, n)) ;
336
337 // select method for GrB_vxm (for testing only; default is fine)
338 if (method != GxB_DEFAULT)
339 {
340 OK (GrB_Descriptor_new (&desc)) ;
341 OK (GxB_Desc_set (desc, GxB_AxB_METHOD, method)) ;
342 }
343
344 //--------------------------------------------------------------------------
345 // iterate to compute the pagerank of each node
346 //--------------------------------------------------------------------------
347
348 for ((*iters) = 0 ; (*iters) < itermax && pagerank_rdiff > tol ; (*iters)++)
349 {
350
351 // rnew = PAGERANK_DAMPING * (r * D * A) + pagerank_teleport
352 OK (GrB_vxm (rnew, NULL, PageRank_accum, PageRank_semiring, r, A,
353 desc)) ;
354
355 // compute pagerank_rdiff = sum ((r - rnew).^2)
356 OK (GrB_Vector_eWiseAdd_BinaryOp (rdiff, NULL, NULL, PageRank_diff,
357 r, rnew, NULL)) ;
358 pagerank_type rsum ;
359 OK (GrB_Vector_reduce_UDT (&rsum, NULL, PageRank_monoid, rdiff, NULL)) ;
360
361 pagerank_rdiff = rsum.rank ;
362
363 // r = rnew, using a swap, which is faster than assign or dup
364 GrB_Vector rtemp = r ;
365 r = rnew ;
366 rnew = rtemp ;
367 }
368
369 //--------------------------------------------------------------------------
370 // scale the result: rdouble = rank / sum(r)
371 //--------------------------------------------------------------------------
372
373 // rnew (for the safe version) is no longer needed
374 GrB_Vector_free (&rnew) ;
375
376 // rdouble = pagerank_get_rank (r)
377 OK (GrB_Vector_apply (rdouble, NULL, NULL, PageRank_get, r, NULL)) ;
378
379 // r no longer needed
380 GrB_Vector_free (&r) ;
381
382 // pagerank_rsum = sum (rdouble)
383 OK (GrB_Vector_reduce_FP64 (&pagerank_rsum, NULL, GrB_PLUS_MONOID_FP64,
384 rdouble, NULL)) ;
385
386 // could also do this with GrB_vxm, with a 1-by-1 matrix
387 // r = r / pagerank_rsum
388 OK (GrB_Vector_apply (rdouble, NULL, NULL, PageRank_div, rdouble, NULL)) ;
389
390 //--------------------------------------------------------------------------
391 // sort the nodes by pagerank
392 //--------------------------------------------------------------------------
393
394 // GraphBLAS does not have a mechanism to sort the components of a vector,
395 // so it must be done by extracting and then sorting the tuples from
396 // the GrB_Vector rdouble.
397
398 // [r,irank] = sort (r, 'descend') ;
399
400 // [I,X] = find (r) ;
401 X = (double *) malloc (n * sizeof (double)) ;
402 I = (GrB_Index *) malloc (n * sizeof (GrB_Index)) ;
403 CHECK (I != NULL && X != NULL, GrB_OUT_OF_MEMORY) ;
404 GrB_Index nvals = n ;
405 OK (GrB_Vector_extractTuples_FP64 (I, X, &nvals, rdouble)) ;
406
407 // rdouble no longer needed
408 GrB_Vector_free (&rdouble) ;
409
410 // P = struct (X,I)
411 P = (PageRank *) malloc (n * sizeof (PageRank)) ;
412 CHECK (P != NULL, GrB_OUT_OF_MEMORY) ;
413 int64_t k ;
414 for (k = 0 ; k < nvals ; k++)
415 {
416 // The kth ranked page is P[k].page (with k=0 being the highest rank),
417 // and its pagerank is P[k].pagerank.
418 P [k].pagerank = X [k] ;
419 // I [k] == k will be true for SuiteSparse:GraphBLAS but in general I
420 // can be returned in any order, so use I [k] instead of k, for other
421 // GraphBLAS implementations.
422 P [k].page = I [k] ;
423 }
424 for ( ; k < n ; k++)
425 {
426 // If A has empty columns, then r will become sparse. In this case,
427 // pages with no incoming edges will be unranked. The drowscale
428 // function avoids this problem by adding a
429 P [k].pagerank = 0 ;
430 P [k].page = -1 ;
431 }
432
433 // free workspace
434 FREEWORK ;
435
436 // qsort (P) in descending order
437 qsort (P, nvals, sizeof (PageRank), pagerank_compar) ;
438
439 //--------------------------------------------------------------------------
440 // return result
441 //--------------------------------------------------------------------------
442
443 (*Phandle) = P ;
444 return (GrB_SUCCESS) ;
445 }
446
447