1 //------------------------------------------------------------------------------
2 // SuiteSparse/GraphBLAS/Demo/Source/dpagerank2: pagerank using a real semiring
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // PageRank via EXTREME GraphBLAS-ing!
11 
12 // A is a square unsymmetric binary matrix of size n-by-n, where A(i,j) is the
13 // edge (i,j).  Self-edges are OK.  A can be of any built-in type.
14 
15 // On output, P is pointer to an array of PageRank structs.  P[0] is the
16 // highest ranked page, with pagerank P[0].pagerank and the page corresponds to
17 // node number P[0].page in the graph.  P[1] is the next page, and so on, to
18 // the lowest-ranked page P[n-1].page with rank P[n-1].pagerank.
19 
20 // This version operates on the original matrix A, without changing it.  The
21 // entire computation is done via a set of user-defined objects:  a type,
22 // several operators, a monoid, and a semiring.
23 
24 // Acknowledgements:  this method was written with input from Richard Veras,
25 // Franz Franchetti, and Scott McMillan, Carnegie Mellon University.
26 
27 #include "GraphBLAS.h"
28 
29 //------------------------------------------------------------------------------
30 // helper macros
31 //------------------------------------------------------------------------------
32 
33 // free all workspace
34 #define FREEWORK                                \
35 {                                               \
36     GrB_Vector_free (&rdouble) ;                \
37     GrB_Vector_free (&r) ;                      \
38     GrB_Vector_free (&rnew) ;                   \
39     GrB_Vector_free (&dout) ;                   \
40     GrB_Vector_free (&rdiff) ;                  \
41     GrB_Descriptor_free (&desc) ;               \
42     if (I != NULL) free (I) ;                   \
43     if (X != NULL) free (X) ;                   \
44     GrB_BinaryOp_free (&PageRank_accum) ;       \
45     GrB_BinaryOp_free (&PageRank_add) ;         \
46     GrB_Monoid_free (&PageRank_monoid) ;        \
47     GrB_BinaryOp_free (&PageRank_multiply) ;    \
48     GrB_Semiring_free (&PageRank_semiring) ;    \
49     GrB_BinaryOp_free (&PageRank_diff) ;        \
50     GrB_Type_free (&PageRank_type) ;            \
51     GrB_UnaryOp_free (&PageRank_div) ;          \
52     GrB_UnaryOp_free (&PageRank_get) ;          \
53     GrB_UnaryOp_free (&PageRank_init) ;         \
54 }
55 
56 // error handler: free output P and all workspace (used by CHECK and OK macros)
57 #define FREE_ALL                \
58 {                               \
59     if (P != NULL) free (P) ;   \
60     FREEWORK ;                  \
61 }
62 
63 #undef GB_PUBLIC
64 #define GB_LIBRARY
65 #include "graphblas_demos.h"
66 
67 //------------------------------------------------------------------------------
68 // scalar types and operators
69 //------------------------------------------------------------------------------
70 
71 // each node has a rank value, and a constant which is 1/outdegree
72 typedef struct
73 {
74     double rank ;
75     double invdegree ;
76 }
77 pagerank_type ;
78 
79 // probability of walking to random neighbor
80 #define PAGERANK_DAMPING 0.85
81 
82 // NOTE: these operators use global values.  dpagerank2 can be done in
83 // parallel, internally, but only one instance of dpagerank can be used.
84 
85 // global values shared by all threads:
86 double pagerank_teleport, pagerank_init_rank, pagerank_rsum ;
87 
88 // identity value for the pagerank_add monoid
89 pagerank_type pagerank_zero = { 0, 0 } ;
90 
91 // unary operator to divide a double entry by the scalar pagerank_rsum
pagerank_div(double * z,const double * x)92 void pagerank_div (double *z, const double *x)
93 {
94     (*z) = (*x) / pagerank_rsum ;
95 }
96 
97 // unary operator that typecasts PageRank_type to double, extracting the rank
pagerank_get_rank(double * z,const pagerank_type * x)98 void pagerank_get_rank (double *z, const pagerank_type *x)
99 {
100     (*z) = (x->rank) ;
101 }
102 
103 // unary operator to initialize a node
init_page(pagerank_type * z,const double * x)104 void init_page (pagerank_type *z, const double *x)
105 {
106     z->rank = pagerank_init_rank ;  // all nodes start with rank 1/n
107     z->invdegree = 1. / (*x) ;      // set 1/outdegree of this node
108 }
109 
110 //------------------------------------------------------------------------------
111 // PageRank semiring
112 //------------------------------------------------------------------------------
113 
114 // In MATLAB notation, the new rank is computed with:
115 // newrank = PAGERANK_DAMPING * (rank * D * A) + pagerank_teleport
116 
117 // where A is a square binary matrix of the original graph, and A(i,j)=1 if
118 // page i has a link to page j.  rank is a row vector of size n.  The matrix D
119 // is diagonal, with D(i,i)=1/outdegree(i), where outdegree(i) = the outdegree
120 // of node i, or equivalently, outdegree(i) = sum (A (i,:)).
121 
122 // That is, if newrank(j) were computed with a dot product:
123 //      newrank (j) = 0
124 //      for all i:
125 //          newrank (j) = newrank (j) + (rank (i) * D (i,i)) * A (i,j)
126 
127 // To accomplish this computation in a single vector-matrix multiply, the value
128 // of D(i,i) is held as component of a combined data type, the pagerank_type,
129 // which has both the rank(i) and the entry D(i,i).
130 
131 // binary multiplicative operator for the pagerank semiring
pagerank_multiply(pagerank_type * z,const pagerank_type * x,const bool * y)132 void pagerank_multiply
133 (
134     pagerank_type *z,
135     const pagerank_type *x,
136     const bool *y
137 )
138 {
139     // y is the boolean entry of the matrix, A(i,j)
140     // x->rank is the rank of node i, and x->invdegree is 1/outdegree(i)
141     // note that z->invdegree is left unchanged
142     z->rank = (*y) ? ((x->rank) * (x->invdegree)) : 0 ;
143 }
144 
145 // binary additive operator for the pagerank semiring
pagerank_add(pagerank_type * z,const pagerank_type * x,const pagerank_type * y)146 void pagerank_add
147 (
148     pagerank_type *z,
149     const pagerank_type *x,
150     const pagerank_type *y
151 )
152 {
153     // note that z->invdegree is left unchanged; it is unused
154     z->rank = (x->rank) + (y->rank) ;
155 }
156 
157 //------------------------------------------------------------------------------
158 // pagerank accumulator
159 //------------------------------------------------------------------------------
160 
161 // The semiring computes the vector newrank = rank*D*A.  To complete the page
162 // rank computation, the new rank must be scaled by the
163 // PAGERANK_DAMPING, and the pagerank_teleport must be included, which is
164 // done in the page rank accumulator:
165 
166 // newrank = PAGERANK_DAMPING * newrank + pagerank_teleport
167 
168 // The PageRank_semiring does not construct the entire pagerank_type of
169 // rank*D*A, since the vector that holds newrank(i) must also keep the
170 // 1/invdegree(i), unchanged.  This is restored in the accumulator operator.
171 
172 // binary operator to accumulate the new rank from the old
pagerank_accum(pagerank_type * z,const pagerank_type * x,const pagerank_type * y)173 void pagerank_accum
174 (
175     pagerank_type *z,
176     const pagerank_type *x,
177     const pagerank_type *y
178 )
179 {
180     // note that this formula does not use the old rank:
181     // new rank = PAGERANK_DAMPING * (rank*A ) + pagerank_teleport
182     double rnew = PAGERANK_DAMPING * (y->rank) + pagerank_teleport ;
183 
184     // update the rank, and copy over the inverse degree from the old page info
185     z->rank = rnew ;
186     z->invdegree = x->invdegree ;
187 }
188 
189 //------------------------------------------------------------------------------
190 // pagerank_diff: compute the change in the rank
191 //------------------------------------------------------------------------------
192 
pagerank_diff(pagerank_type * z,const pagerank_type * x,const pagerank_type * y)193 void pagerank_diff
194 (
195     pagerank_type *z,
196     const pagerank_type *x,
197     const pagerank_type *y
198 )
199 {
200     double delta = (x->rank) - (y->rank) ;
201     z->rank = delta * delta ;
202 }
203 
204 //------------------------------------------------------------------------------
205 // comparison function for qsort
206 //------------------------------------------------------------------------------
207 
pagerank_compar(const void * x,const void * y)208 int pagerank_compar (const void *x, const void *y)
209 {
210     PageRank *a = (PageRank *) x ;
211     PageRank *b = (PageRank *) y ;
212 
213     // sort by pagerank in descending order
214     if (a->pagerank > b->pagerank)
215     {
216         return (-1) ;
217     }
218     else if (a->pagerank == b->pagerank)
219     {
220         return (0) ;
221     }
222     else
223     {
224         return (1) ;
225     }
226 }
227 
228 //------------------------------------------------------------------------------
229 // dpagerank2: compute the PageRank of all nodes in a graph
230 //------------------------------------------------------------------------------
231 
232 GB_PUBLIC
dpagerank2(PageRank ** Phandle,GrB_Matrix A,int itermax,double tol,int * iters,GrB_Desc_Value method)233 GrB_Info dpagerank2         // GrB_SUCCESS or error condition
234 (
235     PageRank **Phandle,     // output: pointer to array of PageRank structs
236     GrB_Matrix A,           // input graph, not modified
237     int itermax,            // max number of iterations
238     double tol,             // stop when norm (r-rnew,2) < tol
239     int *iters,             // number of iterations taken
240     GrB_Desc_Value method   // method to use for GrB_vxm (for testing only)
241 )
242 {
243 
244     GrB_Info info ;
245     double *X = NULL ;
246     GrB_Index n, *I = NULL ;
247     PageRank *P = NULL ;
248     GrB_Descriptor desc = NULL ;
249     GrB_Vector r = NULL, dout = NULL, rdouble = NULL, rnew = NULL, rdiff = NULL;
250 
251     //--------------------------------------------------------------------------
252     // create the new type, operators, monoid, and semiring
253     //--------------------------------------------------------------------------
254 
255     GrB_Type PageRank_type = NULL ;
256     GrB_UnaryOp PageRank_div = NULL, PageRank_get = NULL, PageRank_init = NULL ;
257     GrB_BinaryOp PageRank_accum = NULL, PageRank_add = NULL,
258         PageRank_multiply = NULL, PageRank_diff = NULL ;
259     GrB_Monoid PageRank_monoid = NULL ;
260     GrB_Semiring PageRank_semiring = NULL ;
261 
262     // create the new Page type
263     OK (GrB_Type_new (&PageRank_type, sizeof (pagerank_type))) ;
264 
265     #define U (GxB_unary_function)
266     #define B (GxB_binary_function)
267 
268     // create the unary operator to initialize the PageRank_type of each node
269     OK (GrB_UnaryOp_new (&PageRank_init, U init_page, PageRank_type, GrB_FP64));
270 
271     // create PageRank_accum
272     OK (GrB_BinaryOp_new (&PageRank_accum, B pagerank_accum,
273         PageRank_type, PageRank_type, PageRank_type)) ;
274 
275     // create PageRank_add operator and monoid
276     OK (GrB_BinaryOp_new (&PageRank_add, B pagerank_add,
277         PageRank_type, PageRank_type, PageRank_type)) ;
278     OK (GrB_Monoid_new_UDT (&PageRank_monoid, PageRank_add, &pagerank_zero)) ;
279 
280     // create PageRank_multiply operator
281     OK (GrB_BinaryOp_new (&PageRank_multiply, B pagerank_multiply,
282         PageRank_type, PageRank_type, GrB_BOOL)) ;
283 
284     // create PageRank_semiring
285     OK (GrB_Semiring_new (&PageRank_semiring, PageRank_monoid,
286         PageRank_multiply)) ;
287 
288     // create unary operator that typecasts the PageRank_type to double
289     OK (GrB_UnaryOp_new (&PageRank_get, U pagerank_get_rank, GrB_FP64,
290         PageRank_type)) ;
291 
292     // create unary operator that scales the rank by pagerank_rsum
293     OK (GrB_UnaryOp_new (&PageRank_div, U pagerank_div, GrB_FP64, GrB_FP64)) ;
294 
295     // create PageRank_diff operator
296     OK (GrB_BinaryOp_new (&PageRank_diff, B pagerank_diff,
297         PageRank_type, PageRank_type, PageRank_type)) ;
298 
299     //--------------------------------------------------------------------------
300     // initializations
301     //--------------------------------------------------------------------------
302 
303     (*Phandle) = NULL ;
304 
305     // n = size (A,1) ;         // number of nodes
306     OK (GrB_Matrix_nrows (&n, A)) ;
307 
308     // dout = sum (A,2) ;       // dout(i) is the out-degree of node i
309     OK (GrB_Vector_new (&dout, GrB_FP64, n)) ;
310     OK (GrB_Matrix_reduce_Monoid (dout, NULL, NULL, GrB_PLUS_MONOID_FP64,
311         A, NULL)) ;
312 
313     // all nodes start with rank 1/n
314     pagerank_init_rank = 1.0 / ((double) n) ;
315 
316     // initialize the page rank and inverse degree of each node
317     OK (GrB_Vector_new (&r, PageRank_type, n)) ;
318     OK (GrB_Vector_apply (r, NULL, NULL, PageRank_init, dout, NULL)) ;
319 
320     // dout vector no longer needed
321     OK (GrB_Vector_free (&dout)) ;
322 
323     // to jump to any random node in entire graph:
324     pagerank_teleport = (1-PAGERANK_DAMPING) / n ;
325 
326     tol = tol*tol ;                 // use tol^2 so sqrt(...) not needed
327     double pagerank_rdiff = 1 ;     // so first iteration is always done
328 
329     // create rdouble, a double vector of size n
330     OK (GrB_Vector_new (&rdouble, GrB_FP64, n)) ;
331 
332     // Note that dup is needed, since the invdegree is copied by the
333     // PageRank_accum.
334     OK (GrB_Vector_dup (&rnew, r)) ;
335     OK (GrB_Vector_new (&rdiff, PageRank_type, n)) ;
336 
337     // select method for GrB_vxm (for testing only; default is fine)
338     if (method != GxB_DEFAULT)
339     {
340         OK (GrB_Descriptor_new (&desc)) ;
341         OK (GxB_Desc_set (desc, GxB_AxB_METHOD, method)) ;
342     }
343 
344     //--------------------------------------------------------------------------
345     // iterate to compute the pagerank of each node
346     //--------------------------------------------------------------------------
347 
348     for ((*iters) = 0 ; (*iters) < itermax && pagerank_rdiff > tol ; (*iters)++)
349     {
350 
351         // rnew = PAGERANK_DAMPING * (r * D * A) + pagerank_teleport
352         OK (GrB_vxm (rnew, NULL, PageRank_accum, PageRank_semiring, r, A,
353             desc)) ;
354 
355         // compute pagerank_rdiff = sum ((r - rnew).^2)
356         OK (GrB_Vector_eWiseAdd_BinaryOp (rdiff, NULL, NULL, PageRank_diff,
357             r, rnew, NULL)) ;
358         pagerank_type rsum ;
359         OK (GrB_Vector_reduce_UDT (&rsum, NULL, PageRank_monoid, rdiff, NULL)) ;
360 
361         pagerank_rdiff = rsum.rank ;
362 
363         // r = rnew, using a swap, which is faster than assign or dup
364         GrB_Vector rtemp = r ;
365         r = rnew ;
366         rnew = rtemp ;
367     }
368 
369     //--------------------------------------------------------------------------
370     // scale the result: rdouble = rank / sum(r)
371     //--------------------------------------------------------------------------
372 
373     // rnew (for the safe version) is no longer needed
374     GrB_Vector_free (&rnew) ;
375 
376     // rdouble = pagerank_get_rank (r)
377     OK (GrB_Vector_apply (rdouble, NULL, NULL, PageRank_get, r, NULL)) ;
378 
379     // r no longer needed
380     GrB_Vector_free (&r) ;
381 
382     // pagerank_rsum = sum (rdouble)
383     OK (GrB_Vector_reduce_FP64 (&pagerank_rsum, NULL, GrB_PLUS_MONOID_FP64,
384         rdouble, NULL)) ;
385 
386     // could also do this with GrB_vxm, with a 1-by-1 matrix
387     // r = r / pagerank_rsum
388     OK (GrB_Vector_apply (rdouble, NULL, NULL, PageRank_div, rdouble, NULL)) ;
389 
390     //--------------------------------------------------------------------------
391     // sort the nodes by pagerank
392     //--------------------------------------------------------------------------
393 
394     // GraphBLAS does not have a mechanism to sort the components of a vector,
395     // so it must be done by extracting and then sorting the tuples from
396     // the GrB_Vector rdouble.
397 
398     // [r,irank] = sort (r, 'descend') ;
399 
400     // [I,X] = find (r) ;
401     X = (double *) malloc (n * sizeof (double)) ;
402     I = (GrB_Index *) malloc (n * sizeof (GrB_Index)) ;
403     CHECK (I != NULL && X != NULL, GrB_OUT_OF_MEMORY) ;
404     GrB_Index nvals = n ;
405     OK (GrB_Vector_extractTuples_FP64 (I, X, &nvals, rdouble)) ;
406 
407     // rdouble no longer needed
408     GrB_Vector_free (&rdouble) ;
409 
410     // P = struct (X,I)
411     P = (PageRank *) malloc (n * sizeof (PageRank)) ;
412     CHECK (P != NULL, GrB_OUT_OF_MEMORY) ;
413     int64_t k ;
414     for (k = 0 ; k < nvals ; k++)
415     {
416         // The kth ranked page is P[k].page (with k=0 being the highest rank),
417         // and its pagerank is P[k].pagerank.
418         P [k].pagerank = X [k] ;
419         // I [k] == k will be true for SuiteSparse:GraphBLAS but in general I
420         // can be returned in any order, so use I [k] instead of k, for other
421         // GraphBLAS implementations.
422         P [k].page = I [k] ;
423     }
424     for ( ; k < n ; k++)
425     {
426         // If A has empty columns, then r will become sparse.  In this case,
427         // pages with no incoming edges will be unranked.  The drowscale
428         // function avoids this problem by adding a
429         P [k].pagerank = 0 ;
430         P [k].page = -1 ;
431     }
432 
433     // free workspace
434     FREEWORK ;
435 
436     // qsort (P) in descending order
437     qsort (P, nvals, sizeof (PageRank), pagerank_compar) ;
438 
439     //--------------------------------------------------------------------------
440     // return result
441     //--------------------------------------------------------------------------
442 
443     (*Phandle) = P ;
444     return (GrB_SUCCESS) ;
445 }
446 
447