1 /*!
2 * \file
3 *
4 * \brief Various routines that perform random-walk based operations
5 on graphs stored as gk_csr_t matrices.
6 *
7 * \author George Karypis
8 * \version\verbatim $Id: rw.c 11078 2011-11-12 00:20:44Z karypis $ \endverbatim
9 */
10
11 #include <GKlib.h>
12
13
14 /*************************************************************************/
15 /*! Computes the (personalized) page-rank of the vertices in a graph.
16
17 \param mat is the matrix storing the graph.
18 \param lamda is the restart probability.
19 \param eps is the error tolerance for convergance.
20 \param max_niter is the maximum number of allowed iterations.
21 \param pr on entry stores the restart distribution of the vertices.
22 This allows for the computation of personalized page-rank scores
23 by appropriately setting that parameter.
24 On return, pr stores the computed page ranks.
25
26 \returns the number of iterations that were performed.
27 */
28 /**************************************************************************/
gk_rw_PageRank(gk_csr_t * mat,float lamda,float eps,int max_niter,float * pr)29 int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr)
30 {
31 ssize_t i, j, k, iter, nrows;
32 double *rscale, *prold, *prnew, *prtmp;
33 double fromsinks, error;
34 ssize_t *rowptr;
35 int *rowind;
36 float *rowval;
37
38 nrows = mat->nrows;
39 rowptr = mat->rowptr;
40 rowind = mat->rowind;
41 rowval = mat->rowval;
42
43 prold = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prnew");
44 prnew = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prold");
45 rscale = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: rscale");
46
47 /* compute the scaling factors to get adjacency weights into transition
48 probabilities */
49 for (i=0; i<nrows; i++) {
50 for (j=rowptr[i]; j<rowptr[i+1]; j++)
51 rscale[i] += rowval[j];
52 if (rscale[i] > 0)
53 rscale[i] = 1.0/rscale[i];
54 }
55
56 /* the restart distribution is the initial pr scores */
57 for (i=0; i<nrows; i++)
58 prnew[i] = pr[i];
59
60 /* get into the PR iteration */
61 for (iter=0; iter<max_niter; iter++) {
62 gk_SWAP(prnew, prold, prtmp);
63 gk_dset(nrows, 0.0, prnew);
64
65 /* determine the total current PR score of the sinks so that you
66 can distribute them to all nodes according to the restart
67 distribution. */
68 for (fromsinks=0.0, i=0; i<nrows; i++) {
69 if (rscale[i] == 0)
70 fromsinks += prold[i];
71 }
72
73 /* push random-walk scores to the outlinks */
74 for (i=0; i<nrows; i++) {
75 for (j=rowptr[i]; j<rowptr[i+1]; j++)
76 prnew[rowind[j]] += prold[i]*rscale[i]*rowval[j];
77 }
78
79 /* apply the restart conditions */
80 for (i=0; i<nrows; i++) {
81 prnew[i] = lamda*(fromsinks*pr[i]+prnew[i]) + (1.0-lamda)*pr[i];
82 }
83
84 /* compute the error */
85 for (error=0.0, i=0; i<nrows; i++)
86 error = (fabs(prnew[i]-prold[i]) > error ? fabs(prnew[i]-prold[i]) : error);
87
88 //printf("nrm1: %le maxfabserr: %le\n", gk_dsum(nrows, prnew, 1), error);
89
90 if (error < eps)
91 break;
92 }
93
94 /* store the computed pr scores into pr for output */
95 for (i=0; i<nrows; i++)
96 pr[i] = prnew[i];
97
98 gk_free((void **)&prnew, &prold, &rscale, LTERM);
99
100 return (int)(iter+1);
101
102 }
103
104