1 /*!
2  * \file
3  *
4  * \brief Various routines that perform random-walk based operations
5           on graphs stored as gk_csr_t matrices.
6  *
7  * \author George Karypis
8  * \version\verbatim $Id: rw.c 11078 2011-11-12 00:20:44Z karypis $ \endverbatim
9  */
10 
11 #include <GKlib.h>
12 
13 
14 /*************************************************************************/
15 /*! Computes the (personalized) page-rank of the vertices in a graph.
16 
17   \param mat is the matrix storing the graph.
18   \param lamda is the restart probability.
19   \param eps is the error tolerance for convergance.
20   \param max_niter is the maximum number of allowed iterations.
21   \param pr on entry stores the restart distribution of the vertices.
22          This allows for the computation of personalized page-rank scores
23          by appropriately setting that parameter.
24          On return, pr stores the computed page ranks.
25 
26   \returns the number of iterations that were performed.
27 */
28 /**************************************************************************/
gk_rw_PageRank(gk_csr_t * mat,float lamda,float eps,int max_niter,float * pr)29 int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr)
30 {
31   ssize_t i, j, k, iter, nrows;
32   double *rscale, *prold, *prnew, *prtmp;
33   double fromsinks, error;
34   ssize_t *rowptr;
35   int *rowind;
36   float *rowval;
37 
38   nrows  = mat->nrows;
39   rowptr = mat->rowptr;
40   rowind = mat->rowind;
41   rowval = mat->rowval;
42 
43   prold  = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prnew");
44   prnew  = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prold");
45   rscale = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: rscale");
46 
47   /* compute the scaling factors to get adjacency weights into transition
48      probabilities */
49   for (i=0; i<nrows; i++) {
50     for (j=rowptr[i]; j<rowptr[i+1]; j++)
51       rscale[i] += rowval[j];
52     if (rscale[i] > 0)
53       rscale[i] = 1.0/rscale[i];
54   }
55 
56   /* the restart distribution is the initial pr scores */
57   for (i=0; i<nrows; i++)
58     prnew[i] = pr[i];
59 
60   /* get into the PR iteration */
61   for (iter=0; iter<max_niter; iter++) {
62     gk_SWAP(prnew, prold, prtmp);
63     gk_dset(nrows, 0.0, prnew);
64 
65     /* determine the total current PR score of the sinks so that you
66        can distribute them to all nodes according to the restart
67        distribution. */
68     for (fromsinks=0.0, i=0; i<nrows; i++) {
69       if (rscale[i] == 0)
70         fromsinks += prold[i];
71     }
72 
73     /* push random-walk scores to the outlinks */
74     for (i=0; i<nrows; i++) {
75       for (j=rowptr[i]; j<rowptr[i+1]; j++)
76         prnew[rowind[j]] += prold[i]*rscale[i]*rowval[j];
77     }
78 
79     /* apply the restart conditions */
80     for (i=0; i<nrows; i++) {
81       prnew[i] = lamda*(fromsinks*pr[i]+prnew[i]) + (1.0-lamda)*pr[i];
82     }
83 
84     /* compute the error */
85     for (error=0.0, i=0; i<nrows; i++)
86       error = (fabs(prnew[i]-prold[i]) > error ? fabs(prnew[i]-prold[i]) : error);
87 
88     //printf("nrm1: %le  maxfabserr: %le\n", gk_dsum(nrows, prnew, 1), error);
89 
90     if (error < eps)
91       break;
92   }
93 
94   /* store the computed pr scores into pr for output */
95   for (i=0; i<nrows; i++)
96     pr[i] = prnew[i];
97 
98   gk_free((void **)&prnew, &prold, &rscale, LTERM);
99 
100   return (int)(iter+1);
101 
102 }
103 
104