1 /* -*- mode: C -*-  */
2 /*
3    IGraph library.
4    Copyright (C) 2013  Gabor Csardi <csardi.gabor@gmail.com>
5    334 Harvard street, Cambridge, MA 02139 USA
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20    02110-1301 USA
21 
22 */
23 
24 #include "igraph_graphlets.h"
25 
26 #include "igraph_conversion.h"
27 #include "igraph_constructors.h"
28 #include "igraph_cliques.h"
29 #include "igraph_memory.h"
30 #include "igraph_operators.h"
31 #include "igraph_qsort.h"
32 #include "igraph_structural.h"
33 
34 /**
35  * \section graphlets_intro Introduction
36  *
37  * <para>
38  * Graphlet decomposition models a weighted undirected graph
39  * via the union of potentially overlapping dense social groups.
40  * This is done by a two-step algorithm. In the first step, a candidate
41  * set of groups (a candidate basis) is created by finding cliques
42  * in the thresholded input graph. In the second step,
43  * the graph is projected onto the candidate basis, resulting in a
44  * weight coefficient for each clique in the candidate basis.
45  * </para>
46  *
47  * <para>
48  * For more information on graphlet decomposition, see
49  * Hossein Azari Soufiani and Edoardo M Airoldi: "Graphlet decomposition of a weighted network",
50  * https://arxiv.org/abs/1203.2821 and http://proceedings.mlr.press/v22/azari12/azari12.pdf
51  * </para>
52  *
53  * <para>
54  * igraph contains three functions for performing the graphlet
55  * decomponsition of a graph. The first is \ref igraph_graphlets(), which
56  * performs both steps of the method and returns a list of subgraphs
57  * with their corresponding weights. The other two functions
58  * correspond to the first and second steps of the algorithm, and they are
59  * useful if the user wishes to perform them individually:
60  * \ref igraph_graphlets_candidate_basis() and
61  * \ref igraph_graphlets_project().
62  * </para>
63  *
64  * <para>
65  * <remark>
66  * Note: The term "graphlet" is used for several unrelated concepts
67  * in the literature. If you are looking to count induced subgraphs, see
68  * \ref igraph_motifs_randesu() and \ref igraph_subisomorphic_lad().
69  * </remark>
70  * </para>
71  */
72 
73 typedef struct {
74     igraph_vector_int_t *resultids;
75     igraph_t *result;
76     igraph_vector_t *resultweights;
77     int nc;
78 } igraph_i_subclique_next_free_t;
79 
igraph_i_subclique_next_free(void * ptr)80 static void igraph_i_subclique_next_free(void *ptr) {
81     igraph_i_subclique_next_free_t *data = ptr;
82     int i;
83     if (data->resultids) {
84         for (i = 0; i < data->nc; i++) {
85             if (data->resultids + i) {
86                 igraph_vector_int_destroy(data->resultids + i);
87             }
88         }
89         IGRAPH_FREE(data->resultids);
90     }
91     if (data->result) {
92         for (i = 0; i < data->nc; i++) {
93             if (data->result + i) {
94                 igraph_destroy(data->result + i);
95             }
96         }
97         IGRAPH_FREE(data->result);
98     }
99     if (data->resultweights) {
100         for (i = 0; i < data->nc; i++) {
101             if (data->resultweights + i) {
102                 igraph_vector_destroy(data->resultweights + i);
103             }
104         }
105         IGRAPH_FREE(data->resultweights);
106     }
107 }
108 
109 /**
110  * \function igraph_i_subclique_next
111  * Calculate subcliques of the cliques found at the previous level
112  *
113  * \param graph Input graph.
114  * \param weight Edge weights.
115  * \param ids The ids of the vertices in the input graph.
116  * \param cliques A list of vectors, vertex ids for cliques.
117  * \param result The result is stored here, a list of graphs is stored
118  *        here.
119  * \param resultids The ids of the vertices in the result graphs is
120  *        stored here.
121  * \param clique_thr The thresholds for the cliques are stored here,
122  *        if not a null pointer.
123  * \param next_thr The next thresholds for the cliques are stored
124  *        here, if not a null pointer.
125  *
126  */
127 
igraph_i_subclique_next(const igraph_t * graph,const igraph_vector_t * weights,const igraph_vector_int_t * ids,const igraph_vector_ptr_t * cliques,igraph_t ** result,igraph_vector_t ** resultweights,igraph_vector_int_t ** resultids,igraph_vector_t * clique_thr,igraph_vector_t * next_thr)128 static int igraph_i_subclique_next(const igraph_t *graph,
129                                    const igraph_vector_t *weights,
130                                    const igraph_vector_int_t *ids,
131                                    const igraph_vector_ptr_t *cliques,
132                                    igraph_t **result,
133                                    igraph_vector_t **resultweights,
134                                    igraph_vector_int_t **resultids,
135                                    igraph_vector_t *clique_thr,
136                                    igraph_vector_t *next_thr) {
137 
138     /* The input is a set of cliques, that were found at a previous level.
139        For each clique, we calculate the next threshold, drop the isolate
140        vertices, and create a new graph from them. */
141 
142     igraph_vector_int_t mark, map;
143     igraph_vector_int_t edges;
144     igraph_vector_t neis, newedges;
145     igraph_integer_t c, nc = igraph_vector_ptr_size(cliques);
146     igraph_integer_t no_of_nodes = igraph_vcount(graph);
147     igraph_integer_t no_of_edges = igraph_ecount(graph);
148     igraph_i_subclique_next_free_t freedata = { 0, 0, 0, nc };
149 
150     if (igraph_vector_size(weights) != no_of_edges) {
151         IGRAPH_ERROR("Invalid length of weight vector", IGRAPH_EINVAL);
152     }
153 
154     if (igraph_vector_int_size(ids) != no_of_nodes) {
155         IGRAPH_ERROR("Invalid length of ID vector", IGRAPH_EINVAL);
156     }
157 
158     IGRAPH_FINALLY(igraph_i_subclique_next_free, &freedata);
159     *resultids = IGRAPH_CALLOC(nc, igraph_vector_int_t);
160     if (!*resultids) {
161         IGRAPH_ERROR("Cannot calculate next cliques", IGRAPH_ENOMEM);
162     }
163     freedata.resultids = *resultids;
164     *resultweights = IGRAPH_CALLOC(nc, igraph_vector_t);
165     if (!*resultweights) {
166         IGRAPH_ERROR("Cannot calculate next cliques", IGRAPH_ENOMEM);
167     }
168     freedata.resultweights = *resultweights;
169     *result = IGRAPH_CALLOC(nc, igraph_t);
170     if (!*result) {
171         IGRAPH_ERROR("Cannot calculate next cliques", IGRAPH_ENOMEM);
172     }
173     freedata.result = *result;
174 
175     igraph_vector_init(&newedges, 100);
176     IGRAPH_FINALLY(igraph_vector_destroy, &newedges);
177     igraph_vector_int_init(&mark, no_of_nodes);
178     IGRAPH_FINALLY(igraph_vector_int_destroy, &mark);
179     igraph_vector_int_init(&map, no_of_nodes);
180     IGRAPH_FINALLY(igraph_vector_int_destroy, &map);
181     igraph_vector_int_init(&edges, 100);
182     IGRAPH_FINALLY(igraph_vector_int_destroy, &edges);
183     igraph_vector_init(&neis, 10);
184     IGRAPH_FINALLY(igraph_vector_destroy, &neis);
185 
186     if (clique_thr) {
187         igraph_vector_resize(clique_thr, nc);
188     }
189     if (next_thr)   {
190         igraph_vector_resize(next_thr,   nc);
191     }
192 
193     /* Iterate over all cliques. We will create graphs for all
194        subgraphs defined by the cliques. */
195 
196     for (c = 0; c < nc; c++) {
197         igraph_vector_t *clique = VECTOR(*cliques)[c];
198         igraph_real_t minweight = IGRAPH_INFINITY, nextweight = IGRAPH_INFINITY;
199         igraph_integer_t e, v, clsize = igraph_vector_size(clique);
200         igraph_integer_t noe, nov = 0;
201         igraph_vector_int_t *newids = (*resultids) + c;
202         igraph_vector_t *neww = (*resultweights) + c;
203         igraph_t *newgraph = (*result) + c;
204         igraph_vector_int_clear(&edges);
205         igraph_vector_clear(&newedges);
206 
207         /* --------------------------------------------------- */
208 
209         /* Iterate over the vertices of a clique and find the
210            edges within the clique, put them in a list.
211            At the same time, search for the minimum edge weight within
212            the clique and the next edge weight if any. */
213 
214         for (v = 0; v < clsize; v++) {
215             igraph_integer_t i, neilen, node = VECTOR(*clique)[v];
216             igraph_incident(graph, &neis, node, IGRAPH_ALL);
217             neilen = igraph_vector_size(&neis);
218             VECTOR(mark)[node] = c + 1;
219             for (i = 0; i < neilen; i++) {
220                 igraph_integer_t edge = VECTOR(neis)[i];
221                 igraph_integer_t nei = IGRAPH_OTHER(graph, edge, node);
222                 if (VECTOR(mark)[nei] == c + 1) {
223                     igraph_real_t w = VECTOR(*weights)[edge];
224                     igraph_vector_int_push_back(&edges, edge);
225                     if (w < minweight) {
226                         nextweight = minweight;
227                         minweight = w;
228                     } else if (w > minweight && w < nextweight) {
229                         nextweight = w;
230                     }
231                 }
232             }
233         } /* v < clsize */
234 
235         /* --------------------------------------------------- */
236 
237         /* OK, we have stored the edges and found the weight of
238            the clique and the next weight to consider */
239 
240         if (clique_thr) {
241             VECTOR(*clique_thr)[c] = minweight;
242         }
243         if (next_thr)   {
244             VECTOR(*next_thr  )[c] = nextweight;
245         }
246 
247         /* --------------------------------------------------- */
248 
249         /* Now we create the subgraph from the edges above the next
250            threshold, and their incident vertices. */
251 
252         igraph_vector_int_init(newids, 0);
253         igraph_vector_init(neww, 0);
254 
255         /* We use mark[] to denote the vertices already mapped to
256            the new graph. If this is -(c+1), then the vertex was
257            mapped, otherwise it was not. The mapping itself is in
258            map[]. */
259 
260         noe = igraph_vector_int_size(&edges);
261         for (e = 0; e < noe; e++) {
262             igraph_integer_t edge = VECTOR(edges)[e];
263             igraph_integer_t from, to;
264             igraph_real_t w = VECTOR(*weights)[edge];
265             igraph_edge(graph, edge, &from, &to);
266             if (w >= nextweight) {
267                 if (VECTOR(mark)[from] == c + 1) {
268                     VECTOR(map)[from] = nov++;
269                     VECTOR(mark)[from] = -(c + 1);
270                     igraph_vector_int_push_back(newids, VECTOR(*ids)[from]);
271                 }
272                 if (VECTOR(mark)[to] == c + 1) {
273                     VECTOR(map)[to] = nov++;
274                     VECTOR(mark)[to] = -(c + 1);
275                     igraph_vector_int_push_back(newids, VECTOR(*ids)[to]);
276                 }
277                 igraph_vector_push_back(neww, w);
278                 igraph_vector_push_back(&newedges, VECTOR(map)[from]);
279                 igraph_vector_push_back(&newedges, VECTOR(map)[to]);
280             }
281         }
282 
283         igraph_create(newgraph, &newedges, nov, IGRAPH_UNDIRECTED);
284 
285         /* --------------------------------------------------- */
286 
287     } /* c < nc */
288 
289     igraph_vector_destroy(&neis);
290     igraph_vector_int_destroy(&edges);
291     igraph_vector_int_destroy(&mark);
292     igraph_vector_int_destroy(&map);
293     igraph_vector_destroy(&newedges);
294     IGRAPH_FINALLY_CLEAN(6);  /* + freedata */
295 
296     return 0;
297 }
298 
igraph_i_graphlets_destroy_vectorlist(igraph_vector_ptr_t * vl)299 static void igraph_i_graphlets_destroy_vectorlist(igraph_vector_ptr_t *vl) {
300     int i, n = igraph_vector_ptr_size(vl);
301     for (i = 0; i < n; i++) {
302         igraph_vector_t *v = (igraph_vector_t*) VECTOR(*vl)[i];
303         if (v) {
304             igraph_vector_destroy(v);
305         }
306     }
307     igraph_vector_ptr_destroy(vl);
308 }
309 
igraph_i_graphlets(const igraph_t * graph,const igraph_vector_t * weights,igraph_vector_ptr_t * cliques,igraph_vector_t * thresholds,const igraph_vector_int_t * ids,igraph_real_t startthr)310 static int igraph_i_graphlets(const igraph_t *graph,
311                               const igraph_vector_t *weights,
312                               igraph_vector_ptr_t *cliques,
313                               igraph_vector_t *thresholds,
314                               const igraph_vector_int_t *ids,
315                               igraph_real_t startthr) {
316 
317     /* This version is different from the main function, and is
318        appropriate to use in recursive calls, because it _adds_ the
319        results to 'cliques' and 'thresholds' and uses the supplied
320        'startthr' */
321 
322     igraph_vector_ptr_t mycliques;
323     int no_of_edges = igraph_ecount(graph);
324     igraph_vector_t subv;
325     igraph_t subg;
326     int i, nographs, nocliques;
327     igraph_t *newgraphs = 0;
328     igraph_vector_t *newweights = 0;
329     igraph_vector_int_t *newids = 0;
330     igraph_vector_t clique_thr, next_thr;
331     igraph_i_subclique_next_free_t freedata = { 0, 0, 0, 0 };
332 
333     IGRAPH_CHECK(igraph_vector_ptr_init(&mycliques, 0));
334     IGRAPH_FINALLY(igraph_i_graphlets_destroy_vectorlist, &mycliques);
335     IGRAPH_VECTOR_INIT_FINALLY(&subv, 0);
336 
337     /* We start by finding cliques at the lowest threshold */
338     for (i = 0; i < no_of_edges; i++) {
339         if (VECTOR(*weights)[i] >= startthr) {
340             IGRAPH_CHECK(igraph_vector_push_back(&subv, i));
341         }
342     }
343     igraph_subgraph_edges(graph, &subg, igraph_ess_vector(&subv),
344                           /*delete_vertices=*/ 0);
345     IGRAPH_FINALLY(igraph_destroy, &subg);
346     igraph_maximal_cliques(&subg, &mycliques, /*min_size=*/ 0, /*max_size=*/ 0);
347     igraph_destroy(&subg);
348     IGRAPH_FINALLY_CLEAN(1);
349     nocliques = igraph_vector_ptr_size(&mycliques);
350 
351     igraph_vector_destroy(&subv);
352     IGRAPH_FINALLY_CLEAN(1);
353 
354     /* Get the next cliques and thresholds */
355     IGRAPH_VECTOR_INIT_FINALLY(&next_thr, 0);
356     IGRAPH_VECTOR_INIT_FINALLY(&clique_thr, 0);
357 
358     igraph_i_subclique_next(graph, weights, ids, &mycliques,
359                             &newgraphs, &newweights, &newids,
360                             &clique_thr, &next_thr);
361 
362     freedata.result = newgraphs;
363     freedata.resultids = newids;
364     freedata.resultweights = newweights;
365     freedata.nc = nocliques;
366     IGRAPH_FINALLY(igraph_i_subclique_next_free, &freedata);
367 
368     /* Store cliques at the current level */
369     igraph_vector_append(thresholds, &clique_thr);
370     for (i = 0; i < nocliques; i++) {
371         igraph_vector_t *cl = (igraph_vector_t*) VECTOR(mycliques)[i];
372         int j, n = igraph_vector_size(cl);
373         for (j = 0; j < n; j++) {
374             int node = VECTOR(*cl)[j];
375             VECTOR(*cl)[j] = VECTOR(*ids)[node];
376         }
377         igraph_vector_sort(cl);
378     }
379     igraph_vector_ptr_append(cliques, &mycliques);
380 
381     /* Recursive calls for cliques found */
382     nographs = igraph_vector_ptr_size(&mycliques);
383     for (i = 0; i < nographs; i++) {
384         igraph_t *g = newgraphs + i;
385         if (igraph_vcount(g) > 1) {
386             igraph_vector_t *w_sub = newweights + i;
387             igraph_vector_int_t *ids_sub = newids + i;
388             igraph_i_graphlets(g, w_sub, cliques, thresholds, ids_sub, VECTOR(next_thr)[i]);
389         }
390     }
391 
392     igraph_vector_destroy(&clique_thr);
393     igraph_vector_destroy(&next_thr);
394     igraph_i_subclique_next_free(&freedata);
395     igraph_vector_ptr_destroy(&mycliques); /* contents was copied over */
396     IGRAPH_FINALLY_CLEAN(4);
397 
398     return 0;
399 }
400 
401 typedef struct {
402     const igraph_vector_ptr_t *cliques;
403     const igraph_vector_t *thresholds;
404 } igraph_i_graphlets_filter_t;
405 
igraph_i_graphlets_filter_cmp(void * data,const void * a,const void * b)406 static int igraph_i_graphlets_filter_cmp(void *data, const void *a, const void *b) {
407     igraph_i_graphlets_filter_t *ddata = (igraph_i_graphlets_filter_t *) data;
408     int *aa = (int*) a;
409     int *bb = (int*) b;
410     igraph_real_t t_a = VECTOR(*ddata->thresholds)[*aa];
411     igraph_real_t t_b = VECTOR(*ddata->thresholds)[*bb];
412     igraph_vector_t *v_a, *v_b;
413     int s_a, s_b;
414 
415     if (t_a < t_b) {
416         return -1;
417     } else if (t_a > t_b) {
418         return 1;
419     }
420 
421     v_a = (igraph_vector_t*) VECTOR(*ddata->cliques)[*aa];
422     v_b = (igraph_vector_t*) VECTOR(*ddata->cliques)[*bb];
423     s_a = igraph_vector_size(v_a);
424     s_b = igraph_vector_size(v_b);
425 
426     if (s_a < s_b) {
427         return -1;
428     } else if (s_a > s_b) {
429         return 1;
430     } else {
431         return 0;
432     }
433 }
434 
igraph_i_graphlets_filter(igraph_vector_ptr_t * cliques,igraph_vector_t * thresholds)435 static int igraph_i_graphlets_filter(igraph_vector_ptr_t *cliques,
436                                      igraph_vector_t *thresholds) {
437 
438     /* Filter out non-maximal cliques. Every non-maximal clique is
439        part of a maximal clique, at the same threshold.
440 
441        First we order the cliques, according to their threshold, and
442        then according to their size. So when we look for a candidate
443        superset, we only need to check the cliques next in the list,
444        until their threshold is different. */
445 
446     int i, iptr, nocliques = igraph_vector_ptr_size(cliques);
447     igraph_vector_int_t order;
448     igraph_i_graphlets_filter_t sortdata = { cliques, thresholds };
449 
450     igraph_vector_int_init(&order, nocliques);
451     IGRAPH_FINALLY(igraph_vector_int_destroy, &order);
452     for (i = 0; i < nocliques; i++) {
453         VECTOR(order)[i] = i;
454     }
455 
456     igraph_qsort_r(VECTOR(order), nocliques, sizeof(int), &sortdata,
457                    igraph_i_graphlets_filter_cmp);
458 
459     for (i = 0; i < nocliques - 1; i++) {
460         int ri = VECTOR(order)[i];
461         igraph_vector_t *needle = VECTOR(*cliques)[ri];
462         igraph_real_t thr_i = VECTOR(*thresholds)[ri];
463         int n_i = igraph_vector_size(needle);
464         int j = i + 1;
465 
466         for (j = i + 1; j < nocliques; j++) {
467             int rj = VECTOR(order)[j];
468             igraph_real_t thr_j = VECTOR(*thresholds)[rj];
469             igraph_vector_t *hay;
470             int n_j, pi = 0, pj = 0;
471 
472             /* Done, not found */
473             if (thr_j != thr_i) {
474                 break;
475             }
476 
477             /* Check size of hay */
478             hay = VECTOR(*cliques)[rj];
479             n_j = igraph_vector_size(hay);
480             if (n_i > n_j) {
481                 continue;
482             }
483 
484             /* Check if hay is a superset */
485             while (pi < n_i && pj < n_j && n_i - pi <= n_j - pj) {
486                 int ei = VECTOR(*needle)[pi];
487                 int ej = VECTOR(*hay)[pj];
488                 if (ei < ej) {
489                     break;
490                 } else if (ei > ej) {
491                     pj++;
492                 } else {
493                     pi++; pj++;
494                 }
495             }
496             if (pi == n_i) {
497                 /* Found, delete immediately */
498                 igraph_vector_destroy(needle);
499                 igraph_free(needle);
500                 VECTOR(*cliques)[ri] = 0;
501                 break;
502             }
503         }
504     }
505 
506     /* Remove null pointers from the list of cliques */
507     for (i = 0, iptr = 0; i < nocliques; i++) {
508         igraph_vector_t *v = VECTOR(*cliques)[i];
509         if (v) {
510             VECTOR(*cliques)[iptr] = v;
511             VECTOR(*thresholds)[iptr] = VECTOR(*thresholds)[i];
512             iptr++;
513         }
514     }
515     igraph_vector_ptr_resize(cliques, iptr);
516     igraph_vector_resize(thresholds, iptr);
517 
518     igraph_vector_int_destroy(&order);
519     IGRAPH_FINALLY_CLEAN(1);
520 
521     return 0;
522 }
523 
524 /**
525  * \function igraph_graphlets_candidate_basis
526  * Calculate a candidate graphlets basis
527  *
528  * \param graph The input graph, it must be a simple graph, edge directions are
529  *        ignored.
530  * \param weights Weights of the edges, a vector.
531  * \param cliques An initialized vector of pointers.
532  *        The graphlet basis is stored here. Each element of the pointer
533  *        vector will be a vector of vertex ids. Each elements must be
534  *        destroyed using \ref igraph_vector_destroy() and \ref igraph_free().
535  * \param thresholds An initialized vector, the (highest possible)
536  *        weight thresholds for finding the basis subgraphs are stored
537  *        here.
538  * \return Error code.
539  *
540  * See also: \ref igraph_graphlets() and \ref igraph_graphlets_project().
541  */
542 
igraph_graphlets_candidate_basis(const igraph_t * graph,const igraph_vector_t * weights,igraph_vector_ptr_t * cliques,igraph_vector_t * thresholds)543 int igraph_graphlets_candidate_basis(const igraph_t *graph,
544                                      const igraph_vector_t *weights,
545                                      igraph_vector_ptr_t *cliques,
546                                      igraph_vector_t *thresholds) {
547 
548     int no_of_nodes = igraph_vcount(graph);
549     int no_of_edges = igraph_ecount(graph);
550     igraph_real_t minthr;
551     igraph_vector_int_t ids;
552     igraph_bool_t simple;
553     int i;
554 
555     /* Some checks */
556     if (weights == NULL) {
557         IGRAPH_ERROR("Graphlet functions require weighted graphs", IGRAPH_EINVAL);
558     }
559 
560     if (igraph_vector_size(weights) != no_of_edges) {
561         IGRAPH_ERROR("Invalid weight vector length", IGRAPH_EINVAL);
562     }
563 
564     igraph_is_simple(graph, &simple);
565     if (!simple) {
566         IGRAPH_ERROR("Graphlets work on simple graphs only", IGRAPH_EINVAL);
567     }
568 
569     minthr = igraph_vector_min(weights);
570     igraph_vector_ptr_clear(cliques);
571     igraph_vector_clear(thresholds);
572     igraph_vector_int_init(&ids, no_of_nodes);
573     IGRAPH_FINALLY(igraph_vector_int_destroy, &ids);
574     for (i = 0; i < no_of_nodes; i++) {
575         VECTOR(ids)[i] = i;
576     }
577 
578     igraph_i_graphlets(graph, weights, cliques, thresholds, &ids, minthr);
579 
580     igraph_vector_int_destroy(&ids);
581     IGRAPH_FINALLY_CLEAN(1);
582 
583     igraph_i_graphlets_filter(cliques, thresholds);
584 
585     return 0;
586 }
587 
588 /* TODO: not made static because it is used by the R interface */
igraph_i_graphlets_project(const igraph_t * graph,const igraph_vector_t * weights,const igraph_vector_ptr_t * cliques,igraph_vector_t * Mu,igraph_bool_t startMu,int niter,int vid1)589 int igraph_i_graphlets_project(const igraph_t *graph,
590                                const igraph_vector_t *weights,
591                                const igraph_vector_ptr_t *cliques,
592                                igraph_vector_t *Mu, igraph_bool_t startMu,
593                                int niter, int vid1) {
594 
595     int no_of_nodes = igraph_vcount(graph);
596     int no_of_edges = igraph_ecount(graph);
597     int no_cliques = igraph_vector_ptr_size(cliques);
598     igraph_vector_int_t vcl, vclidx, ecl, eclidx, cel, celidx;
599     igraph_vector_t edgelist, newweights, normfact;
600     int i, total_vertices, e, ptr, total_edges;
601     igraph_bool_t simple;
602 
603     /* Check arguments */
604     if (weights == NULL) {
605         IGRAPH_ERROR("Graphlet functions require weighted graphs", IGRAPH_EINVAL);
606     }
607     if (no_of_edges != igraph_vector_size(weights)) {
608         IGRAPH_ERROR("Invalid weight vector size", IGRAPH_EINVAL);
609     }
610     if (startMu && igraph_vector_size(Mu) != no_cliques) {
611         IGRAPH_ERROR("Invalid start coefficient vector size", IGRAPH_EINVAL);
612     }
613     if (niter < 0) {
614         IGRAPH_ERROR("Number of iterations must be non-negative", IGRAPH_EINVAL);
615     }
616     igraph_is_simple(graph, &simple);
617     if (!simple) {
618         IGRAPH_ERROR("Graphlets work on simple graphs only", IGRAPH_EINVAL);
619     }
620 
621     if (!startMu) {
622         igraph_vector_resize(Mu, no_cliques);
623         igraph_vector_fill(Mu, 1);
624     }
625 
626     /* Count # cliques per vertex. Also, create an index
627        for the edges per clique. */
628     IGRAPH_CHECK(igraph_vector_int_init(&vclidx, no_of_nodes + 2));
629     IGRAPH_FINALLY(igraph_vector_int_destroy, &vclidx);
630     IGRAPH_CHECK(igraph_vector_int_init(&celidx, no_cliques + 3));
631     IGRAPH_FINALLY(igraph_vector_int_destroy, &celidx);
632     for (i = 0, total_vertices = 0, total_edges = 0; i < no_cliques; i++) {
633         igraph_vector_t *v = VECTOR(*cliques)[i];
634         int j, n = igraph_vector_size(v);
635         total_vertices += n;
636         total_edges += n * (n - 1) / 2;
637         VECTOR(celidx)[i + 2] = total_edges;
638         for (j = 0; j < n; j++) {
639             int vv = VECTOR(*v)[j] - vid1;
640             VECTOR(vclidx)[vv + 2] += 1;
641         }
642     }
643     VECTOR(celidx)[i + 2] = total_edges;
644 
645     /* Finalize index vector */
646     for (i = 0; i < no_of_nodes; i++) {
647         VECTOR(vclidx)[i + 2] += VECTOR(vclidx)[i + 1];
648     }
649 
650     /* Create vertex-clique list, the cliques for each vertex. */
651     IGRAPH_CHECK(igraph_vector_int_init(&vcl, total_vertices));
652     IGRAPH_FINALLY(igraph_vector_int_destroy, &vcl);
653     for (i = 0; i < no_cliques; i++) {
654         igraph_vector_t *v = VECTOR(*cliques)[i];
655         int j, n = igraph_vector_size(v);
656         for (j = 0; j < n; j++) {
657             int vv = VECTOR(*v)[j] - vid1;
658             int p = VECTOR(vclidx)[vv + 1];
659             VECTOR(vcl)[p] = i;
660             VECTOR(vclidx)[vv + 1] += 1;
661         }
662     }
663 
664     /* Create an edge-clique list, the cliques of each edge */
665     IGRAPH_CHECK(igraph_vector_int_init(&ecl, total_edges));
666     IGRAPH_FINALLY(igraph_vector_int_destroy, &ecl);
667     IGRAPH_CHECK(igraph_vector_int_init(&eclidx, no_of_edges + 1));
668     IGRAPH_FINALLY(igraph_vector_int_destroy, &eclidx);
669     IGRAPH_CHECK(igraph_vector_init(&edgelist, no_of_edges * 2));
670     IGRAPH_FINALLY(igraph_vector_destroy, &edgelist);
671     IGRAPH_CHECK(igraph_get_edgelist(graph, &edgelist, /*by_col=*/ 0));
672     for (i = 0, e = 0, ptr = 0; e < no_of_edges; e++) {
673         int from = VECTOR(edgelist)[i++];
674         int to = VECTOR(edgelist)[i++];
675         int from_s = VECTOR(vclidx)[from];
676         int from_e = VECTOR(vclidx)[from + 1];
677         int to_s = VECTOR(vclidx)[to];
678         int to_e = VECTOR(vclidx)[to + 1];
679         VECTOR(eclidx)[e] = ptr;
680         while (from_s < from_e && to_s < to_e) {
681             int from_v = VECTOR(vcl)[from_s];
682             int to_v = VECTOR(vcl)[to_s];
683             if (from_v == to_v) {
684                 VECTOR(ecl)[ptr++] = from_v;
685                 from_s++; to_s++;
686             } else if (from_v < to_v) {
687                 from_s++;
688             } else {
689                 to_s++;
690             }
691         }
692     }
693     VECTOR(eclidx)[e] = ptr;
694 
695     igraph_vector_destroy(&edgelist);
696     IGRAPH_FINALLY_CLEAN(1);
697 
698     /* Convert the edge-clique list to a clique-edge list */
699     IGRAPH_CHECK(igraph_vector_int_init(&cel, total_edges));
700     IGRAPH_FINALLY(igraph_vector_int_destroy, &cel);
701     for (i = 0; i < no_of_edges; i++) {
702         int ecl_s = VECTOR(eclidx)[i], ecl_e = VECTOR(eclidx)[i + 1], j;
703         for (j = ecl_s; j < ecl_e; j++) {
704             int cl = VECTOR(ecl)[j];
705             int epos = VECTOR(celidx)[cl + 1];
706             VECTOR(cel)[epos] = i;
707             VECTOR(celidx)[cl + 1] += 1;
708         }
709     }
710 
711     /* Normalizing factors for the iteration */
712     IGRAPH_CHECK(igraph_vector_init(&normfact, no_cliques));
713     IGRAPH_FINALLY(igraph_vector_destroy, &normfact);
714     for (i = 0; i < no_cliques; i++) {
715         igraph_vector_t *v = VECTOR(*cliques)[i];
716         int n = igraph_vector_size(v);
717         VECTOR(normfact)[i] = n * (n + 1) / 2;
718     }
719 
720     /* We have the clique-edge list, so do the projection now */
721     IGRAPH_CHECK(igraph_vector_init(&newweights, no_of_edges));
722     IGRAPH_FINALLY(igraph_vector_destroy, &newweights);
723     for (i = 0; i < niter; i++) {
724         for (e = 0; e < no_of_edges; e++) {
725             int start = VECTOR(eclidx)[e];
726             int end = VECTOR(eclidx)[e + 1];
727             VECTOR(newweights)[e] = 0.0001;
728             while (start < end) {
729                 int clique = VECTOR(ecl)[start++];
730                 VECTOR(newweights)[e] += VECTOR(*Mu)[clique];
731             }
732         }
733         for (e = 0; e < no_cliques; e++) {
734             igraph_real_t sumratio = 0;
735             int start = VECTOR(celidx)[e];
736             int end = VECTOR(celidx)[e + 1];
737             while (start < end) {
738                 int edge = VECTOR(cel)[start++];
739                 sumratio += VECTOR(*weights)[edge] / VECTOR(newweights)[edge];
740             }
741             VECTOR(*Mu)[e] *= sumratio / VECTOR(normfact)[e];
742         }
743     }
744 
745     igraph_vector_destroy(&newweights);
746     igraph_vector_destroy(&normfact);
747     igraph_vector_int_destroy(&cel);
748     igraph_vector_int_destroy(&eclidx);
749     igraph_vector_int_destroy(&ecl);
750     igraph_vector_int_destroy(&vcl);
751     igraph_vector_int_destroy(&celidx);
752     igraph_vector_int_destroy(&vclidx);
753     IGRAPH_FINALLY_CLEAN(8);
754 
755     return 0;
756 }
757 
758 /**
759  * \function igraph_graphlets_project
760  * Project a graph on a graphlets basis
761  *
762  * Note that the graph projected does not have to be the same that
763  * was used to calculate the graphlet basis, but it is assumed that
764  * it has the same number of vertices, and the vertex ids of the two
765  * graphs match.
766  * \param graph The input graph, it must be a simple graph, edge directions are
767  *        ignored.
768  * \param weights Weights of the edges in the input graph, a vector.
769  * \param cliques The graphlet basis, a pointer vector, in which each
770  *        element is a vector of vertex ids.
771  * \param Mu An initialized vector, the weights of the graphlets will
772  *        be stored here. This vector is also used to initialize the
773  *        the weight vector for the iterative algorithm, if the
774  *        \c startMu argument is true (non-zero).
775  * \param startMu If true (non-zero), then the supplied Mu vector is
776  *        used as the starting point of the iteration. Otherwise a
777  *        constant 1 vector is used.
778  * \param niter Integer scalar, the number of iterations to perform.
779  * \return Error code.
780  *
781  * See also: \ref igraph_graphlets() and
782  * \ref igraph_graphlets_candidate_basis().
783  */
784 
igraph_graphlets_project(const igraph_t * graph,const igraph_vector_t * weights,const igraph_vector_ptr_t * cliques,igraph_vector_t * Mu,igraph_bool_t startMu,int niter)785 int igraph_graphlets_project(const igraph_t *graph,
786                              const igraph_vector_t *weights,
787                              const igraph_vector_ptr_t *cliques,
788                              igraph_vector_t *Mu, igraph_bool_t startMu,
789                              int niter) {
790 
791     return igraph_i_graphlets_project(graph, weights, cliques, Mu, startMu,
792                                       niter, /*vid1=*/ 0);
793 }
794 
795 typedef struct igraph_i_graphlets_order_t {
796     const igraph_vector_ptr_t *cliques;
797     const igraph_vector_t *Mu;
798 } igraph_i_graphlets_order_t;
799 
igraph_i_graphlets_order_cmp(void * data,const void * a,const void * b)800 static int igraph_i_graphlets_order_cmp(void *data, const void *a, const void *b) {
801     igraph_i_graphlets_order_t *ddata = (igraph_i_graphlets_order_t*) data;
802     int *aa = (int*) a;
803     int *bb = (int*) b;
804     igraph_real_t Mu_a = VECTOR(*ddata->Mu)[*aa];
805     igraph_real_t Mu_b = VECTOR(*ddata->Mu)[*bb];
806 
807     if (Mu_a < Mu_b) {
808         return 1;
809     } else if (Mu_a > Mu_b) {
810         return -1;
811     } else {
812         return 0;
813     }
814 }
815 
816 /**
817  * \function igraph_graphlets
818  * Calculate graphlets basis and project the graph on it
819  *
820  * This function simply calls \ref igraph_graphlets_candidate_basis()
821  * and \ref igraph_graphlets_project(), and then orders the graphlets
822  * according to decreasing weights.
823  * \param graph The input graph, it must be a simple graph, edge directions are
824  *        ignored.
825  * \param weights Weights of the edges, a vector.
826  * \param cliques An initialized vector of pointers.
827  *        The graphlet basis is stored here. Each element of the pointer
828  *        vector will be a vector of vertex ids.
829  * \param Mu An initialized vector, the weights of the graphlets will
830  *        be stored here.
831  * \param niter Integer scalar, the number of iterations to perform
832  *        for the projection step.
833  * \return Error code.
834  *
835  * See also: \ref igraph_graphlets_candidate_basis() and
836  * \ref igraph_graphlets_project().
837  */
838 
igraph_graphlets(const igraph_t * graph,const igraph_vector_t * weights,igraph_vector_ptr_t * cliques,igraph_vector_t * Mu,int niter)839 int igraph_graphlets(const igraph_t *graph,
840                      const igraph_vector_t *weights,
841                      igraph_vector_ptr_t *cliques,
842                      igraph_vector_t *Mu, int niter) {
843 
844     int i, nocliques;
845     igraph_vector_t thresholds;
846     igraph_vector_int_t order;
847     igraph_i_graphlets_order_t sortdata = { cliques, Mu };
848 
849     igraph_vector_init(&thresholds, 0);
850     IGRAPH_FINALLY(igraph_vector_destroy, &thresholds);
851     igraph_graphlets_candidate_basis(graph, weights, cliques, &thresholds);
852     igraph_vector_destroy(&thresholds);
853     IGRAPH_FINALLY_CLEAN(1);
854 
855     igraph_graphlets_project(graph, weights, cliques, Mu, /*startMu=*/ 0, niter);
856 
857     nocliques = igraph_vector_ptr_size(cliques);
858     igraph_vector_int_init(&order, nocliques);
859     IGRAPH_FINALLY(igraph_vector_int_destroy, &order);
860     for (i = 0; i < nocliques; i++) {
861         VECTOR(order)[i] = i;
862     }
863     igraph_qsort_r(VECTOR(order), nocliques, sizeof(int), &sortdata,
864                    igraph_i_graphlets_order_cmp);
865 
866     igraph_vector_ptr_index_int(cliques, &order);
867     igraph_vector_index_int(Mu, &order);
868 
869     igraph_vector_int_destroy(&order);
870     IGRAPH_FINALLY_CLEAN(1);
871 
872     return 0;
873 }
874