1 /* -*- mode: C -*-  */
2 /*
3    IGraph library.
4    Copyright (C) 2013  Gabor Csardi <csardi.gabor@gmail.com>
5    334 Harvard street, Cambridge, MA 02139 USA
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20    02110-1301 USA
21 
22 */
23 
24 #include "igraph_graphlets.h"
25 #include "igraph_memory.h"
26 #include "igraph_constructors.h"
27 #include "igraph_cliques.h"
28 #include "igraph_structural.h"
29 #include "igraph_qsort.h"
30 #include "igraph_conversion.h"
31 
32 /**
33  * \section graphlets_intro Introduction
34  *
35  * <para>
36  * Graphlet decomposition models a weighted undirected graph
37  * via the union of potentially overlapping dense social groups.
38  * This is done by a two-step algorithm. In the first step, a candidate
39  * set of groups (a candidate basis) is created by finding cliques
40  * in the thresholded input graph. In the second step,
41  * the graph is projected onto the candidate basis, resulting in a
42  * weight coefficient for each clique in the candidate basis.
43  * </para>
44  *
45  * <para>
46  * For more information on graphlet decomposition, see
47  * Hossein Azari Soufiani and Edoardo M Airoldi: "Graphlet decomposition of a weighted network",
48  * https://arxiv.org/abs/1203.2821 and http://proceedings.mlr.press/v22/azari12/azari12.pdf
49  * </para>
50  *
51  * <para>
52  * igraph contains three functions for performing the graphlet
53  * decomponsition of a graph. The first is \ref igraph_graphlets(), which
54  * performs both steps of the method and returns a list of subgraphs
55  * with their corresponding weights. The other two functions
56  * correspond to the first and second steps of the algorithm, and they are
57  * useful if the user wishes to perform them individually:
58  * \ref igraph_graphlets_candidate_basis() and
59  * \ref igraph_graphlets_project().
60  * </para>
61  *
62  * <para>
63  * <remark>
64  * Note: The term "graphlet" is used for several unrelated concepts
65  * in the literature. If you are looking to count induced subgraphs, see
66  * \ref igraph_motifs_randesu() and \ref igraph_subisomorphic_lad().
67  * </remark>
68  * </para>
69  */
70 
71 typedef struct {
72     igraph_vector_int_t *resultids;
73     igraph_t *result;
74     igraph_vector_t *resultweights;
75     int nc;
76 } igraph_i_subclique_next_free_t;
77 
igraph_i_subclique_next_free(void * ptr)78 static void igraph_i_subclique_next_free(void *ptr) {
79     igraph_i_subclique_next_free_t *data = ptr;
80     int i;
81     if (data->resultids) {
82         for (i = 0; i < data->nc; i++) {
83             if (data->resultids + i) {
84                 igraph_vector_int_destroy(data->resultids + i);
85             }
86         }
87         igraph_Free(data->resultids);
88     }
89     if (data->result) {
90         for (i = 0; i < data->nc; i++) {
91             if (data->result + i) {
92                 igraph_destroy(data->result + i);
93             }
94         }
95         igraph_Free(data->result);
96     }
97     if (data->resultweights) {
98         for (i = 0; i < data->nc; i++) {
99             if (data->resultweights + i) {
100                 igraph_vector_destroy(data->resultweights + i);
101             }
102         }
103         igraph_Free(data->resultweights);
104     }
105 }
106 
107 /**
108  * \function igraph_i_subclique_next
109  * Calculate subcliques of the cliques found at the previous level
110  *
111  * \param graph Input graph.
112  * \param weight Edge weights.
113  * \param ids The ids of the vertices in the input graph.
114  * \param cliques A list of vectors, vertex ids for cliques.
115  * \param result The result is stored here, a list of graphs is stored
116  *        here.
117  * \param resultids The ids of the vertices in the result graphs is
118  *        stored here.
119  * \param clique_thr The thresholds for the cliques are stored here,
120  *        if not a null pointer.
121  * \param next_thr The next thresholds for the cliques are stored
122  *        here, if not a null pointer.
123  *
124  */
125 
igraph_i_subclique_next(const igraph_t * graph,const igraph_vector_t * weights,const igraph_vector_int_t * ids,const igraph_vector_ptr_t * cliques,igraph_t ** result,igraph_vector_t ** resultweights,igraph_vector_int_t ** resultids,igraph_vector_t * clique_thr,igraph_vector_t * next_thr)126 static int igraph_i_subclique_next(const igraph_t *graph,
127                                    const igraph_vector_t *weights,
128                                    const igraph_vector_int_t *ids,
129                                    const igraph_vector_ptr_t *cliques,
130                                    igraph_t **result,
131                                    igraph_vector_t **resultweights,
132                                    igraph_vector_int_t **resultids,
133                                    igraph_vector_t *clique_thr,
134                                    igraph_vector_t *next_thr) {
135 
136     /* The input is a set of cliques, that were found at a previous level.
137        For each clique, we calculate the next threshold, drop the isolate
138        vertices, and create a new graph from them. */
139 
140     igraph_vector_int_t mark, map;
141     igraph_vector_int_t edges;
142     igraph_vector_t neis, newedges;
143     igraph_integer_t c, nc = igraph_vector_ptr_size(cliques);
144     igraph_integer_t no_of_nodes = igraph_vcount(graph);
145     igraph_integer_t no_of_edges = igraph_ecount(graph);
146     igraph_i_subclique_next_free_t freedata = { 0, 0, 0, nc };
147 
148     if (igraph_vector_size(weights) != no_of_edges) {
149         IGRAPH_ERROR("Invalid length of weight vector", IGRAPH_EINVAL);
150     }
151 
152     if (igraph_vector_int_size(ids) != no_of_nodes) {
153         IGRAPH_ERROR("Invalid length of ID vector", IGRAPH_EINVAL);
154     }
155 
156     IGRAPH_FINALLY(igraph_i_subclique_next_free, &freedata);
157     *resultids = igraph_Calloc(nc, igraph_vector_int_t);
158     if (!*resultids) {
159         IGRAPH_ERROR("Cannot calculate next cliques", IGRAPH_ENOMEM);
160     }
161     freedata.resultids = *resultids;
162     *resultweights = igraph_Calloc(nc, igraph_vector_t);
163     if (!*resultweights) {
164         IGRAPH_ERROR("Cannot calculate next cliques", IGRAPH_ENOMEM);
165     }
166     freedata.resultweights = *resultweights;
167     *result = igraph_Calloc(nc, igraph_t);
168     if (!*result) {
169         IGRAPH_ERROR("Cannot calculate next cliques", IGRAPH_ENOMEM);
170     }
171     freedata.result = *result;
172 
173     igraph_vector_init(&newedges, 100);
174     IGRAPH_FINALLY(igraph_vector_destroy, &newedges);
175     igraph_vector_int_init(&mark, no_of_nodes);
176     IGRAPH_FINALLY(igraph_vector_int_destroy, &mark);
177     igraph_vector_int_init(&map, no_of_nodes);
178     IGRAPH_FINALLY(igraph_vector_int_destroy, &map);
179     igraph_vector_int_init(&edges, 100);
180     IGRAPH_FINALLY(igraph_vector_int_destroy, &edges);
181     igraph_vector_init(&neis, 10);
182     IGRAPH_FINALLY(igraph_vector_destroy, &neis);
183 
184     if (clique_thr) {
185         igraph_vector_resize(clique_thr, nc);
186     }
187     if (next_thr)   {
188         igraph_vector_resize(next_thr,   nc);
189     }
190 
191     /* Iterate over all cliques. We will create graphs for all
192        subgraphs defined by the cliques. */
193 
194     for (c = 0; c < nc; c++) {
195         igraph_vector_t *clique = VECTOR(*cliques)[c];
196         igraph_real_t minweight = IGRAPH_INFINITY, nextweight = IGRAPH_INFINITY;
197         igraph_integer_t e, v, clsize = igraph_vector_size(clique);
198         igraph_integer_t noe, nov = 0;
199         igraph_vector_int_t *newids = (*resultids) + c;
200         igraph_vector_t *neww = (*resultweights) + c;
201         igraph_t *newgraph = (*result) + c;
202         igraph_vector_int_clear(&edges);
203         igraph_vector_clear(&newedges);
204 
205         /* --------------------------------------------------- */
206 
207         /* Iterate over the vertices of a clique and find the
208            edges within the clique, put them in a list.
209            At the same time, search for the minimum edge weight within
210            the clique and the next edge weight if any. */
211 
212         for (v = 0; v < clsize; v++) {
213             igraph_integer_t i, neilen, node = VECTOR(*clique)[v];
214             igraph_incident(graph, &neis, node, IGRAPH_ALL);
215             neilen = igraph_vector_size(&neis);
216             VECTOR(mark)[node] = c + 1;
217             for (i = 0; i < neilen; i++) {
218                 igraph_integer_t edge = VECTOR(neis)[i];
219                 igraph_integer_t nei = IGRAPH_OTHER(graph, edge, node);
220                 if (VECTOR(mark)[nei] == c + 1) {
221                     igraph_real_t w = VECTOR(*weights)[edge];
222                     igraph_vector_int_push_back(&edges, edge);
223                     if (w < minweight) {
224                         nextweight = minweight;
225                         minweight = w;
226                     } else if (w > minweight && w < nextweight) {
227                         nextweight = w;
228                     }
229                 }
230             }
231         } /* v < clsize */
232 
233         /* --------------------------------------------------- */
234 
235         /* OK, we have stored the edges and found the weight of
236            the clique and the next weight to consider */
237 
238         if (clique_thr) {
239             VECTOR(*clique_thr)[c] = minweight;
240         }
241         if (next_thr)   {
242             VECTOR(*next_thr  )[c] = nextweight;
243         }
244 
245         /* --------------------------------------------------- */
246 
247         /* Now we create the subgraph from the edges above the next
248            threshold, and their incident vertices. */
249 
250         igraph_vector_int_init(newids, 0);
251         igraph_vector_init(neww, 0);
252 
253         /* We use mark[] to denote the vertices already mapped to
254            the new graph. If this is -(c+1), then the vertex was
255            mapped, otherwise it was not. The mapping itself is in
256            map[]. */
257 
258         noe = igraph_vector_int_size(&edges);
259         for (e = 0; e < noe; e++) {
260             igraph_integer_t edge = VECTOR(edges)[e];
261             igraph_integer_t from, to;
262             igraph_real_t w = VECTOR(*weights)[edge];
263             igraph_edge(graph, edge, &from, &to);
264             if (w >= nextweight) {
265                 if (VECTOR(mark)[from] == c + 1) {
266                     VECTOR(map)[from] = nov++;
267                     VECTOR(mark)[from] = -(c + 1);
268                     igraph_vector_int_push_back(newids, VECTOR(*ids)[from]);
269                 }
270                 if (VECTOR(mark)[to] == c + 1) {
271                     VECTOR(map)[to] = nov++;
272                     VECTOR(mark)[to] = -(c + 1);
273                     igraph_vector_int_push_back(newids, VECTOR(*ids)[to]);
274                 }
275                 igraph_vector_push_back(neww, w);
276                 igraph_vector_push_back(&newedges, VECTOR(map)[from]);
277                 igraph_vector_push_back(&newedges, VECTOR(map)[to]);
278             }
279         }
280 
281         igraph_create(newgraph, &newedges, nov, IGRAPH_UNDIRECTED);
282 
283         /* --------------------------------------------------- */
284 
285     } /* c < nc */
286 
287     igraph_vector_destroy(&neis);
288     igraph_vector_int_destroy(&edges);
289     igraph_vector_int_destroy(&mark);
290     igraph_vector_int_destroy(&map);
291     igraph_vector_destroy(&newedges);
292     IGRAPH_FINALLY_CLEAN(6);  /* + freedata */
293 
294     return 0;
295 }
296 
igraph_i_graphlets_destroy_vectorlist(igraph_vector_ptr_t * vl)297 static void igraph_i_graphlets_destroy_vectorlist(igraph_vector_ptr_t *vl) {
298     int i, n = igraph_vector_ptr_size(vl);
299     for (i = 0; i < n; i++) {
300         igraph_vector_t *v = (igraph_vector_t*) VECTOR(*vl)[i];
301         if (v) {
302             igraph_vector_destroy(v);
303         }
304     }
305     igraph_vector_ptr_destroy(vl);
306 }
307 
igraph_i_graphlets(const igraph_t * graph,const igraph_vector_t * weights,igraph_vector_ptr_t * cliques,igraph_vector_t * thresholds,const igraph_vector_int_t * ids,igraph_real_t startthr)308 static int igraph_i_graphlets(const igraph_t *graph,
309                               const igraph_vector_t *weights,
310                               igraph_vector_ptr_t *cliques,
311                               igraph_vector_t *thresholds,
312                               const igraph_vector_int_t *ids,
313                               igraph_real_t startthr) {
314 
315     /* This version is different from the main function, and is
316        appropriate to use in recursive calls, because it _adds_ the
317        results to 'cliques' and 'thresholds' and uses the supplied
318        'startthr' */
319 
320     igraph_vector_ptr_t mycliques;
321     int no_of_edges = igraph_ecount(graph);
322     igraph_vector_t subv;
323     igraph_t subg;
324     int i, nographs, nocliques;
325     igraph_t *newgraphs = 0;
326     igraph_vector_t *newweights = 0;
327     igraph_vector_int_t *newids = 0;
328     igraph_vector_t clique_thr, next_thr;
329     igraph_i_subclique_next_free_t freedata = { 0, 0, 0, 0 };
330 
331     IGRAPH_CHECK(igraph_vector_ptr_init(&mycliques, 0));
332     IGRAPH_FINALLY(igraph_i_graphlets_destroy_vectorlist, &mycliques);
333     IGRAPH_VECTOR_INIT_FINALLY(&subv, 0);
334 
335     /* We start by finding cliques at the lowest threshold */
336     for (i = 0; i < no_of_edges; i++) {
337         if (VECTOR(*weights)[i] >= startthr) {
338             IGRAPH_CHECK(igraph_vector_push_back(&subv, i));
339         }
340     }
341     igraph_subgraph_edges(graph, &subg, igraph_ess_vector(&subv),
342                           /*delete_vertices=*/ 0);
343     IGRAPH_FINALLY(igraph_destroy, &subg);
344     igraph_maximal_cliques(&subg, &mycliques, /*min_size=*/ 0, /*max_size=*/ 0);
345     igraph_destroy(&subg);
346     IGRAPH_FINALLY_CLEAN(1);
347     nocliques = igraph_vector_ptr_size(&mycliques);
348 
349     igraph_vector_destroy(&subv);
350     IGRAPH_FINALLY_CLEAN(1);
351 
352     /* Get the next cliques and thresholds */
353     IGRAPH_VECTOR_INIT_FINALLY(&next_thr, 0);
354     IGRAPH_VECTOR_INIT_FINALLY(&clique_thr, 0);
355 
356     igraph_i_subclique_next(graph, weights, ids, &mycliques,
357                             &newgraphs, &newweights, &newids,
358                             &clique_thr, &next_thr);
359 
360     freedata.result = newgraphs;
361     freedata.resultids = newids;
362     freedata.resultweights = newweights;
363     freedata.nc = nocliques;
364     IGRAPH_FINALLY(igraph_i_subclique_next_free, &freedata);
365 
366     /* Store cliques at the current level */
367     igraph_vector_append(thresholds, &clique_thr);
368     for (i = 0; i < nocliques; i++) {
369         igraph_vector_t *cl = (igraph_vector_t*) VECTOR(mycliques)[i];
370         int j, n = igraph_vector_size(cl);
371         for (j = 0; j < n; j++) {
372             int node = VECTOR(*cl)[j];
373             VECTOR(*cl)[j] = VECTOR(*ids)[node];
374         }
375         igraph_vector_sort(cl);
376     }
377     igraph_vector_ptr_append(cliques, &mycliques);
378 
379     /* Recursive calls for cliques found */
380     nographs = igraph_vector_ptr_size(&mycliques);
381     for (i = 0; i < nographs; i++) {
382         igraph_t *g = newgraphs + i;
383         if (igraph_vcount(g) > 1) {
384             igraph_vector_t *w = newweights + i;
385             igraph_vector_int_t *ids = newids + i;
386             igraph_i_graphlets(g, w, cliques, thresholds, ids, VECTOR(next_thr)[i]);
387         }
388     }
389 
390     igraph_vector_destroy(&clique_thr);
391     igraph_vector_destroy(&next_thr);
392     igraph_i_subclique_next_free(&freedata);
393     igraph_vector_ptr_destroy(&mycliques); /* contents was copied over */
394     IGRAPH_FINALLY_CLEAN(4);
395 
396     return 0;
397 }
398 
399 typedef struct {
400     const igraph_vector_ptr_t *cliques;
401     const igraph_vector_t *thresholds;
402 } igraph_i_graphlets_filter_t;
403 
igraph_i_graphlets_filter_cmp(void * data,const void * a,const void * b)404 static int igraph_i_graphlets_filter_cmp(void *data, const void *a, const void *b) {
405     igraph_i_graphlets_filter_t *ddata = (igraph_i_graphlets_filter_t *) data;
406     int *aa = (int*) a;
407     int *bb = (int*) b;
408     igraph_real_t t_a = VECTOR(*ddata->thresholds)[*aa];
409     igraph_real_t t_b = VECTOR(*ddata->thresholds)[*bb];
410     igraph_vector_t *v_a, *v_b;
411     int s_a, s_b;
412 
413     if (t_a < t_b) {
414         return -1;
415     } else if (t_a > t_b) {
416         return 1;
417     }
418 
419     v_a = (igraph_vector_t*) VECTOR(*ddata->cliques)[*aa];
420     v_b = (igraph_vector_t*) VECTOR(*ddata->cliques)[*bb];
421     s_a = igraph_vector_size(v_a);
422     s_b = igraph_vector_size(v_b);
423 
424     if (s_a < s_b) {
425         return -1;
426     } else if (s_a > s_b) {
427         return 1;
428     } else {
429         return 0;
430     }
431 }
432 
igraph_i_graphlets_filter(igraph_vector_ptr_t * cliques,igraph_vector_t * thresholds)433 static int igraph_i_graphlets_filter(igraph_vector_ptr_t *cliques,
434                                      igraph_vector_t *thresholds) {
435 
436     /* Filter out non-maximal cliques. Every non-maximal clique is
437        part of a maximal clique, at the same threshold.
438 
439        First we order the cliques, according to their threshold, and
440        then according to their size. So when we look for a candidate
441        superset, we only need to check the cliques next in the list,
442        until their threshold is different. */
443 
444     int i, iptr, nocliques = igraph_vector_ptr_size(cliques);
445     igraph_vector_int_t order;
446     igraph_i_graphlets_filter_t sortdata = { cliques, thresholds };
447 
448     igraph_vector_int_init(&order, nocliques);
449     IGRAPH_FINALLY(igraph_vector_int_destroy, &order);
450     for (i = 0; i < nocliques; i++) {
451         VECTOR(order)[i] = i;
452     }
453 
454     igraph_qsort_r(VECTOR(order), nocliques, sizeof(int), &sortdata,
455                    igraph_i_graphlets_filter_cmp);
456 
457     for (i = 0; i < nocliques - 1; i++) {
458         int ri = VECTOR(order)[i];
459         igraph_vector_t *needle = VECTOR(*cliques)[ri];
460         igraph_real_t thr_i = VECTOR(*thresholds)[ri];
461         int n_i = igraph_vector_size(needle);
462         int j = i + 1;
463 
464         for (j = i + 1; j < nocliques; j++) {
465             int rj = VECTOR(order)[j];
466             igraph_real_t thr_j = VECTOR(*thresholds)[rj];
467             igraph_vector_t *hay;
468             int n_j, pi = 0, pj = 0;
469 
470             /* Done, not found */
471             if (thr_j != thr_i) {
472                 break;
473             }
474 
475             /* Check size of hay */
476             hay = VECTOR(*cliques)[rj];
477             n_j = igraph_vector_size(hay);
478             if (n_i > n_j) {
479                 continue;
480             }
481 
482             /* Check if hay is a superset */
483             while (pi < n_i && pj < n_j && n_i - pi <= n_j - pj) {
484                 int ei = VECTOR(*needle)[pi];
485                 int ej = VECTOR(*hay)[pj];
486                 if (ei < ej) {
487                     break;
488                 } else if (ei > ej) {
489                     pj++;
490                 } else {
491                     pi++; pj++;
492                 }
493             }
494             if (pi == n_i) {
495                 /* Found, delete immediately */
496                 igraph_vector_destroy(needle);
497                 igraph_free(needle);
498                 VECTOR(*cliques)[ri] = 0;
499                 break;
500             }
501         }
502     }
503 
504     /* Remove null pointers from the list of cliques */
505     for (i = 0, iptr = 0; i < nocliques; i++) {
506         igraph_vector_t *v = VECTOR(*cliques)[i];
507         if (v) {
508             VECTOR(*cliques)[iptr] = v;
509             VECTOR(*thresholds)[iptr] = VECTOR(*thresholds)[i];
510             iptr++;
511         }
512     }
513     igraph_vector_ptr_resize(cliques, iptr);
514     igraph_vector_resize(thresholds, iptr);
515 
516     igraph_vector_int_destroy(&order);
517     IGRAPH_FINALLY_CLEAN(1);
518 
519     return 0;
520 }
521 
522 /**
523  * \function igraph_graphlets_candidate_basis
524  * Calculate a candidate graphlets basis
525  *
526  * \param graph The input graph, it must be a simple graph, edge directions are
527  *        ignored.
528  * \param weights Weights of the edges, a vector.
529  * \param cliques An initialized vector of pointers.
530  *        The graphlet basis is stored here. Each element of the pointer
531  *        vector will be a vector of vertex ids. Each elements must be
532  *        destroyed using \ref igraph_vector_destroy() and \ref igraph_free().
533  * \param thresholds An initialized vector, the (highest possible)
534  *        weight thresholds for finding the basis subgraphs are stored
535  *        here.
536  * \return Error code.
537  *
538  * See also: \ref igraph_graphlets() and \ref igraph_graphlets_project().
539  */
540 
igraph_graphlets_candidate_basis(const igraph_t * graph,const igraph_vector_t * weights,igraph_vector_ptr_t * cliques,igraph_vector_t * thresholds)541 int igraph_graphlets_candidate_basis(const igraph_t *graph,
542                                      const igraph_vector_t *weights,
543                                      igraph_vector_ptr_t *cliques,
544                                      igraph_vector_t *thresholds) {
545 
546     int no_of_nodes = igraph_vcount(graph);
547     int no_of_edges = igraph_ecount(graph);
548     igraph_real_t minthr;
549     igraph_vector_int_t ids;
550     igraph_bool_t simple;
551     int i;
552 
553     /* Some checks */
554     if (weights == NULL) {
555         IGRAPH_ERROR("Graphlet functions require weighted graphs", IGRAPH_EINVAL);
556     }
557 
558     if (igraph_vector_size(weights) != no_of_edges) {
559         IGRAPH_ERROR("Invalid weight vector length", IGRAPH_EINVAL);
560     }
561 
562     igraph_is_simple(graph, &simple);
563     if (!simple) {
564         IGRAPH_ERROR("Graphlets work on simple graphs only", IGRAPH_EINVAL);
565     }
566 
567     minthr = igraph_vector_min(weights);
568     igraph_vector_ptr_clear(cliques);
569     igraph_vector_clear(thresholds);
570     igraph_vector_int_init(&ids, no_of_nodes);
571     IGRAPH_FINALLY(igraph_vector_int_destroy, &ids);
572     for (i = 0; i < no_of_nodes; i++) {
573         VECTOR(ids)[i] = i;
574     }
575 
576     igraph_i_graphlets(graph, weights, cliques, thresholds, &ids, minthr);
577 
578     igraph_vector_int_destroy(&ids);
579     IGRAPH_FINALLY_CLEAN(1);
580 
581     igraph_i_graphlets_filter(cliques, thresholds);
582 
583     return 0;
584 }
585 
586 /* TODO: not made static because it is used by the R interface */
igraph_i_graphlets_project(const igraph_t * graph,const igraph_vector_t * weights,const igraph_vector_ptr_t * cliques,igraph_vector_t * Mu,igraph_bool_t startMu,int niter,int vid1)587 int igraph_i_graphlets_project(const igraph_t *graph,
588                                const igraph_vector_t *weights,
589                                const igraph_vector_ptr_t *cliques,
590                                igraph_vector_t *Mu, igraph_bool_t startMu,
591                                int niter, int vid1) {
592 
593     int no_of_nodes = igraph_vcount(graph);
594     int no_of_edges = igraph_ecount(graph);
595     int no_cliques = igraph_vector_ptr_size(cliques);
596     igraph_vector_int_t vcl, vclidx, ecl, eclidx, cel, celidx;
597     igraph_vector_t edgelist, newweights, normfact;
598     int i, total_vertices, e, ptr, total_edges;
599     igraph_bool_t simple;
600 
601     /* Check arguments */
602     if (weights == NULL) {
603         IGRAPH_ERROR("Graphlet functions require weighted graphs", IGRAPH_EINVAL);
604     }
605     if (no_of_edges != igraph_vector_size(weights)) {
606         IGRAPH_ERROR("Invalid weight vector size", IGRAPH_EINVAL);
607     }
608     if (startMu && igraph_vector_size(Mu) != no_cliques) {
609         IGRAPH_ERROR("Invalid start coefficient vector size", IGRAPH_EINVAL);
610     }
611     if (niter < 0) {
612         IGRAPH_ERROR("Number of iterations must be non-negative", IGRAPH_EINVAL);
613     }
614     igraph_is_simple(graph, &simple);
615     if (!simple) {
616         IGRAPH_ERROR("Graphlets work on simple graphs only", IGRAPH_EINVAL);
617     }
618 
619     if (!startMu) {
620         igraph_vector_resize(Mu, no_cliques);
621         igraph_vector_fill(Mu, 1);
622     }
623 
624     /* Count # cliques per vertex. Also, create an index
625        for the edges per clique. */
626     IGRAPH_CHECK(igraph_vector_int_init(&vclidx, no_of_nodes + 2));
627     IGRAPH_FINALLY(igraph_vector_int_destroy, &vclidx);
628     IGRAPH_CHECK(igraph_vector_int_init(&celidx, no_cliques + 3));
629     IGRAPH_FINALLY(igraph_vector_int_destroy, &celidx);
630     for (i = 0, total_vertices = 0, total_edges = 0; i < no_cliques; i++) {
631         igraph_vector_t *v = VECTOR(*cliques)[i];
632         int j, n = igraph_vector_size(v);
633         total_vertices += n;
634         total_edges += n * (n - 1) / 2;
635         VECTOR(celidx)[i + 2] = total_edges;
636         for (j = 0; j < n; j++) {
637             int vv = VECTOR(*v)[j] - vid1;
638             VECTOR(vclidx)[vv + 2] += 1;
639         }
640     }
641     VECTOR(celidx)[i + 2] = total_edges;
642 
643     /* Finalize index vector */
644     for (i = 0; i < no_of_nodes; i++) {
645         VECTOR(vclidx)[i + 2] += VECTOR(vclidx)[i + 1];
646     }
647 
648     /* Create vertex-clique list, the cliques for each vertex. */
649     IGRAPH_CHECK(igraph_vector_int_init(&vcl, total_vertices));
650     IGRAPH_FINALLY(igraph_vector_int_destroy, &vcl);
651     for (i = 0; i < no_cliques; i++) {
652         igraph_vector_t *v = VECTOR(*cliques)[i];
653         int j, n = igraph_vector_size(v);
654         for (j = 0; j < n; j++) {
655             int vv = VECTOR(*v)[j] - vid1;
656             int p = VECTOR(vclidx)[vv + 1];
657             VECTOR(vcl)[p] = i;
658             VECTOR(vclidx)[vv + 1] += 1;
659         }
660     }
661 
662     /* Create an edge-clique list, the cliques of each edge */
663     IGRAPH_CHECK(igraph_vector_int_init(&ecl, total_edges));
664     IGRAPH_FINALLY(igraph_vector_int_destroy, &ecl);
665     IGRAPH_CHECK(igraph_vector_int_init(&eclidx, no_of_edges + 1));
666     IGRAPH_FINALLY(igraph_vector_int_destroy, &eclidx);
667     IGRAPH_CHECK(igraph_vector_init(&edgelist, no_of_edges * 2));
668     IGRAPH_FINALLY(igraph_vector_destroy, &edgelist);
669     IGRAPH_CHECK(igraph_get_edgelist(graph, &edgelist, /*by_col=*/ 0));
670     for (i = 0, e = 0, ptr = 0; e < no_of_edges; e++) {
671         int from = VECTOR(edgelist)[i++];
672         int to = VECTOR(edgelist)[i++];
673         int from_s = VECTOR(vclidx)[from];
674         int from_e = VECTOR(vclidx)[from + 1];
675         int to_s = VECTOR(vclidx)[to];
676         int to_e = VECTOR(vclidx)[to + 1];
677         VECTOR(eclidx)[e] = ptr;
678         while (from_s < from_e && to_s < to_e) {
679             int from_v = VECTOR(vcl)[from_s];
680             int to_v = VECTOR(vcl)[to_s];
681             if (from_v == to_v) {
682                 VECTOR(ecl)[ptr++] = from_v;
683                 from_s++; to_s++;
684             } else if (from_v < to_v) {
685                 from_s++;
686             } else {
687                 to_s++;
688             }
689         }
690     }
691     VECTOR(eclidx)[e] = ptr;
692 
693     igraph_vector_destroy(&edgelist);
694     IGRAPH_FINALLY_CLEAN(1);
695 
696     /* Convert the edge-clique list to a clique-edge list */
697     IGRAPH_CHECK(igraph_vector_int_init(&cel, total_edges));
698     IGRAPH_FINALLY(igraph_vector_int_destroy, &cel);
699     for (i = 0; i < no_of_edges; i++) {
700         int ecl_s = VECTOR(eclidx)[i], ecl_e = VECTOR(eclidx)[i + 1], j;
701         for (j = ecl_s; j < ecl_e; j++) {
702             int cl = VECTOR(ecl)[j];
703             int epos = VECTOR(celidx)[cl + 1];
704             VECTOR(cel)[epos] = i;
705             VECTOR(celidx)[cl + 1] += 1;
706         }
707     }
708 
709     /* Normalizing factors for the iteration */
710     IGRAPH_CHECK(igraph_vector_init(&normfact, no_cliques));
711     IGRAPH_FINALLY(igraph_vector_destroy, &normfact);
712     for (i = 0; i < no_cliques; i++) {
713         igraph_vector_t *v = VECTOR(*cliques)[i];
714         int n = igraph_vector_size(v);
715         VECTOR(normfact)[i] = n * (n + 1) / 2;
716     }
717 
718     /* We have the clique-edge list, so do the projection now */
719     IGRAPH_CHECK(igraph_vector_init(&newweights, no_of_edges));
720     IGRAPH_FINALLY(igraph_vector_destroy, &newweights);
721     for (i = 0; i < niter; i++) {
722         for (e = 0; e < no_of_edges; e++) {
723             int start = VECTOR(eclidx)[e];
724             int end = VECTOR(eclidx)[e + 1];
725             VECTOR(newweights)[e] = 0.0001;
726             while (start < end) {
727                 int clique = VECTOR(ecl)[start++];
728                 VECTOR(newweights)[e] += VECTOR(*Mu)[clique];
729             }
730         }
731         for (e = 0; e < no_cliques; e++) {
732             igraph_real_t sumratio = 0;
733             int start = VECTOR(celidx)[e];
734             int end = VECTOR(celidx)[e + 1];
735             while (start < end) {
736                 int edge = VECTOR(cel)[start++];
737                 sumratio += VECTOR(*weights)[edge] / VECTOR(newweights)[edge];
738             }
739             VECTOR(*Mu)[e] *= sumratio / VECTOR(normfact)[e];
740         }
741     }
742 
743     igraph_vector_destroy(&newweights);
744     igraph_vector_destroy(&normfact);
745     igraph_vector_int_destroy(&cel);
746     igraph_vector_int_destroy(&eclidx);
747     igraph_vector_int_destroy(&ecl);
748     igraph_vector_int_destroy(&vcl);
749     igraph_vector_int_destroy(&celidx);
750     igraph_vector_int_destroy(&vclidx);
751     IGRAPH_FINALLY_CLEAN(8);
752 
753     return 0;
754 }
755 
756 /**
757  * \function igraph_graphlets_project
758  * Project a graph on a graphlets basis
759  *
760  * Note that the graph projected does not have to be the same that
761  * was used to calculate the graphlet basis, but it is assumed that
762  * it has the same number of vertices, and the vertex ids of the two
763  * graphs match.
764  * \param graph The input graph, it must be a simple graph, edge directions are
765  *        ignored.
766  * \param weights Weights of the edges in the input graph, a vector.
767  * \param cliques The graphlet basis, a pointer vector, in which each
768  *        element is a vector of vertex ids.
769  * \param Mu An initialized vector, the weights of the graphlets will
770  *        be stored here. This vector is also used to initialize the
771  *        the weight vector for the iterative algorithm, if the
772  *        \c startMu argument is true (non-zero).
773  * \param startMu If true (non-zero), then the supplied Mu vector is
774  *        used as the starting point of the iteration. Otherwise a
775  *        constant 1 vector is used.
776  * \param niter Integer scalar, the number of iterations to perform.
777  * \return Error code.
778  *
779  * See also: \ref igraph_graphlets() and
780  * \ref igraph_graphlets_candidate_basis().
781  */
782 
igraph_graphlets_project(const igraph_t * graph,const igraph_vector_t * weights,const igraph_vector_ptr_t * cliques,igraph_vector_t * Mu,igraph_bool_t startMu,int niter)783 int igraph_graphlets_project(const igraph_t *graph,
784                              const igraph_vector_t *weights,
785                              const igraph_vector_ptr_t *cliques,
786                              igraph_vector_t *Mu, igraph_bool_t startMu,
787                              int niter) {
788 
789     return igraph_i_graphlets_project(graph, weights, cliques, Mu, startMu,
790                                       niter, /*vid1=*/ 0);
791 }
792 
793 typedef struct igraph_i_graphlets_order_t {
794     const igraph_vector_ptr_t *cliques;
795     const igraph_vector_t *Mu;
796 } igraph_i_graphlets_order_t;
797 
igraph_i_graphlets_order_cmp(void * data,const void * a,const void * b)798 static int igraph_i_graphlets_order_cmp(void *data, const void *a, const void *b) {
799     igraph_i_graphlets_order_t *ddata = (igraph_i_graphlets_order_t*) data;
800     int *aa = (int*) a;
801     int *bb = (int*) b;
802     igraph_real_t Mu_a = VECTOR(*ddata->Mu)[*aa];
803     igraph_real_t Mu_b = VECTOR(*ddata->Mu)[*bb];
804 
805     if (Mu_a < Mu_b) {
806         return 1;
807     } else if (Mu_a > Mu_b) {
808         return -1;
809     } else {
810         return 0;
811     }
812 }
813 
814 /**
815  * \function igraph_graphlets
816  * Calculate graphlets basis and project the graph on it
817  *
818  * This function simply calls \ref igraph_graphlets_candidate_basis()
819  * and \ref igraph_graphlets_project(), and then orders the graphlets
820  * according to decreasing weights.
821  * \param graph The input graph, it must be a simple graph, edge directions are
822  *        ignored.
823  * \param weights Weights of the edges, a vector.
824  * \param cliques An initialized vector of pointers.
825  *        The graphlet basis is stored here. Each element of the pointer
826  *        vector will be a vector of vertex ids.
827  * \param Mu An initialized vector, the weights of the graphlets will
828  *        be stored here.
829  * \param niter Integer scalar, the number of iterations to perform
830  *        for the projection step.
831  * \return Error code.
832  *
833  * See also: \ref igraph_graphlets_candidate_basis() and
834  * \ref igraph_graphlets_project().
835  */
836 
igraph_graphlets(const igraph_t * graph,const igraph_vector_t * weights,igraph_vector_ptr_t * cliques,igraph_vector_t * Mu,int niter)837 int igraph_graphlets(const igraph_t *graph,
838                      const igraph_vector_t *weights,
839                      igraph_vector_ptr_t *cliques,
840                      igraph_vector_t *Mu, int niter) {
841 
842     int i, nocliques;
843     igraph_vector_t thresholds;
844     igraph_vector_int_t order;
845     igraph_i_graphlets_order_t sortdata = { cliques, Mu };
846 
847     igraph_vector_init(&thresholds, 0);
848     IGRAPH_FINALLY(igraph_vector_destroy, &thresholds);
849     igraph_graphlets_candidate_basis(graph, weights, cliques, &thresholds);
850     igraph_vector_destroy(&thresholds);
851     IGRAPH_FINALLY_CLEAN(1);
852 
853     igraph_graphlets_project(graph, weights, cliques, Mu, /*startMu=*/ 0, niter);
854 
855     nocliques = igraph_vector_ptr_size(cliques);
856     igraph_vector_int_init(&order, nocliques);
857     IGRAPH_FINALLY(igraph_vector_int_destroy, &order);
858     for (i = 0; i < nocliques; i++) {
859         VECTOR(order)[i] = i;
860     }
861     igraph_qsort_r(VECTOR(order), nocliques, sizeof(int), &sortdata,
862                    igraph_i_graphlets_order_cmp);
863 
864     igraph_vector_ptr_index_int(cliques, &order);
865     igraph_vector_index_int(Mu, &order);
866 
867     igraph_vector_int_destroy(&order);
868     IGRAPH_FINALLY_CLEAN(1);
869 
870     return 0;
871 }
872