1 /* -*- mode: C -*- */
2 /*
3 IGraph library.
4 Copyright (C) 2013 Gabor Csardi <csardi.gabor@gmail.com>
5 334 Harvard street, Cambridge, MA 02139 USA
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301 USA
21
22 */
23
24 #include "igraph_graphlets.h"
25
26 #include "igraph_conversion.h"
27 #include "igraph_constructors.h"
28 #include "igraph_cliques.h"
29 #include "igraph_memory.h"
30 #include "igraph_operators.h"
31 #include "igraph_qsort.h"
32 #include "igraph_structural.h"
33
34 /**
35 * \section graphlets_intro Introduction
36 *
37 * <para>
38 * Graphlet decomposition models a weighted undirected graph
39 * via the union of potentially overlapping dense social groups.
40 * This is done by a two-step algorithm. In the first step, a candidate
41 * set of groups (a candidate basis) is created by finding cliques
42 * in the thresholded input graph. In the second step,
43 * the graph is projected onto the candidate basis, resulting in a
44 * weight coefficient for each clique in the candidate basis.
45 * </para>
46 *
47 * <para>
48 * For more information on graphlet decomposition, see
49 * Hossein Azari Soufiani and Edoardo M Airoldi: "Graphlet decomposition of a weighted network",
50 * https://arxiv.org/abs/1203.2821 and http://proceedings.mlr.press/v22/azari12/azari12.pdf
51 * </para>
52 *
53 * <para>
54 * igraph contains three functions for performing the graphlet
55 * decomponsition of a graph. The first is \ref igraph_graphlets(), which
56 * performs both steps of the method and returns a list of subgraphs
57 * with their corresponding weights. The other two functions
58 * correspond to the first and second steps of the algorithm, and they are
59 * useful if the user wishes to perform them individually:
60 * \ref igraph_graphlets_candidate_basis() and
61 * \ref igraph_graphlets_project().
62 * </para>
63 *
64 * <para>
65 * <remark>
66 * Note: The term "graphlet" is used for several unrelated concepts
67 * in the literature. If you are looking to count induced subgraphs, see
68 * \ref igraph_motifs_randesu() and \ref igraph_subisomorphic_lad().
69 * </remark>
70 * </para>
71 */
72
73 typedef struct {
74 igraph_vector_int_t *resultids;
75 igraph_t *result;
76 igraph_vector_t *resultweights;
77 int nc;
78 } igraph_i_subclique_next_free_t;
79
igraph_i_subclique_next_free(void * ptr)80 static void igraph_i_subclique_next_free(void *ptr) {
81 igraph_i_subclique_next_free_t *data = ptr;
82 int i;
83 if (data->resultids) {
84 for (i = 0; i < data->nc; i++) {
85 if (data->resultids + i) {
86 igraph_vector_int_destroy(data->resultids + i);
87 }
88 }
89 IGRAPH_FREE(data->resultids);
90 }
91 if (data->result) {
92 for (i = 0; i < data->nc; i++) {
93 if (data->result + i) {
94 igraph_destroy(data->result + i);
95 }
96 }
97 IGRAPH_FREE(data->result);
98 }
99 if (data->resultweights) {
100 for (i = 0; i < data->nc; i++) {
101 if (data->resultweights + i) {
102 igraph_vector_destroy(data->resultweights + i);
103 }
104 }
105 IGRAPH_FREE(data->resultweights);
106 }
107 }
108
109 /**
110 * \function igraph_i_subclique_next
111 * Calculate subcliques of the cliques found at the previous level
112 *
113 * \param graph Input graph.
114 * \param weight Edge weights.
115 * \param ids The ids of the vertices in the input graph.
116 * \param cliques A list of vectors, vertex ids for cliques.
117 * \param result The result is stored here, a list of graphs is stored
118 * here.
119 * \param resultids The ids of the vertices in the result graphs is
120 * stored here.
121 * \param clique_thr The thresholds for the cliques are stored here,
122 * if not a null pointer.
123 * \param next_thr The next thresholds for the cliques are stored
124 * here, if not a null pointer.
125 *
126 */
127
igraph_i_subclique_next(const igraph_t * graph,const igraph_vector_t * weights,const igraph_vector_int_t * ids,const igraph_vector_ptr_t * cliques,igraph_t ** result,igraph_vector_t ** resultweights,igraph_vector_int_t ** resultids,igraph_vector_t * clique_thr,igraph_vector_t * next_thr)128 static int igraph_i_subclique_next(const igraph_t *graph,
129 const igraph_vector_t *weights,
130 const igraph_vector_int_t *ids,
131 const igraph_vector_ptr_t *cliques,
132 igraph_t **result,
133 igraph_vector_t **resultweights,
134 igraph_vector_int_t **resultids,
135 igraph_vector_t *clique_thr,
136 igraph_vector_t *next_thr) {
137
138 /* The input is a set of cliques, that were found at a previous level.
139 For each clique, we calculate the next threshold, drop the isolate
140 vertices, and create a new graph from them. */
141
142 igraph_vector_int_t mark, map;
143 igraph_vector_int_t edges;
144 igraph_vector_t neis, newedges;
145 igraph_integer_t c, nc = igraph_vector_ptr_size(cliques);
146 igraph_integer_t no_of_nodes = igraph_vcount(graph);
147 igraph_integer_t no_of_edges = igraph_ecount(graph);
148 igraph_i_subclique_next_free_t freedata = { 0, 0, 0, nc };
149
150 if (igraph_vector_size(weights) != no_of_edges) {
151 IGRAPH_ERROR("Invalid length of weight vector", IGRAPH_EINVAL);
152 }
153
154 if (igraph_vector_int_size(ids) != no_of_nodes) {
155 IGRAPH_ERROR("Invalid length of ID vector", IGRAPH_EINVAL);
156 }
157
158 IGRAPH_FINALLY(igraph_i_subclique_next_free, &freedata);
159 *resultids = IGRAPH_CALLOC(nc, igraph_vector_int_t);
160 if (!*resultids) {
161 IGRAPH_ERROR("Cannot calculate next cliques", IGRAPH_ENOMEM);
162 }
163 freedata.resultids = *resultids;
164 *resultweights = IGRAPH_CALLOC(nc, igraph_vector_t);
165 if (!*resultweights) {
166 IGRAPH_ERROR("Cannot calculate next cliques", IGRAPH_ENOMEM);
167 }
168 freedata.resultweights = *resultweights;
169 *result = IGRAPH_CALLOC(nc, igraph_t);
170 if (!*result) {
171 IGRAPH_ERROR("Cannot calculate next cliques", IGRAPH_ENOMEM);
172 }
173 freedata.result = *result;
174
175 igraph_vector_init(&newedges, 100);
176 IGRAPH_FINALLY(igraph_vector_destroy, &newedges);
177 igraph_vector_int_init(&mark, no_of_nodes);
178 IGRAPH_FINALLY(igraph_vector_int_destroy, &mark);
179 igraph_vector_int_init(&map, no_of_nodes);
180 IGRAPH_FINALLY(igraph_vector_int_destroy, &map);
181 igraph_vector_int_init(&edges, 100);
182 IGRAPH_FINALLY(igraph_vector_int_destroy, &edges);
183 igraph_vector_init(&neis, 10);
184 IGRAPH_FINALLY(igraph_vector_destroy, &neis);
185
186 if (clique_thr) {
187 igraph_vector_resize(clique_thr, nc);
188 }
189 if (next_thr) {
190 igraph_vector_resize(next_thr, nc);
191 }
192
193 /* Iterate over all cliques. We will create graphs for all
194 subgraphs defined by the cliques. */
195
196 for (c = 0; c < nc; c++) {
197 igraph_vector_t *clique = VECTOR(*cliques)[c];
198 igraph_real_t minweight = IGRAPH_INFINITY, nextweight = IGRAPH_INFINITY;
199 igraph_integer_t e, v, clsize = igraph_vector_size(clique);
200 igraph_integer_t noe, nov = 0;
201 igraph_vector_int_t *newids = (*resultids) + c;
202 igraph_vector_t *neww = (*resultweights) + c;
203 igraph_t *newgraph = (*result) + c;
204 igraph_vector_int_clear(&edges);
205 igraph_vector_clear(&newedges);
206
207 /* --------------------------------------------------- */
208
209 /* Iterate over the vertices of a clique and find the
210 edges within the clique, put them in a list.
211 At the same time, search for the minimum edge weight within
212 the clique and the next edge weight if any. */
213
214 for (v = 0; v < clsize; v++) {
215 igraph_integer_t i, neilen, node = VECTOR(*clique)[v];
216 igraph_incident(graph, &neis, node, IGRAPH_ALL);
217 neilen = igraph_vector_size(&neis);
218 VECTOR(mark)[node] = c + 1;
219 for (i = 0; i < neilen; i++) {
220 igraph_integer_t edge = VECTOR(neis)[i];
221 igraph_integer_t nei = IGRAPH_OTHER(graph, edge, node);
222 if (VECTOR(mark)[nei] == c + 1) {
223 igraph_real_t w = VECTOR(*weights)[edge];
224 igraph_vector_int_push_back(&edges, edge);
225 if (w < minweight) {
226 nextweight = minweight;
227 minweight = w;
228 } else if (w > minweight && w < nextweight) {
229 nextweight = w;
230 }
231 }
232 }
233 } /* v < clsize */
234
235 /* --------------------------------------------------- */
236
237 /* OK, we have stored the edges and found the weight of
238 the clique and the next weight to consider */
239
240 if (clique_thr) {
241 VECTOR(*clique_thr)[c] = minweight;
242 }
243 if (next_thr) {
244 VECTOR(*next_thr )[c] = nextweight;
245 }
246
247 /* --------------------------------------------------- */
248
249 /* Now we create the subgraph from the edges above the next
250 threshold, and their incident vertices. */
251
252 igraph_vector_int_init(newids, 0);
253 igraph_vector_init(neww, 0);
254
255 /* We use mark[] to denote the vertices already mapped to
256 the new graph. If this is -(c+1), then the vertex was
257 mapped, otherwise it was not. The mapping itself is in
258 map[]. */
259
260 noe = igraph_vector_int_size(&edges);
261 for (e = 0; e < noe; e++) {
262 igraph_integer_t edge = VECTOR(edges)[e];
263 igraph_integer_t from, to;
264 igraph_real_t w = VECTOR(*weights)[edge];
265 igraph_edge(graph, edge, &from, &to);
266 if (w >= nextweight) {
267 if (VECTOR(mark)[from] == c + 1) {
268 VECTOR(map)[from] = nov++;
269 VECTOR(mark)[from] = -(c + 1);
270 igraph_vector_int_push_back(newids, VECTOR(*ids)[from]);
271 }
272 if (VECTOR(mark)[to] == c + 1) {
273 VECTOR(map)[to] = nov++;
274 VECTOR(mark)[to] = -(c + 1);
275 igraph_vector_int_push_back(newids, VECTOR(*ids)[to]);
276 }
277 igraph_vector_push_back(neww, w);
278 igraph_vector_push_back(&newedges, VECTOR(map)[from]);
279 igraph_vector_push_back(&newedges, VECTOR(map)[to]);
280 }
281 }
282
283 igraph_create(newgraph, &newedges, nov, IGRAPH_UNDIRECTED);
284
285 /* --------------------------------------------------- */
286
287 } /* c < nc */
288
289 igraph_vector_destroy(&neis);
290 igraph_vector_int_destroy(&edges);
291 igraph_vector_int_destroy(&mark);
292 igraph_vector_int_destroy(&map);
293 igraph_vector_destroy(&newedges);
294 IGRAPH_FINALLY_CLEAN(6); /* + freedata */
295
296 return 0;
297 }
298
igraph_i_graphlets_destroy_vectorlist(igraph_vector_ptr_t * vl)299 static void igraph_i_graphlets_destroy_vectorlist(igraph_vector_ptr_t *vl) {
300 int i, n = igraph_vector_ptr_size(vl);
301 for (i = 0; i < n; i++) {
302 igraph_vector_t *v = (igraph_vector_t*) VECTOR(*vl)[i];
303 if (v) {
304 igraph_vector_destroy(v);
305 }
306 }
307 igraph_vector_ptr_destroy(vl);
308 }
309
igraph_i_graphlets(const igraph_t * graph,const igraph_vector_t * weights,igraph_vector_ptr_t * cliques,igraph_vector_t * thresholds,const igraph_vector_int_t * ids,igraph_real_t startthr)310 static int igraph_i_graphlets(const igraph_t *graph,
311 const igraph_vector_t *weights,
312 igraph_vector_ptr_t *cliques,
313 igraph_vector_t *thresholds,
314 const igraph_vector_int_t *ids,
315 igraph_real_t startthr) {
316
317 /* This version is different from the main function, and is
318 appropriate to use in recursive calls, because it _adds_ the
319 results to 'cliques' and 'thresholds' and uses the supplied
320 'startthr' */
321
322 igraph_vector_ptr_t mycliques;
323 int no_of_edges = igraph_ecount(graph);
324 igraph_vector_t subv;
325 igraph_t subg;
326 int i, nographs, nocliques;
327 igraph_t *newgraphs = 0;
328 igraph_vector_t *newweights = 0;
329 igraph_vector_int_t *newids = 0;
330 igraph_vector_t clique_thr, next_thr;
331 igraph_i_subclique_next_free_t freedata = { 0, 0, 0, 0 };
332
333 IGRAPH_CHECK(igraph_vector_ptr_init(&mycliques, 0));
334 IGRAPH_FINALLY(igraph_i_graphlets_destroy_vectorlist, &mycliques);
335 IGRAPH_VECTOR_INIT_FINALLY(&subv, 0);
336
337 /* We start by finding cliques at the lowest threshold */
338 for (i = 0; i < no_of_edges; i++) {
339 if (VECTOR(*weights)[i] >= startthr) {
340 IGRAPH_CHECK(igraph_vector_push_back(&subv, i));
341 }
342 }
343 igraph_subgraph_edges(graph, &subg, igraph_ess_vector(&subv),
344 /*delete_vertices=*/ 0);
345 IGRAPH_FINALLY(igraph_destroy, &subg);
346 igraph_maximal_cliques(&subg, &mycliques, /*min_size=*/ 0, /*max_size=*/ 0);
347 igraph_destroy(&subg);
348 IGRAPH_FINALLY_CLEAN(1);
349 nocliques = igraph_vector_ptr_size(&mycliques);
350
351 igraph_vector_destroy(&subv);
352 IGRAPH_FINALLY_CLEAN(1);
353
354 /* Get the next cliques and thresholds */
355 IGRAPH_VECTOR_INIT_FINALLY(&next_thr, 0);
356 IGRAPH_VECTOR_INIT_FINALLY(&clique_thr, 0);
357
358 igraph_i_subclique_next(graph, weights, ids, &mycliques,
359 &newgraphs, &newweights, &newids,
360 &clique_thr, &next_thr);
361
362 freedata.result = newgraphs;
363 freedata.resultids = newids;
364 freedata.resultweights = newweights;
365 freedata.nc = nocliques;
366 IGRAPH_FINALLY(igraph_i_subclique_next_free, &freedata);
367
368 /* Store cliques at the current level */
369 igraph_vector_append(thresholds, &clique_thr);
370 for (i = 0; i < nocliques; i++) {
371 igraph_vector_t *cl = (igraph_vector_t*) VECTOR(mycliques)[i];
372 int j, n = igraph_vector_size(cl);
373 for (j = 0; j < n; j++) {
374 int node = VECTOR(*cl)[j];
375 VECTOR(*cl)[j] = VECTOR(*ids)[node];
376 }
377 igraph_vector_sort(cl);
378 }
379 igraph_vector_ptr_append(cliques, &mycliques);
380
381 /* Recursive calls for cliques found */
382 nographs = igraph_vector_ptr_size(&mycliques);
383 for (i = 0; i < nographs; i++) {
384 igraph_t *g = newgraphs + i;
385 if (igraph_vcount(g) > 1) {
386 igraph_vector_t *w_sub = newweights + i;
387 igraph_vector_int_t *ids_sub = newids + i;
388 igraph_i_graphlets(g, w_sub, cliques, thresholds, ids_sub, VECTOR(next_thr)[i]);
389 }
390 }
391
392 igraph_vector_destroy(&clique_thr);
393 igraph_vector_destroy(&next_thr);
394 igraph_i_subclique_next_free(&freedata);
395 igraph_vector_ptr_destroy(&mycliques); /* contents was copied over */
396 IGRAPH_FINALLY_CLEAN(4);
397
398 return 0;
399 }
400
401 typedef struct {
402 const igraph_vector_ptr_t *cliques;
403 const igraph_vector_t *thresholds;
404 } igraph_i_graphlets_filter_t;
405
igraph_i_graphlets_filter_cmp(void * data,const void * a,const void * b)406 static int igraph_i_graphlets_filter_cmp(void *data, const void *a, const void *b) {
407 igraph_i_graphlets_filter_t *ddata = (igraph_i_graphlets_filter_t *) data;
408 int *aa = (int*) a;
409 int *bb = (int*) b;
410 igraph_real_t t_a = VECTOR(*ddata->thresholds)[*aa];
411 igraph_real_t t_b = VECTOR(*ddata->thresholds)[*bb];
412 igraph_vector_t *v_a, *v_b;
413 int s_a, s_b;
414
415 if (t_a < t_b) {
416 return -1;
417 } else if (t_a > t_b) {
418 return 1;
419 }
420
421 v_a = (igraph_vector_t*) VECTOR(*ddata->cliques)[*aa];
422 v_b = (igraph_vector_t*) VECTOR(*ddata->cliques)[*bb];
423 s_a = igraph_vector_size(v_a);
424 s_b = igraph_vector_size(v_b);
425
426 if (s_a < s_b) {
427 return -1;
428 } else if (s_a > s_b) {
429 return 1;
430 } else {
431 return 0;
432 }
433 }
434
igraph_i_graphlets_filter(igraph_vector_ptr_t * cliques,igraph_vector_t * thresholds)435 static int igraph_i_graphlets_filter(igraph_vector_ptr_t *cliques,
436 igraph_vector_t *thresholds) {
437
438 /* Filter out non-maximal cliques. Every non-maximal clique is
439 part of a maximal clique, at the same threshold.
440
441 First we order the cliques, according to their threshold, and
442 then according to their size. So when we look for a candidate
443 superset, we only need to check the cliques next in the list,
444 until their threshold is different. */
445
446 int i, iptr, nocliques = igraph_vector_ptr_size(cliques);
447 igraph_vector_int_t order;
448 igraph_i_graphlets_filter_t sortdata = { cliques, thresholds };
449
450 igraph_vector_int_init(&order, nocliques);
451 IGRAPH_FINALLY(igraph_vector_int_destroy, &order);
452 for (i = 0; i < nocliques; i++) {
453 VECTOR(order)[i] = i;
454 }
455
456 igraph_qsort_r(VECTOR(order), nocliques, sizeof(int), &sortdata,
457 igraph_i_graphlets_filter_cmp);
458
459 for (i = 0; i < nocliques - 1; i++) {
460 int ri = VECTOR(order)[i];
461 igraph_vector_t *needle = VECTOR(*cliques)[ri];
462 igraph_real_t thr_i = VECTOR(*thresholds)[ri];
463 int n_i = igraph_vector_size(needle);
464 int j = i + 1;
465
466 for (j = i + 1; j < nocliques; j++) {
467 int rj = VECTOR(order)[j];
468 igraph_real_t thr_j = VECTOR(*thresholds)[rj];
469 igraph_vector_t *hay;
470 int n_j, pi = 0, pj = 0;
471
472 /* Done, not found */
473 if (thr_j != thr_i) {
474 break;
475 }
476
477 /* Check size of hay */
478 hay = VECTOR(*cliques)[rj];
479 n_j = igraph_vector_size(hay);
480 if (n_i > n_j) {
481 continue;
482 }
483
484 /* Check if hay is a superset */
485 while (pi < n_i && pj < n_j && n_i - pi <= n_j - pj) {
486 int ei = VECTOR(*needle)[pi];
487 int ej = VECTOR(*hay)[pj];
488 if (ei < ej) {
489 break;
490 } else if (ei > ej) {
491 pj++;
492 } else {
493 pi++; pj++;
494 }
495 }
496 if (pi == n_i) {
497 /* Found, delete immediately */
498 igraph_vector_destroy(needle);
499 igraph_free(needle);
500 VECTOR(*cliques)[ri] = 0;
501 break;
502 }
503 }
504 }
505
506 /* Remove null pointers from the list of cliques */
507 for (i = 0, iptr = 0; i < nocliques; i++) {
508 igraph_vector_t *v = VECTOR(*cliques)[i];
509 if (v) {
510 VECTOR(*cliques)[iptr] = v;
511 VECTOR(*thresholds)[iptr] = VECTOR(*thresholds)[i];
512 iptr++;
513 }
514 }
515 igraph_vector_ptr_resize(cliques, iptr);
516 igraph_vector_resize(thresholds, iptr);
517
518 igraph_vector_int_destroy(&order);
519 IGRAPH_FINALLY_CLEAN(1);
520
521 return 0;
522 }
523
524 /**
525 * \function igraph_graphlets_candidate_basis
526 * Calculate a candidate graphlets basis
527 *
528 * \param graph The input graph, it must be a simple graph, edge directions are
529 * ignored.
530 * \param weights Weights of the edges, a vector.
531 * \param cliques An initialized vector of pointers.
532 * The graphlet basis is stored here. Each element of the pointer
533 * vector will be a vector of vertex ids. Each elements must be
534 * destroyed using \ref igraph_vector_destroy() and \ref igraph_free().
535 * \param thresholds An initialized vector, the (highest possible)
536 * weight thresholds for finding the basis subgraphs are stored
537 * here.
538 * \return Error code.
539 *
540 * See also: \ref igraph_graphlets() and \ref igraph_graphlets_project().
541 */
542
igraph_graphlets_candidate_basis(const igraph_t * graph,const igraph_vector_t * weights,igraph_vector_ptr_t * cliques,igraph_vector_t * thresholds)543 int igraph_graphlets_candidate_basis(const igraph_t *graph,
544 const igraph_vector_t *weights,
545 igraph_vector_ptr_t *cliques,
546 igraph_vector_t *thresholds) {
547
548 int no_of_nodes = igraph_vcount(graph);
549 int no_of_edges = igraph_ecount(graph);
550 igraph_real_t minthr;
551 igraph_vector_int_t ids;
552 igraph_bool_t simple;
553 int i;
554
555 /* Some checks */
556 if (weights == NULL) {
557 IGRAPH_ERROR("Graphlet functions require weighted graphs", IGRAPH_EINVAL);
558 }
559
560 if (igraph_vector_size(weights) != no_of_edges) {
561 IGRAPH_ERROR("Invalid weight vector length", IGRAPH_EINVAL);
562 }
563
564 igraph_is_simple(graph, &simple);
565 if (!simple) {
566 IGRAPH_ERROR("Graphlets work on simple graphs only", IGRAPH_EINVAL);
567 }
568
569 minthr = igraph_vector_min(weights);
570 igraph_vector_ptr_clear(cliques);
571 igraph_vector_clear(thresholds);
572 igraph_vector_int_init(&ids, no_of_nodes);
573 IGRAPH_FINALLY(igraph_vector_int_destroy, &ids);
574 for (i = 0; i < no_of_nodes; i++) {
575 VECTOR(ids)[i] = i;
576 }
577
578 igraph_i_graphlets(graph, weights, cliques, thresholds, &ids, minthr);
579
580 igraph_vector_int_destroy(&ids);
581 IGRAPH_FINALLY_CLEAN(1);
582
583 igraph_i_graphlets_filter(cliques, thresholds);
584
585 return 0;
586 }
587
588 /* TODO: not made static because it is used by the R interface */
igraph_i_graphlets_project(const igraph_t * graph,const igraph_vector_t * weights,const igraph_vector_ptr_t * cliques,igraph_vector_t * Mu,igraph_bool_t startMu,int niter,int vid1)589 int igraph_i_graphlets_project(const igraph_t *graph,
590 const igraph_vector_t *weights,
591 const igraph_vector_ptr_t *cliques,
592 igraph_vector_t *Mu, igraph_bool_t startMu,
593 int niter, int vid1) {
594
595 int no_of_nodes = igraph_vcount(graph);
596 int no_of_edges = igraph_ecount(graph);
597 int no_cliques = igraph_vector_ptr_size(cliques);
598 igraph_vector_int_t vcl, vclidx, ecl, eclidx, cel, celidx;
599 igraph_vector_t edgelist, newweights, normfact;
600 int i, total_vertices, e, ptr, total_edges;
601 igraph_bool_t simple;
602
603 /* Check arguments */
604 if (weights == NULL) {
605 IGRAPH_ERROR("Graphlet functions require weighted graphs", IGRAPH_EINVAL);
606 }
607 if (no_of_edges != igraph_vector_size(weights)) {
608 IGRAPH_ERROR("Invalid weight vector size", IGRAPH_EINVAL);
609 }
610 if (startMu && igraph_vector_size(Mu) != no_cliques) {
611 IGRAPH_ERROR("Invalid start coefficient vector size", IGRAPH_EINVAL);
612 }
613 if (niter < 0) {
614 IGRAPH_ERROR("Number of iterations must be non-negative", IGRAPH_EINVAL);
615 }
616 igraph_is_simple(graph, &simple);
617 if (!simple) {
618 IGRAPH_ERROR("Graphlets work on simple graphs only", IGRAPH_EINVAL);
619 }
620
621 if (!startMu) {
622 igraph_vector_resize(Mu, no_cliques);
623 igraph_vector_fill(Mu, 1);
624 }
625
626 /* Count # cliques per vertex. Also, create an index
627 for the edges per clique. */
628 IGRAPH_CHECK(igraph_vector_int_init(&vclidx, no_of_nodes + 2));
629 IGRAPH_FINALLY(igraph_vector_int_destroy, &vclidx);
630 IGRAPH_CHECK(igraph_vector_int_init(&celidx, no_cliques + 3));
631 IGRAPH_FINALLY(igraph_vector_int_destroy, &celidx);
632 for (i = 0, total_vertices = 0, total_edges = 0; i < no_cliques; i++) {
633 igraph_vector_t *v = VECTOR(*cliques)[i];
634 int j, n = igraph_vector_size(v);
635 total_vertices += n;
636 total_edges += n * (n - 1) / 2;
637 VECTOR(celidx)[i + 2] = total_edges;
638 for (j = 0; j < n; j++) {
639 int vv = VECTOR(*v)[j] - vid1;
640 VECTOR(vclidx)[vv + 2] += 1;
641 }
642 }
643 VECTOR(celidx)[i + 2] = total_edges;
644
645 /* Finalize index vector */
646 for (i = 0; i < no_of_nodes; i++) {
647 VECTOR(vclidx)[i + 2] += VECTOR(vclidx)[i + 1];
648 }
649
650 /* Create vertex-clique list, the cliques for each vertex. */
651 IGRAPH_CHECK(igraph_vector_int_init(&vcl, total_vertices));
652 IGRAPH_FINALLY(igraph_vector_int_destroy, &vcl);
653 for (i = 0; i < no_cliques; i++) {
654 igraph_vector_t *v = VECTOR(*cliques)[i];
655 int j, n = igraph_vector_size(v);
656 for (j = 0; j < n; j++) {
657 int vv = VECTOR(*v)[j] - vid1;
658 int p = VECTOR(vclidx)[vv + 1];
659 VECTOR(vcl)[p] = i;
660 VECTOR(vclidx)[vv + 1] += 1;
661 }
662 }
663
664 /* Create an edge-clique list, the cliques of each edge */
665 IGRAPH_CHECK(igraph_vector_int_init(&ecl, total_edges));
666 IGRAPH_FINALLY(igraph_vector_int_destroy, &ecl);
667 IGRAPH_CHECK(igraph_vector_int_init(&eclidx, no_of_edges + 1));
668 IGRAPH_FINALLY(igraph_vector_int_destroy, &eclidx);
669 IGRAPH_CHECK(igraph_vector_init(&edgelist, no_of_edges * 2));
670 IGRAPH_FINALLY(igraph_vector_destroy, &edgelist);
671 IGRAPH_CHECK(igraph_get_edgelist(graph, &edgelist, /*by_col=*/ 0));
672 for (i = 0, e = 0, ptr = 0; e < no_of_edges; e++) {
673 int from = VECTOR(edgelist)[i++];
674 int to = VECTOR(edgelist)[i++];
675 int from_s = VECTOR(vclidx)[from];
676 int from_e = VECTOR(vclidx)[from + 1];
677 int to_s = VECTOR(vclidx)[to];
678 int to_e = VECTOR(vclidx)[to + 1];
679 VECTOR(eclidx)[e] = ptr;
680 while (from_s < from_e && to_s < to_e) {
681 int from_v = VECTOR(vcl)[from_s];
682 int to_v = VECTOR(vcl)[to_s];
683 if (from_v == to_v) {
684 VECTOR(ecl)[ptr++] = from_v;
685 from_s++; to_s++;
686 } else if (from_v < to_v) {
687 from_s++;
688 } else {
689 to_s++;
690 }
691 }
692 }
693 VECTOR(eclidx)[e] = ptr;
694
695 igraph_vector_destroy(&edgelist);
696 IGRAPH_FINALLY_CLEAN(1);
697
698 /* Convert the edge-clique list to a clique-edge list */
699 IGRAPH_CHECK(igraph_vector_int_init(&cel, total_edges));
700 IGRAPH_FINALLY(igraph_vector_int_destroy, &cel);
701 for (i = 0; i < no_of_edges; i++) {
702 int ecl_s = VECTOR(eclidx)[i], ecl_e = VECTOR(eclidx)[i + 1], j;
703 for (j = ecl_s; j < ecl_e; j++) {
704 int cl = VECTOR(ecl)[j];
705 int epos = VECTOR(celidx)[cl + 1];
706 VECTOR(cel)[epos] = i;
707 VECTOR(celidx)[cl + 1] += 1;
708 }
709 }
710
711 /* Normalizing factors for the iteration */
712 IGRAPH_CHECK(igraph_vector_init(&normfact, no_cliques));
713 IGRAPH_FINALLY(igraph_vector_destroy, &normfact);
714 for (i = 0; i < no_cliques; i++) {
715 igraph_vector_t *v = VECTOR(*cliques)[i];
716 int n = igraph_vector_size(v);
717 VECTOR(normfact)[i] = n * (n + 1) / 2;
718 }
719
720 /* We have the clique-edge list, so do the projection now */
721 IGRAPH_CHECK(igraph_vector_init(&newweights, no_of_edges));
722 IGRAPH_FINALLY(igraph_vector_destroy, &newweights);
723 for (i = 0; i < niter; i++) {
724 for (e = 0; e < no_of_edges; e++) {
725 int start = VECTOR(eclidx)[e];
726 int end = VECTOR(eclidx)[e + 1];
727 VECTOR(newweights)[e] = 0.0001;
728 while (start < end) {
729 int clique = VECTOR(ecl)[start++];
730 VECTOR(newweights)[e] += VECTOR(*Mu)[clique];
731 }
732 }
733 for (e = 0; e < no_cliques; e++) {
734 igraph_real_t sumratio = 0;
735 int start = VECTOR(celidx)[e];
736 int end = VECTOR(celidx)[e + 1];
737 while (start < end) {
738 int edge = VECTOR(cel)[start++];
739 sumratio += VECTOR(*weights)[edge] / VECTOR(newweights)[edge];
740 }
741 VECTOR(*Mu)[e] *= sumratio / VECTOR(normfact)[e];
742 }
743 }
744
745 igraph_vector_destroy(&newweights);
746 igraph_vector_destroy(&normfact);
747 igraph_vector_int_destroy(&cel);
748 igraph_vector_int_destroy(&eclidx);
749 igraph_vector_int_destroy(&ecl);
750 igraph_vector_int_destroy(&vcl);
751 igraph_vector_int_destroy(&celidx);
752 igraph_vector_int_destroy(&vclidx);
753 IGRAPH_FINALLY_CLEAN(8);
754
755 return 0;
756 }
757
758 /**
759 * \function igraph_graphlets_project
760 * Project a graph on a graphlets basis
761 *
762 * Note that the graph projected does not have to be the same that
763 * was used to calculate the graphlet basis, but it is assumed that
764 * it has the same number of vertices, and the vertex ids of the two
765 * graphs match.
766 * \param graph The input graph, it must be a simple graph, edge directions are
767 * ignored.
768 * \param weights Weights of the edges in the input graph, a vector.
769 * \param cliques The graphlet basis, a pointer vector, in which each
770 * element is a vector of vertex ids.
771 * \param Mu An initialized vector, the weights of the graphlets will
772 * be stored here. This vector is also used to initialize the
773 * the weight vector for the iterative algorithm, if the
774 * \c startMu argument is true (non-zero).
775 * \param startMu If true (non-zero), then the supplied Mu vector is
776 * used as the starting point of the iteration. Otherwise a
777 * constant 1 vector is used.
778 * \param niter Integer scalar, the number of iterations to perform.
779 * \return Error code.
780 *
781 * See also: \ref igraph_graphlets() and
782 * \ref igraph_graphlets_candidate_basis().
783 */
784
igraph_graphlets_project(const igraph_t * graph,const igraph_vector_t * weights,const igraph_vector_ptr_t * cliques,igraph_vector_t * Mu,igraph_bool_t startMu,int niter)785 int igraph_graphlets_project(const igraph_t *graph,
786 const igraph_vector_t *weights,
787 const igraph_vector_ptr_t *cliques,
788 igraph_vector_t *Mu, igraph_bool_t startMu,
789 int niter) {
790
791 return igraph_i_graphlets_project(graph, weights, cliques, Mu, startMu,
792 niter, /*vid1=*/ 0);
793 }
794
795 typedef struct igraph_i_graphlets_order_t {
796 const igraph_vector_ptr_t *cliques;
797 const igraph_vector_t *Mu;
798 } igraph_i_graphlets_order_t;
799
igraph_i_graphlets_order_cmp(void * data,const void * a,const void * b)800 static int igraph_i_graphlets_order_cmp(void *data, const void *a, const void *b) {
801 igraph_i_graphlets_order_t *ddata = (igraph_i_graphlets_order_t*) data;
802 int *aa = (int*) a;
803 int *bb = (int*) b;
804 igraph_real_t Mu_a = VECTOR(*ddata->Mu)[*aa];
805 igraph_real_t Mu_b = VECTOR(*ddata->Mu)[*bb];
806
807 if (Mu_a < Mu_b) {
808 return 1;
809 } else if (Mu_a > Mu_b) {
810 return -1;
811 } else {
812 return 0;
813 }
814 }
815
816 /**
817 * \function igraph_graphlets
818 * Calculate graphlets basis and project the graph on it
819 *
820 * This function simply calls \ref igraph_graphlets_candidate_basis()
821 * and \ref igraph_graphlets_project(), and then orders the graphlets
822 * according to decreasing weights.
823 * \param graph The input graph, it must be a simple graph, edge directions are
824 * ignored.
825 * \param weights Weights of the edges, a vector.
826 * \param cliques An initialized vector of pointers.
827 * The graphlet basis is stored here. Each element of the pointer
828 * vector will be a vector of vertex ids.
829 * \param Mu An initialized vector, the weights of the graphlets will
830 * be stored here.
831 * \param niter Integer scalar, the number of iterations to perform
832 * for the projection step.
833 * \return Error code.
834 *
835 * See also: \ref igraph_graphlets_candidate_basis() and
836 * \ref igraph_graphlets_project().
837 */
838
igraph_graphlets(const igraph_t * graph,const igraph_vector_t * weights,igraph_vector_ptr_t * cliques,igraph_vector_t * Mu,int niter)839 int igraph_graphlets(const igraph_t *graph,
840 const igraph_vector_t *weights,
841 igraph_vector_ptr_t *cliques,
842 igraph_vector_t *Mu, int niter) {
843
844 int i, nocliques;
845 igraph_vector_t thresholds;
846 igraph_vector_int_t order;
847 igraph_i_graphlets_order_t sortdata = { cliques, Mu };
848
849 igraph_vector_init(&thresholds, 0);
850 IGRAPH_FINALLY(igraph_vector_destroy, &thresholds);
851 igraph_graphlets_candidate_basis(graph, weights, cliques, &thresholds);
852 igraph_vector_destroy(&thresholds);
853 IGRAPH_FINALLY_CLEAN(1);
854
855 igraph_graphlets_project(graph, weights, cliques, Mu, /*startMu=*/ 0, niter);
856
857 nocliques = igraph_vector_ptr_size(cliques);
858 igraph_vector_int_init(&order, nocliques);
859 IGRAPH_FINALLY(igraph_vector_int_destroy, &order);
860 for (i = 0; i < nocliques; i++) {
861 VECTOR(order)[i] = i;
862 }
863 igraph_qsort_r(VECTOR(order), nocliques, sizeof(int), &sortdata,
864 igraph_i_graphlets_order_cmp);
865
866 igraph_vector_ptr_index_int(cliques, &order);
867 igraph_vector_index_int(Mu, &order);
868
869 igraph_vector_int_destroy(&order);
870 IGRAPH_FINALLY_CLEAN(1);
871
872 return 0;
873 }
874