1 /* -*- mode: C -*- */
2 /*
3 IGraph library.
4 Copyright (C) 2013 Gabor Csardi <csardi.gabor@gmail.com>
5 334 Harvard street, Cambridge, MA 02139 USA
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301 USA
21
22 */
23
24 #include "igraph_graphlets.h"
25 #include "igraph_memory.h"
26 #include "igraph_constructors.h"
27 #include "igraph_cliques.h"
28 #include "igraph_structural.h"
29 #include "igraph_qsort.h"
30 #include "igraph_conversion.h"
31
32 /**
33 * \section graphlets_intro Introduction
34 *
35 * <para>
36 * Graphlet decomposition models a weighted undirected graph
37 * via the union of potentially overlapping dense social groups.
38 * This is done by a two-step algorithm. In the first step, a candidate
39 * set of groups (a candidate basis) is created by finding cliques
40 * in the thresholded input graph. In the second step,
41 * the graph is projected onto the candidate basis, resulting in a
42 * weight coefficient for each clique in the candidate basis.
43 * </para>
44 *
45 * <para>
46 * For more information on graphlet decomposition, see
47 * Hossein Azari Soufiani and Edoardo M Airoldi: "Graphlet decomposition of a weighted network",
48 * https://arxiv.org/abs/1203.2821 and http://proceedings.mlr.press/v22/azari12/azari12.pdf
49 * </para>
50 *
51 * <para>
52 * igraph contains three functions for performing the graphlet
53 * decomponsition of a graph. The first is \ref igraph_graphlets(), which
54 * performs both steps of the method and returns a list of subgraphs
55 * with their corresponding weights. The other two functions
56 * correspond to the first and second steps of the algorithm, and they are
57 * useful if the user wishes to perform them individually:
58 * \ref igraph_graphlets_candidate_basis() and
59 * \ref igraph_graphlets_project().
60 * </para>
61 *
62 * <para>
63 * <remark>
64 * Note: The term "graphlet" is used for several unrelated concepts
65 * in the literature. If you are looking to count induced subgraphs, see
66 * \ref igraph_motifs_randesu() and \ref igraph_subisomorphic_lad().
67 * </remark>
68 * </para>
69 */
70
71 typedef struct {
72 igraph_vector_int_t *resultids;
73 igraph_t *result;
74 igraph_vector_t *resultweights;
75 int nc;
76 } igraph_i_subclique_next_free_t;
77
igraph_i_subclique_next_free(void * ptr)78 static void igraph_i_subclique_next_free(void *ptr) {
79 igraph_i_subclique_next_free_t *data = ptr;
80 int i;
81 if (data->resultids) {
82 for (i = 0; i < data->nc; i++) {
83 if (data->resultids + i) {
84 igraph_vector_int_destroy(data->resultids + i);
85 }
86 }
87 igraph_Free(data->resultids);
88 }
89 if (data->result) {
90 for (i = 0; i < data->nc; i++) {
91 if (data->result + i) {
92 igraph_destroy(data->result + i);
93 }
94 }
95 igraph_Free(data->result);
96 }
97 if (data->resultweights) {
98 for (i = 0; i < data->nc; i++) {
99 if (data->resultweights + i) {
100 igraph_vector_destroy(data->resultweights + i);
101 }
102 }
103 igraph_Free(data->resultweights);
104 }
105 }
106
107 /**
108 * \function igraph_i_subclique_next
109 * Calculate subcliques of the cliques found at the previous level
110 *
111 * \param graph Input graph.
112 * \param weight Edge weights.
113 * \param ids The ids of the vertices in the input graph.
114 * \param cliques A list of vectors, vertex ids for cliques.
115 * \param result The result is stored here, a list of graphs is stored
116 * here.
117 * \param resultids The ids of the vertices in the result graphs is
118 * stored here.
119 * \param clique_thr The thresholds for the cliques are stored here,
120 * if not a null pointer.
121 * \param next_thr The next thresholds for the cliques are stored
122 * here, if not a null pointer.
123 *
124 */
125
igraph_i_subclique_next(const igraph_t * graph,const igraph_vector_t * weights,const igraph_vector_int_t * ids,const igraph_vector_ptr_t * cliques,igraph_t ** result,igraph_vector_t ** resultweights,igraph_vector_int_t ** resultids,igraph_vector_t * clique_thr,igraph_vector_t * next_thr)126 static int igraph_i_subclique_next(const igraph_t *graph,
127 const igraph_vector_t *weights,
128 const igraph_vector_int_t *ids,
129 const igraph_vector_ptr_t *cliques,
130 igraph_t **result,
131 igraph_vector_t **resultweights,
132 igraph_vector_int_t **resultids,
133 igraph_vector_t *clique_thr,
134 igraph_vector_t *next_thr) {
135
136 /* The input is a set of cliques, that were found at a previous level.
137 For each clique, we calculate the next threshold, drop the isolate
138 vertices, and create a new graph from them. */
139
140 igraph_vector_int_t mark, map;
141 igraph_vector_int_t edges;
142 igraph_vector_t neis, newedges;
143 igraph_integer_t c, nc = igraph_vector_ptr_size(cliques);
144 igraph_integer_t no_of_nodes = igraph_vcount(graph);
145 igraph_integer_t no_of_edges = igraph_ecount(graph);
146 igraph_i_subclique_next_free_t freedata = { 0, 0, 0, nc };
147
148 if (igraph_vector_size(weights) != no_of_edges) {
149 IGRAPH_ERROR("Invalid length of weight vector", IGRAPH_EINVAL);
150 }
151
152 if (igraph_vector_int_size(ids) != no_of_nodes) {
153 IGRAPH_ERROR("Invalid length of ID vector", IGRAPH_EINVAL);
154 }
155
156 IGRAPH_FINALLY(igraph_i_subclique_next_free, &freedata);
157 *resultids = igraph_Calloc(nc, igraph_vector_int_t);
158 if (!*resultids) {
159 IGRAPH_ERROR("Cannot calculate next cliques", IGRAPH_ENOMEM);
160 }
161 freedata.resultids = *resultids;
162 *resultweights = igraph_Calloc(nc, igraph_vector_t);
163 if (!*resultweights) {
164 IGRAPH_ERROR("Cannot calculate next cliques", IGRAPH_ENOMEM);
165 }
166 freedata.resultweights = *resultweights;
167 *result = igraph_Calloc(nc, igraph_t);
168 if (!*result) {
169 IGRAPH_ERROR("Cannot calculate next cliques", IGRAPH_ENOMEM);
170 }
171 freedata.result = *result;
172
173 igraph_vector_init(&newedges, 100);
174 IGRAPH_FINALLY(igraph_vector_destroy, &newedges);
175 igraph_vector_int_init(&mark, no_of_nodes);
176 IGRAPH_FINALLY(igraph_vector_int_destroy, &mark);
177 igraph_vector_int_init(&map, no_of_nodes);
178 IGRAPH_FINALLY(igraph_vector_int_destroy, &map);
179 igraph_vector_int_init(&edges, 100);
180 IGRAPH_FINALLY(igraph_vector_int_destroy, &edges);
181 igraph_vector_init(&neis, 10);
182 IGRAPH_FINALLY(igraph_vector_destroy, &neis);
183
184 if (clique_thr) {
185 igraph_vector_resize(clique_thr, nc);
186 }
187 if (next_thr) {
188 igraph_vector_resize(next_thr, nc);
189 }
190
191 /* Iterate over all cliques. We will create graphs for all
192 subgraphs defined by the cliques. */
193
194 for (c = 0; c < nc; c++) {
195 igraph_vector_t *clique = VECTOR(*cliques)[c];
196 igraph_real_t minweight = IGRAPH_INFINITY, nextweight = IGRAPH_INFINITY;
197 igraph_integer_t e, v, clsize = igraph_vector_size(clique);
198 igraph_integer_t noe, nov = 0;
199 igraph_vector_int_t *newids = (*resultids) + c;
200 igraph_vector_t *neww = (*resultweights) + c;
201 igraph_t *newgraph = (*result) + c;
202 igraph_vector_int_clear(&edges);
203 igraph_vector_clear(&newedges);
204
205 /* --------------------------------------------------- */
206
207 /* Iterate over the vertices of a clique and find the
208 edges within the clique, put them in a list.
209 At the same time, search for the minimum edge weight within
210 the clique and the next edge weight if any. */
211
212 for (v = 0; v < clsize; v++) {
213 igraph_integer_t i, neilen, node = VECTOR(*clique)[v];
214 igraph_incident(graph, &neis, node, IGRAPH_ALL);
215 neilen = igraph_vector_size(&neis);
216 VECTOR(mark)[node] = c + 1;
217 for (i = 0; i < neilen; i++) {
218 igraph_integer_t edge = VECTOR(neis)[i];
219 igraph_integer_t nei = IGRAPH_OTHER(graph, edge, node);
220 if (VECTOR(mark)[nei] == c + 1) {
221 igraph_real_t w = VECTOR(*weights)[edge];
222 igraph_vector_int_push_back(&edges, edge);
223 if (w < minweight) {
224 nextweight = minweight;
225 minweight = w;
226 } else if (w > minweight && w < nextweight) {
227 nextweight = w;
228 }
229 }
230 }
231 } /* v < clsize */
232
233 /* --------------------------------------------------- */
234
235 /* OK, we have stored the edges and found the weight of
236 the clique and the next weight to consider */
237
238 if (clique_thr) {
239 VECTOR(*clique_thr)[c] = minweight;
240 }
241 if (next_thr) {
242 VECTOR(*next_thr )[c] = nextweight;
243 }
244
245 /* --------------------------------------------------- */
246
247 /* Now we create the subgraph from the edges above the next
248 threshold, and their incident vertices. */
249
250 igraph_vector_int_init(newids, 0);
251 igraph_vector_init(neww, 0);
252
253 /* We use mark[] to denote the vertices already mapped to
254 the new graph. If this is -(c+1), then the vertex was
255 mapped, otherwise it was not. The mapping itself is in
256 map[]. */
257
258 noe = igraph_vector_int_size(&edges);
259 for (e = 0; e < noe; e++) {
260 igraph_integer_t edge = VECTOR(edges)[e];
261 igraph_integer_t from, to;
262 igraph_real_t w = VECTOR(*weights)[edge];
263 igraph_edge(graph, edge, &from, &to);
264 if (w >= nextweight) {
265 if (VECTOR(mark)[from] == c + 1) {
266 VECTOR(map)[from] = nov++;
267 VECTOR(mark)[from] = -(c + 1);
268 igraph_vector_int_push_back(newids, VECTOR(*ids)[from]);
269 }
270 if (VECTOR(mark)[to] == c + 1) {
271 VECTOR(map)[to] = nov++;
272 VECTOR(mark)[to] = -(c + 1);
273 igraph_vector_int_push_back(newids, VECTOR(*ids)[to]);
274 }
275 igraph_vector_push_back(neww, w);
276 igraph_vector_push_back(&newedges, VECTOR(map)[from]);
277 igraph_vector_push_back(&newedges, VECTOR(map)[to]);
278 }
279 }
280
281 igraph_create(newgraph, &newedges, nov, IGRAPH_UNDIRECTED);
282
283 /* --------------------------------------------------- */
284
285 } /* c < nc */
286
287 igraph_vector_destroy(&neis);
288 igraph_vector_int_destroy(&edges);
289 igraph_vector_int_destroy(&mark);
290 igraph_vector_int_destroy(&map);
291 igraph_vector_destroy(&newedges);
292 IGRAPH_FINALLY_CLEAN(6); /* + freedata */
293
294 return 0;
295 }
296
igraph_i_graphlets_destroy_vectorlist(igraph_vector_ptr_t * vl)297 static void igraph_i_graphlets_destroy_vectorlist(igraph_vector_ptr_t *vl) {
298 int i, n = igraph_vector_ptr_size(vl);
299 for (i = 0; i < n; i++) {
300 igraph_vector_t *v = (igraph_vector_t*) VECTOR(*vl)[i];
301 if (v) {
302 igraph_vector_destroy(v);
303 }
304 }
305 igraph_vector_ptr_destroy(vl);
306 }
307
igraph_i_graphlets(const igraph_t * graph,const igraph_vector_t * weights,igraph_vector_ptr_t * cliques,igraph_vector_t * thresholds,const igraph_vector_int_t * ids,igraph_real_t startthr)308 static int igraph_i_graphlets(const igraph_t *graph,
309 const igraph_vector_t *weights,
310 igraph_vector_ptr_t *cliques,
311 igraph_vector_t *thresholds,
312 const igraph_vector_int_t *ids,
313 igraph_real_t startthr) {
314
315 /* This version is different from the main function, and is
316 appropriate to use in recursive calls, because it _adds_ the
317 results to 'cliques' and 'thresholds' and uses the supplied
318 'startthr' */
319
320 igraph_vector_ptr_t mycliques;
321 int no_of_edges = igraph_ecount(graph);
322 igraph_vector_t subv;
323 igraph_t subg;
324 int i, nographs, nocliques;
325 igraph_t *newgraphs = 0;
326 igraph_vector_t *newweights = 0;
327 igraph_vector_int_t *newids = 0;
328 igraph_vector_t clique_thr, next_thr;
329 igraph_i_subclique_next_free_t freedata = { 0, 0, 0, 0 };
330
331 IGRAPH_CHECK(igraph_vector_ptr_init(&mycliques, 0));
332 IGRAPH_FINALLY(igraph_i_graphlets_destroy_vectorlist, &mycliques);
333 IGRAPH_VECTOR_INIT_FINALLY(&subv, 0);
334
335 /* We start by finding cliques at the lowest threshold */
336 for (i = 0; i < no_of_edges; i++) {
337 if (VECTOR(*weights)[i] >= startthr) {
338 IGRAPH_CHECK(igraph_vector_push_back(&subv, i));
339 }
340 }
341 igraph_subgraph_edges(graph, &subg, igraph_ess_vector(&subv),
342 /*delete_vertices=*/ 0);
343 IGRAPH_FINALLY(igraph_destroy, &subg);
344 igraph_maximal_cliques(&subg, &mycliques, /*min_size=*/ 0, /*max_size=*/ 0);
345 igraph_destroy(&subg);
346 IGRAPH_FINALLY_CLEAN(1);
347 nocliques = igraph_vector_ptr_size(&mycliques);
348
349 igraph_vector_destroy(&subv);
350 IGRAPH_FINALLY_CLEAN(1);
351
352 /* Get the next cliques and thresholds */
353 IGRAPH_VECTOR_INIT_FINALLY(&next_thr, 0);
354 IGRAPH_VECTOR_INIT_FINALLY(&clique_thr, 0);
355
356 igraph_i_subclique_next(graph, weights, ids, &mycliques,
357 &newgraphs, &newweights, &newids,
358 &clique_thr, &next_thr);
359
360 freedata.result = newgraphs;
361 freedata.resultids = newids;
362 freedata.resultweights = newweights;
363 freedata.nc = nocliques;
364 IGRAPH_FINALLY(igraph_i_subclique_next_free, &freedata);
365
366 /* Store cliques at the current level */
367 igraph_vector_append(thresholds, &clique_thr);
368 for (i = 0; i < nocliques; i++) {
369 igraph_vector_t *cl = (igraph_vector_t*) VECTOR(mycliques)[i];
370 int j, n = igraph_vector_size(cl);
371 for (j = 0; j < n; j++) {
372 int node = VECTOR(*cl)[j];
373 VECTOR(*cl)[j] = VECTOR(*ids)[node];
374 }
375 igraph_vector_sort(cl);
376 }
377 igraph_vector_ptr_append(cliques, &mycliques);
378
379 /* Recursive calls for cliques found */
380 nographs = igraph_vector_ptr_size(&mycliques);
381 for (i = 0; i < nographs; i++) {
382 igraph_t *g = newgraphs + i;
383 if (igraph_vcount(g) > 1) {
384 igraph_vector_t *w = newweights + i;
385 igraph_vector_int_t *ids = newids + i;
386 igraph_i_graphlets(g, w, cliques, thresholds, ids, VECTOR(next_thr)[i]);
387 }
388 }
389
390 igraph_vector_destroy(&clique_thr);
391 igraph_vector_destroy(&next_thr);
392 igraph_i_subclique_next_free(&freedata);
393 igraph_vector_ptr_destroy(&mycliques); /* contents was copied over */
394 IGRAPH_FINALLY_CLEAN(4);
395
396 return 0;
397 }
398
399 typedef struct {
400 const igraph_vector_ptr_t *cliques;
401 const igraph_vector_t *thresholds;
402 } igraph_i_graphlets_filter_t;
403
igraph_i_graphlets_filter_cmp(void * data,const void * a,const void * b)404 static int igraph_i_graphlets_filter_cmp(void *data, const void *a, const void *b) {
405 igraph_i_graphlets_filter_t *ddata = (igraph_i_graphlets_filter_t *) data;
406 int *aa = (int*) a;
407 int *bb = (int*) b;
408 igraph_real_t t_a = VECTOR(*ddata->thresholds)[*aa];
409 igraph_real_t t_b = VECTOR(*ddata->thresholds)[*bb];
410 igraph_vector_t *v_a, *v_b;
411 int s_a, s_b;
412
413 if (t_a < t_b) {
414 return -1;
415 } else if (t_a > t_b) {
416 return 1;
417 }
418
419 v_a = (igraph_vector_t*) VECTOR(*ddata->cliques)[*aa];
420 v_b = (igraph_vector_t*) VECTOR(*ddata->cliques)[*bb];
421 s_a = igraph_vector_size(v_a);
422 s_b = igraph_vector_size(v_b);
423
424 if (s_a < s_b) {
425 return -1;
426 } else if (s_a > s_b) {
427 return 1;
428 } else {
429 return 0;
430 }
431 }
432
igraph_i_graphlets_filter(igraph_vector_ptr_t * cliques,igraph_vector_t * thresholds)433 static int igraph_i_graphlets_filter(igraph_vector_ptr_t *cliques,
434 igraph_vector_t *thresholds) {
435
436 /* Filter out non-maximal cliques. Every non-maximal clique is
437 part of a maximal clique, at the same threshold.
438
439 First we order the cliques, according to their threshold, and
440 then according to their size. So when we look for a candidate
441 superset, we only need to check the cliques next in the list,
442 until their threshold is different. */
443
444 int i, iptr, nocliques = igraph_vector_ptr_size(cliques);
445 igraph_vector_int_t order;
446 igraph_i_graphlets_filter_t sortdata = { cliques, thresholds };
447
448 igraph_vector_int_init(&order, nocliques);
449 IGRAPH_FINALLY(igraph_vector_int_destroy, &order);
450 for (i = 0; i < nocliques; i++) {
451 VECTOR(order)[i] = i;
452 }
453
454 igraph_qsort_r(VECTOR(order), nocliques, sizeof(int), &sortdata,
455 igraph_i_graphlets_filter_cmp);
456
457 for (i = 0; i < nocliques - 1; i++) {
458 int ri = VECTOR(order)[i];
459 igraph_vector_t *needle = VECTOR(*cliques)[ri];
460 igraph_real_t thr_i = VECTOR(*thresholds)[ri];
461 int n_i = igraph_vector_size(needle);
462 int j = i + 1;
463
464 for (j = i + 1; j < nocliques; j++) {
465 int rj = VECTOR(order)[j];
466 igraph_real_t thr_j = VECTOR(*thresholds)[rj];
467 igraph_vector_t *hay;
468 int n_j, pi = 0, pj = 0;
469
470 /* Done, not found */
471 if (thr_j != thr_i) {
472 break;
473 }
474
475 /* Check size of hay */
476 hay = VECTOR(*cliques)[rj];
477 n_j = igraph_vector_size(hay);
478 if (n_i > n_j) {
479 continue;
480 }
481
482 /* Check if hay is a superset */
483 while (pi < n_i && pj < n_j && n_i - pi <= n_j - pj) {
484 int ei = VECTOR(*needle)[pi];
485 int ej = VECTOR(*hay)[pj];
486 if (ei < ej) {
487 break;
488 } else if (ei > ej) {
489 pj++;
490 } else {
491 pi++; pj++;
492 }
493 }
494 if (pi == n_i) {
495 /* Found, delete immediately */
496 igraph_vector_destroy(needle);
497 igraph_free(needle);
498 VECTOR(*cliques)[ri] = 0;
499 break;
500 }
501 }
502 }
503
504 /* Remove null pointers from the list of cliques */
505 for (i = 0, iptr = 0; i < nocliques; i++) {
506 igraph_vector_t *v = VECTOR(*cliques)[i];
507 if (v) {
508 VECTOR(*cliques)[iptr] = v;
509 VECTOR(*thresholds)[iptr] = VECTOR(*thresholds)[i];
510 iptr++;
511 }
512 }
513 igraph_vector_ptr_resize(cliques, iptr);
514 igraph_vector_resize(thresholds, iptr);
515
516 igraph_vector_int_destroy(&order);
517 IGRAPH_FINALLY_CLEAN(1);
518
519 return 0;
520 }
521
522 /**
523 * \function igraph_graphlets_candidate_basis
524 * Calculate a candidate graphlets basis
525 *
526 * \param graph The input graph, it must be a simple graph, edge directions are
527 * ignored.
528 * \param weights Weights of the edges, a vector.
529 * \param cliques An initialized vector of pointers.
530 * The graphlet basis is stored here. Each element of the pointer
531 * vector will be a vector of vertex ids. Each elements must be
532 * destroyed using \ref igraph_vector_destroy() and \ref igraph_free().
533 * \param thresholds An initialized vector, the (highest possible)
534 * weight thresholds for finding the basis subgraphs are stored
535 * here.
536 * \return Error code.
537 *
538 * See also: \ref igraph_graphlets() and \ref igraph_graphlets_project().
539 */
540
igraph_graphlets_candidate_basis(const igraph_t * graph,const igraph_vector_t * weights,igraph_vector_ptr_t * cliques,igraph_vector_t * thresholds)541 int igraph_graphlets_candidate_basis(const igraph_t *graph,
542 const igraph_vector_t *weights,
543 igraph_vector_ptr_t *cliques,
544 igraph_vector_t *thresholds) {
545
546 int no_of_nodes = igraph_vcount(graph);
547 int no_of_edges = igraph_ecount(graph);
548 igraph_real_t minthr;
549 igraph_vector_int_t ids;
550 igraph_bool_t simple;
551 int i;
552
553 /* Some checks */
554 if (weights == NULL) {
555 IGRAPH_ERROR("Graphlet functions require weighted graphs", IGRAPH_EINVAL);
556 }
557
558 if (igraph_vector_size(weights) != no_of_edges) {
559 IGRAPH_ERROR("Invalid weight vector length", IGRAPH_EINVAL);
560 }
561
562 igraph_is_simple(graph, &simple);
563 if (!simple) {
564 IGRAPH_ERROR("Graphlets work on simple graphs only", IGRAPH_EINVAL);
565 }
566
567 minthr = igraph_vector_min(weights);
568 igraph_vector_ptr_clear(cliques);
569 igraph_vector_clear(thresholds);
570 igraph_vector_int_init(&ids, no_of_nodes);
571 IGRAPH_FINALLY(igraph_vector_int_destroy, &ids);
572 for (i = 0; i < no_of_nodes; i++) {
573 VECTOR(ids)[i] = i;
574 }
575
576 igraph_i_graphlets(graph, weights, cliques, thresholds, &ids, minthr);
577
578 igraph_vector_int_destroy(&ids);
579 IGRAPH_FINALLY_CLEAN(1);
580
581 igraph_i_graphlets_filter(cliques, thresholds);
582
583 return 0;
584 }
585
586 /* TODO: not made static because it is used by the R interface */
igraph_i_graphlets_project(const igraph_t * graph,const igraph_vector_t * weights,const igraph_vector_ptr_t * cliques,igraph_vector_t * Mu,igraph_bool_t startMu,int niter,int vid1)587 int igraph_i_graphlets_project(const igraph_t *graph,
588 const igraph_vector_t *weights,
589 const igraph_vector_ptr_t *cliques,
590 igraph_vector_t *Mu, igraph_bool_t startMu,
591 int niter, int vid1) {
592
593 int no_of_nodes = igraph_vcount(graph);
594 int no_of_edges = igraph_ecount(graph);
595 int no_cliques = igraph_vector_ptr_size(cliques);
596 igraph_vector_int_t vcl, vclidx, ecl, eclidx, cel, celidx;
597 igraph_vector_t edgelist, newweights, normfact;
598 int i, total_vertices, e, ptr, total_edges;
599 igraph_bool_t simple;
600
601 /* Check arguments */
602 if (weights == NULL) {
603 IGRAPH_ERROR("Graphlet functions require weighted graphs", IGRAPH_EINVAL);
604 }
605 if (no_of_edges != igraph_vector_size(weights)) {
606 IGRAPH_ERROR("Invalid weight vector size", IGRAPH_EINVAL);
607 }
608 if (startMu && igraph_vector_size(Mu) != no_cliques) {
609 IGRAPH_ERROR("Invalid start coefficient vector size", IGRAPH_EINVAL);
610 }
611 if (niter < 0) {
612 IGRAPH_ERROR("Number of iterations must be non-negative", IGRAPH_EINVAL);
613 }
614 igraph_is_simple(graph, &simple);
615 if (!simple) {
616 IGRAPH_ERROR("Graphlets work on simple graphs only", IGRAPH_EINVAL);
617 }
618
619 if (!startMu) {
620 igraph_vector_resize(Mu, no_cliques);
621 igraph_vector_fill(Mu, 1);
622 }
623
624 /* Count # cliques per vertex. Also, create an index
625 for the edges per clique. */
626 IGRAPH_CHECK(igraph_vector_int_init(&vclidx, no_of_nodes + 2));
627 IGRAPH_FINALLY(igraph_vector_int_destroy, &vclidx);
628 IGRAPH_CHECK(igraph_vector_int_init(&celidx, no_cliques + 3));
629 IGRAPH_FINALLY(igraph_vector_int_destroy, &celidx);
630 for (i = 0, total_vertices = 0, total_edges = 0; i < no_cliques; i++) {
631 igraph_vector_t *v = VECTOR(*cliques)[i];
632 int j, n = igraph_vector_size(v);
633 total_vertices += n;
634 total_edges += n * (n - 1) / 2;
635 VECTOR(celidx)[i + 2] = total_edges;
636 for (j = 0; j < n; j++) {
637 int vv = VECTOR(*v)[j] - vid1;
638 VECTOR(vclidx)[vv + 2] += 1;
639 }
640 }
641 VECTOR(celidx)[i + 2] = total_edges;
642
643 /* Finalize index vector */
644 for (i = 0; i < no_of_nodes; i++) {
645 VECTOR(vclidx)[i + 2] += VECTOR(vclidx)[i + 1];
646 }
647
648 /* Create vertex-clique list, the cliques for each vertex. */
649 IGRAPH_CHECK(igraph_vector_int_init(&vcl, total_vertices));
650 IGRAPH_FINALLY(igraph_vector_int_destroy, &vcl);
651 for (i = 0; i < no_cliques; i++) {
652 igraph_vector_t *v = VECTOR(*cliques)[i];
653 int j, n = igraph_vector_size(v);
654 for (j = 0; j < n; j++) {
655 int vv = VECTOR(*v)[j] - vid1;
656 int p = VECTOR(vclidx)[vv + 1];
657 VECTOR(vcl)[p] = i;
658 VECTOR(vclidx)[vv + 1] += 1;
659 }
660 }
661
662 /* Create an edge-clique list, the cliques of each edge */
663 IGRAPH_CHECK(igraph_vector_int_init(&ecl, total_edges));
664 IGRAPH_FINALLY(igraph_vector_int_destroy, &ecl);
665 IGRAPH_CHECK(igraph_vector_int_init(&eclidx, no_of_edges + 1));
666 IGRAPH_FINALLY(igraph_vector_int_destroy, &eclidx);
667 IGRAPH_CHECK(igraph_vector_init(&edgelist, no_of_edges * 2));
668 IGRAPH_FINALLY(igraph_vector_destroy, &edgelist);
669 IGRAPH_CHECK(igraph_get_edgelist(graph, &edgelist, /*by_col=*/ 0));
670 for (i = 0, e = 0, ptr = 0; e < no_of_edges; e++) {
671 int from = VECTOR(edgelist)[i++];
672 int to = VECTOR(edgelist)[i++];
673 int from_s = VECTOR(vclidx)[from];
674 int from_e = VECTOR(vclidx)[from + 1];
675 int to_s = VECTOR(vclidx)[to];
676 int to_e = VECTOR(vclidx)[to + 1];
677 VECTOR(eclidx)[e] = ptr;
678 while (from_s < from_e && to_s < to_e) {
679 int from_v = VECTOR(vcl)[from_s];
680 int to_v = VECTOR(vcl)[to_s];
681 if (from_v == to_v) {
682 VECTOR(ecl)[ptr++] = from_v;
683 from_s++; to_s++;
684 } else if (from_v < to_v) {
685 from_s++;
686 } else {
687 to_s++;
688 }
689 }
690 }
691 VECTOR(eclidx)[e] = ptr;
692
693 igraph_vector_destroy(&edgelist);
694 IGRAPH_FINALLY_CLEAN(1);
695
696 /* Convert the edge-clique list to a clique-edge list */
697 IGRAPH_CHECK(igraph_vector_int_init(&cel, total_edges));
698 IGRAPH_FINALLY(igraph_vector_int_destroy, &cel);
699 for (i = 0; i < no_of_edges; i++) {
700 int ecl_s = VECTOR(eclidx)[i], ecl_e = VECTOR(eclidx)[i + 1], j;
701 for (j = ecl_s; j < ecl_e; j++) {
702 int cl = VECTOR(ecl)[j];
703 int epos = VECTOR(celidx)[cl + 1];
704 VECTOR(cel)[epos] = i;
705 VECTOR(celidx)[cl + 1] += 1;
706 }
707 }
708
709 /* Normalizing factors for the iteration */
710 IGRAPH_CHECK(igraph_vector_init(&normfact, no_cliques));
711 IGRAPH_FINALLY(igraph_vector_destroy, &normfact);
712 for (i = 0; i < no_cliques; i++) {
713 igraph_vector_t *v = VECTOR(*cliques)[i];
714 int n = igraph_vector_size(v);
715 VECTOR(normfact)[i] = n * (n + 1) / 2;
716 }
717
718 /* We have the clique-edge list, so do the projection now */
719 IGRAPH_CHECK(igraph_vector_init(&newweights, no_of_edges));
720 IGRAPH_FINALLY(igraph_vector_destroy, &newweights);
721 for (i = 0; i < niter; i++) {
722 for (e = 0; e < no_of_edges; e++) {
723 int start = VECTOR(eclidx)[e];
724 int end = VECTOR(eclidx)[e + 1];
725 VECTOR(newweights)[e] = 0.0001;
726 while (start < end) {
727 int clique = VECTOR(ecl)[start++];
728 VECTOR(newweights)[e] += VECTOR(*Mu)[clique];
729 }
730 }
731 for (e = 0; e < no_cliques; e++) {
732 igraph_real_t sumratio = 0;
733 int start = VECTOR(celidx)[e];
734 int end = VECTOR(celidx)[e + 1];
735 while (start < end) {
736 int edge = VECTOR(cel)[start++];
737 sumratio += VECTOR(*weights)[edge] / VECTOR(newweights)[edge];
738 }
739 VECTOR(*Mu)[e] *= sumratio / VECTOR(normfact)[e];
740 }
741 }
742
743 igraph_vector_destroy(&newweights);
744 igraph_vector_destroy(&normfact);
745 igraph_vector_int_destroy(&cel);
746 igraph_vector_int_destroy(&eclidx);
747 igraph_vector_int_destroy(&ecl);
748 igraph_vector_int_destroy(&vcl);
749 igraph_vector_int_destroy(&celidx);
750 igraph_vector_int_destroy(&vclidx);
751 IGRAPH_FINALLY_CLEAN(8);
752
753 return 0;
754 }
755
756 /**
757 * \function igraph_graphlets_project
758 * Project a graph on a graphlets basis
759 *
760 * Note that the graph projected does not have to be the same that
761 * was used to calculate the graphlet basis, but it is assumed that
762 * it has the same number of vertices, and the vertex ids of the two
763 * graphs match.
764 * \param graph The input graph, it must be a simple graph, edge directions are
765 * ignored.
766 * \param weights Weights of the edges in the input graph, a vector.
767 * \param cliques The graphlet basis, a pointer vector, in which each
768 * element is a vector of vertex ids.
769 * \param Mu An initialized vector, the weights of the graphlets will
770 * be stored here. This vector is also used to initialize the
771 * the weight vector for the iterative algorithm, if the
772 * \c startMu argument is true (non-zero).
773 * \param startMu If true (non-zero), then the supplied Mu vector is
774 * used as the starting point of the iteration. Otherwise a
775 * constant 1 vector is used.
776 * \param niter Integer scalar, the number of iterations to perform.
777 * \return Error code.
778 *
779 * See also: \ref igraph_graphlets() and
780 * \ref igraph_graphlets_candidate_basis().
781 */
782
igraph_graphlets_project(const igraph_t * graph,const igraph_vector_t * weights,const igraph_vector_ptr_t * cliques,igraph_vector_t * Mu,igraph_bool_t startMu,int niter)783 int igraph_graphlets_project(const igraph_t *graph,
784 const igraph_vector_t *weights,
785 const igraph_vector_ptr_t *cliques,
786 igraph_vector_t *Mu, igraph_bool_t startMu,
787 int niter) {
788
789 return igraph_i_graphlets_project(graph, weights, cliques, Mu, startMu,
790 niter, /*vid1=*/ 0);
791 }
792
793 typedef struct igraph_i_graphlets_order_t {
794 const igraph_vector_ptr_t *cliques;
795 const igraph_vector_t *Mu;
796 } igraph_i_graphlets_order_t;
797
igraph_i_graphlets_order_cmp(void * data,const void * a,const void * b)798 static int igraph_i_graphlets_order_cmp(void *data, const void *a, const void *b) {
799 igraph_i_graphlets_order_t *ddata = (igraph_i_graphlets_order_t*) data;
800 int *aa = (int*) a;
801 int *bb = (int*) b;
802 igraph_real_t Mu_a = VECTOR(*ddata->Mu)[*aa];
803 igraph_real_t Mu_b = VECTOR(*ddata->Mu)[*bb];
804
805 if (Mu_a < Mu_b) {
806 return 1;
807 } else if (Mu_a > Mu_b) {
808 return -1;
809 } else {
810 return 0;
811 }
812 }
813
814 /**
815 * \function igraph_graphlets
816 * Calculate graphlets basis and project the graph on it
817 *
818 * This function simply calls \ref igraph_graphlets_candidate_basis()
819 * and \ref igraph_graphlets_project(), and then orders the graphlets
820 * according to decreasing weights.
821 * \param graph The input graph, it must be a simple graph, edge directions are
822 * ignored.
823 * \param weights Weights of the edges, a vector.
824 * \param cliques An initialized vector of pointers.
825 * The graphlet basis is stored here. Each element of the pointer
826 * vector will be a vector of vertex ids.
827 * \param Mu An initialized vector, the weights of the graphlets will
828 * be stored here.
829 * \param niter Integer scalar, the number of iterations to perform
830 * for the projection step.
831 * \return Error code.
832 *
833 * See also: \ref igraph_graphlets_candidate_basis() and
834 * \ref igraph_graphlets_project().
835 */
836
igraph_graphlets(const igraph_t * graph,const igraph_vector_t * weights,igraph_vector_ptr_t * cliques,igraph_vector_t * Mu,int niter)837 int igraph_graphlets(const igraph_t *graph,
838 const igraph_vector_t *weights,
839 igraph_vector_ptr_t *cliques,
840 igraph_vector_t *Mu, int niter) {
841
842 int i, nocliques;
843 igraph_vector_t thresholds;
844 igraph_vector_int_t order;
845 igraph_i_graphlets_order_t sortdata = { cliques, Mu };
846
847 igraph_vector_init(&thresholds, 0);
848 IGRAPH_FINALLY(igraph_vector_destroy, &thresholds);
849 igraph_graphlets_candidate_basis(graph, weights, cliques, &thresholds);
850 igraph_vector_destroy(&thresholds);
851 IGRAPH_FINALLY_CLEAN(1);
852
853 igraph_graphlets_project(graph, weights, cliques, Mu, /*startMu=*/ 0, niter);
854
855 nocliques = igraph_vector_ptr_size(cliques);
856 igraph_vector_int_init(&order, nocliques);
857 IGRAPH_FINALLY(igraph_vector_int_destroy, &order);
858 for (i = 0; i < nocliques; i++) {
859 VECTOR(order)[i] = i;
860 }
861 igraph_qsort_r(VECTOR(order), nocliques, sizeof(int), &sortdata,
862 igraph_i_graphlets_order_cmp);
863
864 igraph_vector_ptr_index_int(cliques, &order);
865 igraph_vector_index_int(Mu, &order);
866
867 igraph_vector_int_destroy(&order);
868 IGRAPH_FINALLY_CLEAN(1);
869
870 return 0;
871 }
872