1 /* -*- mode: C -*- */
2 /*
3 IGraph library.
4 Copyright (C) 2005-2020 The igraph development team
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA
20
21 */
22
23 #include "igraph_foreign.h"
24
25 #include "igraph_attributes.h"
26 #include "igraph_interface.h"
27 #include "igraph_memory.h"
28 #include "igraph_version.h"
29
30 #include "core/trie.h"
31 #include "graph/attributes.h"
32 #include "io/gml-header.h"
33
34 #include <ctype.h>
35 #include <time.h>
36 #include <string.h>
37
38 int igraph_gml_yylex_init_extra (igraph_i_gml_parsedata_t* user_defined,
39 void* scanner);
40 void igraph_gml_yylex_destroy (void *scanner );
41 int igraph_gml_yyparse (igraph_i_gml_parsedata_t* context);
42 void igraph_gml_yyset_in (FILE * in_str, void* yyscanner );
43
igraph_i_gml_destroy_attrs(igraph_vector_ptr_t ** ptr)44 static void igraph_i_gml_destroy_attrs(igraph_vector_ptr_t **ptr) {
45 long int i;
46 igraph_vector_ptr_t *vec;
47 for (i = 0; i < 3; i++) {
48 long int j;
49 vec = ptr[i];
50 for (j = 0; j < igraph_vector_ptr_size(vec); j++) {
51 igraph_attribute_record_t *atrec = VECTOR(*vec)[j];
52 if (atrec->type == IGRAPH_ATTRIBUTE_NUMERIC) {
53 igraph_vector_t *value = (igraph_vector_t*)atrec->value;
54 if (value != 0) {
55 igraph_vector_destroy(value);
56 IGRAPH_FREE(value);
57 }
58 } else {
59 igraph_strvector_t *value = (igraph_strvector_t*)atrec->value;
60 if (value != 0) {
61 igraph_strvector_destroy(value);
62 IGRAPH_FREE(value);
63 }
64 }
65 IGRAPH_FREE(atrec->name);
66 IGRAPH_FREE(atrec);
67 }
68 igraph_vector_ptr_destroy(vec);
69 }
70 }
71
igraph_i_gml_toreal(igraph_gml_tree_t * node,long int pos,igraph_real_t * result)72 static int igraph_i_gml_toreal(igraph_gml_tree_t *node, long int pos, igraph_real_t *result) {
73
74 igraph_real_t value = 0.0;
75 int type = igraph_gml_tree_type(node, pos);
76
77 switch (type) {
78 case IGRAPH_I_GML_TREE_INTEGER:
79 value = igraph_gml_tree_get_integer(node, pos);
80 break;
81 case IGRAPH_I_GML_TREE_REAL:
82 value = igraph_gml_tree_get_real(node, pos);
83 break;
84 default:
85 IGRAPH_ERROR("Internal error while parsing GML file.", IGRAPH_FAILURE);
86 break;
87 }
88
89 *result = value;
90 return IGRAPH_SUCCESS;
91 }
92
igraph_i_gml_tostring(igraph_gml_tree_t * node,long int pos)93 static const char *igraph_i_gml_tostring(igraph_gml_tree_t *node, long int pos) {
94
95 int type = igraph_gml_tree_type(node, pos);
96 static char tmp[256];
97 const char *p = tmp;
98 long int i;
99 igraph_real_t d;
100
101 switch (type) {
102 case IGRAPH_I_GML_TREE_INTEGER:
103 i = igraph_gml_tree_get_integer(node, pos);
104 snprintf(tmp, sizeof(tmp) / sizeof(char), "%li", i);
105 break;
106 case IGRAPH_I_GML_TREE_REAL:
107 d = igraph_gml_tree_get_real(node, pos);
108 igraph_real_snprintf_precise(tmp, sizeof(tmp) / sizeof(char), d);
109 break;
110 case IGRAPH_I_GML_TREE_STRING:
111 p = igraph_gml_tree_get_string(node, pos);
112 break;
113 default:
114 break;
115 }
116
117 return p;
118 }
119
igraph_i_gml_parsedata_init(igraph_i_gml_parsedata_t * context)120 int igraph_i_gml_parsedata_init(igraph_i_gml_parsedata_t* context) {
121 context->eof = 0;
122 context->depth = 0;
123 context->scanner = 0;
124 context->tree = 0;
125 context->errmsg[0] = 0;
126
127 return IGRAPH_SUCCESS;
128 }
129
igraph_i_gml_parsedata_destroy(igraph_i_gml_parsedata_t * context)130 void igraph_i_gml_parsedata_destroy(igraph_i_gml_parsedata_t* context) {
131 if (context->tree != 0) {
132 igraph_gml_tree_destroy(context->tree);
133 context->tree = 0;
134 }
135
136 if (context->scanner != 0) {
137 igraph_gml_yylex_destroy(context->scanner);
138 context->scanner = 0;
139 }
140 }
141
142 /**
143 * \function igraph_read_graph_gml
144 * \brief Read a graph in GML format.
145 *
146 * GML is a simple textual format, see
147 * http://www.fim.uni-passau.de/en/fim/faculty/chairs/theoretische-informatik/projects.html for details.
148 *
149 * </para><para>
150 * Although all syntactically correct GML can be parsed,
151 * we implement only a subset of this format, some attributes might be
152 * ignored. Here is a list of all the differences:
153 * \olist
154 * \oli Only <code>node</code> and <code>edge</code> attributes are
155 * used, and only if they have a simple type: integer, real or
156 * string. So if an attribute is an array or a record, then it is
157 * ignored. This is also true if only some values of the
158 * attribute are complex.
159 * \oli Top level attributes except for <code>Version</code> and the
160 * first <code>graph</code> attribute are completely ignored.
161 * \oli Graph attributes except for <code>node</code> and
162 * <code>edge</code> are completely ignored.
163 * \oli There is no maximum line length.
164 * \oli There is no maximum keyword length.
165 * \oli Character entities in strings are not interpreted.
166 * \oli We allow <code>inf</code> (infinity) and <code>nan</code>
167 * (not a number) as a real number. This is case insensitive, so
168 * <code>nan</code>, <code>NaN</code> and <code>NAN</code> are equal.
169 * \endolist
170 *
171 * </para><para> Please contact us if you cannot live with these
172 * limitations of the GML parser.
173 * \param graph Pointer to an uninitialized graph object.
174 * \param instream The stream to read the GML file from.
175 * \return Error code.
176 *
177 * Time complexity: should be proportional to the length of the file.
178 *
179 * \sa \ref igraph_read_graph_graphml() for a more modern format,
180 * \ref igraph_write_graph_gml() for writing GML files.
181 *
182 * \example examples/simple/gml.c
183 */
igraph_read_graph_gml(igraph_t * graph,FILE * instream)184 int igraph_read_graph_gml(igraph_t *graph, FILE *instream) {
185
186 long int i, p;
187 long int no_of_nodes = 0, no_of_edges = 0;
188 igraph_trie_t trie;
189 igraph_vector_t edges;
190 igraph_bool_t directed = IGRAPH_UNDIRECTED;
191 igraph_gml_tree_t *gtree;
192 long int gidx;
193 igraph_trie_t vattrnames;
194 igraph_trie_t eattrnames;
195 igraph_trie_t gattrnames;
196 igraph_vector_ptr_t gattrs = IGRAPH_VECTOR_PTR_NULL,
197 vattrs = IGRAPH_VECTOR_PTR_NULL, eattrs = IGRAPH_VECTOR_PTR_NULL;
198 igraph_vector_ptr_t *attrs[3];
199 long int edgeptr = 0;
200 igraph_i_gml_parsedata_t context;
201
202 attrs[0] = &gattrs; attrs[1] = &vattrs; attrs[2] = &eattrs;
203
204 IGRAPH_CHECK(igraph_i_gml_parsedata_init(&context));
205 IGRAPH_FINALLY(igraph_i_gml_parsedata_destroy, &context);
206
207 igraph_gml_yylex_init_extra(&context, &context.scanner);
208
209 igraph_gml_yyset_in(instream, context.scanner);
210
211 i = igraph_gml_yyparse(&context);
212 if (i != 0) {
213 if (context.errmsg[0] != 0) {
214 IGRAPH_ERROR(context.errmsg, IGRAPH_PARSEERROR);
215 } else {
216 IGRAPH_ERROR("Cannot read GML file.", IGRAPH_PARSEERROR);
217 }
218 }
219
220 IGRAPH_VECTOR_INIT_FINALLY(&edges, 0);
221
222 /* Check version, if present, integer and not '1' then ignored */
223 i = igraph_gml_tree_find(context.tree, "Version", 0);
224 if (i >= 0 &&
225 igraph_gml_tree_type(context.tree, i) == IGRAPH_I_GML_TREE_INTEGER &&
226 igraph_gml_tree_get_integer(context.tree, i) != 1) {
227 IGRAPH_ERROR("Unknown GML version.", IGRAPH_UNIMPLEMENTED);
228 /* RETURN HERE!!!! */
229 }
230
231 /* get the graph */
232 gidx = igraph_gml_tree_find(context.tree, "graph", 0);
233 if (gidx == -1) {
234 IGRAPH_ERROR("No 'graph' object in GML file.", IGRAPH_PARSEERROR);
235 }
236 if (igraph_gml_tree_type(context.tree, gidx) !=
237 IGRAPH_I_GML_TREE_TREE) {
238 IGRAPH_ERROR("Invalid type for 'graph' object in GML file.", IGRAPH_PARSEERROR);
239 }
240 gtree = igraph_gml_tree_get_tree(context.tree, gidx);
241
242 IGRAPH_FINALLY(igraph_i_gml_destroy_attrs, attrs);
243 igraph_vector_ptr_init(&gattrs, 0);
244 igraph_vector_ptr_init(&vattrs, 0);
245 igraph_vector_ptr_init(&eattrs, 0);
246
247 IGRAPH_TRIE_INIT_FINALLY(&trie, 0);
248 IGRAPH_TRIE_INIT_FINALLY(&vattrnames, 0);
249 IGRAPH_TRIE_INIT_FINALLY(&eattrnames, 0);
250 IGRAPH_TRIE_INIT_FINALLY(&gattrnames, 0);
251
252 /* Is is directed? */
253 i = igraph_gml_tree_find(gtree, "directed", 0);
254 if (i >= 0 && igraph_gml_tree_type(gtree, i) == IGRAPH_I_GML_TREE_INTEGER) {
255 if (igraph_gml_tree_get_integer(gtree, i) == 1) {
256 directed = IGRAPH_DIRECTED;
257 }
258 }
259
260 /* Now we go over all objects in the graph and collect the attribute names and
261 types. Plus we collect node ids. We also do some checks. */
262 for (i = 0; i < igraph_gml_tree_length(gtree); i++) {
263 long int j;
264 char cname[100];
265 const char *name = igraph_gml_tree_name(gtree, i);
266 if (!strcmp(name, "node")) {
267 igraph_gml_tree_t *node;
268 igraph_bool_t hasid;
269 no_of_nodes++;
270 if (igraph_gml_tree_type(gtree, i) != IGRAPH_I_GML_TREE_TREE) {
271 IGRAPH_ERROR("'node' is not a list in GML file.", IGRAPH_PARSEERROR);
272 }
273 node = igraph_gml_tree_get_tree(gtree, i);
274 hasid = 0;
275 for (j = 0; j < igraph_gml_tree_length(node); j++) {
276 const char *name = igraph_gml_tree_name(node, j);
277 long int trieid, triesize = igraph_trie_size(&vattrnames);
278 IGRAPH_CHECK(igraph_trie_get(&vattrnames, name, &trieid));
279 if (trieid == triesize) {
280 /* new attribute */
281 igraph_attribute_record_t *atrec = IGRAPH_CALLOC(1, igraph_attribute_record_t);
282 int type = igraph_gml_tree_type(node, j);
283 if (!atrec) {
284 IGRAPH_ERROR("Cannot read GML file.", IGRAPH_ENOMEM);
285 }
286 IGRAPH_CHECK(igraph_vector_ptr_push_back(&vattrs, atrec));
287 atrec->name = strdup(name);
288 if (type == IGRAPH_I_GML_TREE_INTEGER || type == IGRAPH_I_GML_TREE_REAL) {
289 atrec->type = IGRAPH_ATTRIBUTE_NUMERIC;
290 } else {
291 atrec->type = IGRAPH_ATTRIBUTE_STRING;
292 }
293 } else {
294 /* already seen, should we update type? */
295 igraph_attribute_record_t *atrec = VECTOR(vattrs)[trieid];
296 int type1 = atrec->type;
297 int type2 = igraph_gml_tree_type(node, j);
298 if (type1 == IGRAPH_ATTRIBUTE_NUMERIC && type2 == IGRAPH_I_GML_TREE_STRING) {
299 atrec->type = IGRAPH_ATTRIBUTE_STRING;
300 }
301 }
302 /* check id */
303 if (!hasid && !strcmp(name, "id")) {
304 long int id;
305 if (igraph_gml_tree_type(node, j) != IGRAPH_I_GML_TREE_INTEGER) {
306 IGRAPH_ERROR("Non-integer node id in GML file.", IGRAPH_PARSEERROR);
307 }
308 id = igraph_gml_tree_get_integer(node, j);
309 snprintf(cname, sizeof(cname) / sizeof(char) -1, "%li", id);
310 IGRAPH_CHECK(igraph_trie_get(&trie, cname, &id));
311 hasid = 1;
312 }
313 }
314 if (!hasid) {
315 IGRAPH_ERROR("Node without 'id' while parsing GML file.", IGRAPH_PARSEERROR);
316 }
317 } else if (!strcmp(name, "edge")) {
318 igraph_gml_tree_t *edge;
319 igraph_bool_t has_source = 0, has_target = 0;
320 no_of_edges++;
321 if (igraph_gml_tree_type(gtree, i) != IGRAPH_I_GML_TREE_TREE) {
322 IGRAPH_ERROR("'edge' is not a list in GML file.", IGRAPH_PARSEERROR);
323 }
324 edge = igraph_gml_tree_get_tree(gtree, i);
325 has_source = has_target = 0;
326 for (j = 0; j < igraph_gml_tree_length(edge); j++) {
327 const char *name = igraph_gml_tree_name(edge, j);
328 if (!strcmp(name, "source")) {
329 has_source = 1;
330 if (igraph_gml_tree_type(edge, j) != IGRAPH_I_GML_TREE_INTEGER) {
331 IGRAPH_ERROR("Non-integer 'source' for an edge in GML file.",
332 IGRAPH_PARSEERROR);
333 }
334 } else if (!strcmp(name, "target")) {
335 has_target = 1;
336 if (igraph_gml_tree_type(edge, j) != IGRAPH_I_GML_TREE_INTEGER) {
337 IGRAPH_ERROR("Non-integer 'source' for an edge in GML file.",
338 IGRAPH_PARSEERROR);
339 }
340 } else {
341 long int trieid, triesize = igraph_trie_size(&eattrnames);
342 IGRAPH_CHECK(igraph_trie_get(&eattrnames, name, &trieid));
343 if (trieid == triesize) {
344 /* new attribute */
345 igraph_attribute_record_t *atrec = IGRAPH_CALLOC(1, igraph_attribute_record_t);
346 int type = igraph_gml_tree_type(edge, j);
347 if (!atrec) {
348 IGRAPH_ERROR("Cannot read GML file.", IGRAPH_ENOMEM);
349 }
350 IGRAPH_CHECK(igraph_vector_ptr_push_back(&eattrs, atrec));
351 atrec->name = strdup(name);
352 if (type == IGRAPH_I_GML_TREE_INTEGER || type == IGRAPH_I_GML_TREE_REAL) {
353 atrec->type = IGRAPH_ATTRIBUTE_NUMERIC;
354 } else {
355 atrec->type = IGRAPH_ATTRIBUTE_STRING;
356 }
357 } else {
358 /* already seen, should we update type? */
359 igraph_attribute_record_t *atrec = VECTOR(eattrs)[trieid];
360 int type1 = atrec->type;
361 int type2 = igraph_gml_tree_type(edge, j);
362 if (type1 == IGRAPH_ATTRIBUTE_NUMERIC && type2 == IGRAPH_I_GML_TREE_STRING) {
363 atrec->type = IGRAPH_ATTRIBUTE_STRING;
364 }
365 }
366 }
367 } /* for */
368 if (!has_source) {
369 IGRAPH_ERROR("No 'source' for edge in GML file.", IGRAPH_PARSEERROR);
370 }
371 if (!has_target) {
372 IGRAPH_ERROR("No 'target' for edge in GML file.", IGRAPH_PARSEERROR);
373 }
374 } else {
375 /* anything to do? Maybe add as graph attribute.... */
376 }
377 }
378
379 /* check vertex id uniqueness */
380 if (igraph_trie_size(&trie) != no_of_nodes) {
381 IGRAPH_ERROR("Node 'id' not unique in GML file.", IGRAPH_PARSEERROR);
382 }
383
384 /* now we allocate the vectors and strvectors for the attributes */
385 for (i = 0; i < igraph_vector_ptr_size(&vattrs); i++) {
386 igraph_attribute_record_t *atrec = VECTOR(vattrs)[i];
387 int type = atrec->type;
388 if (type == IGRAPH_ATTRIBUTE_NUMERIC) {
389 igraph_vector_t *p = IGRAPH_CALLOC(1, igraph_vector_t);
390 atrec->value = p;
391 IGRAPH_CHECK(igraph_vector_init(p, no_of_nodes));
392 } else if (type == IGRAPH_ATTRIBUTE_STRING) {
393 igraph_strvector_t *p = IGRAPH_CALLOC(1, igraph_strvector_t);
394 atrec->value = p;
395 IGRAPH_CHECK(igraph_strvector_init(p, no_of_nodes));
396 } else {
397 IGRAPH_WARNING("A composite attribute was ignored in the GML file.");
398 }
399 }
400
401 for (i = 0; i < igraph_vector_ptr_size(&eattrs); i++) {
402 igraph_attribute_record_t *atrec = VECTOR(eattrs)[i];
403 int type = atrec->type;
404 if (type == IGRAPH_ATTRIBUTE_NUMERIC) {
405 igraph_vector_t *p = IGRAPH_CALLOC(1, igraph_vector_t);
406 atrec->value = p;
407 IGRAPH_CHECK(igraph_vector_init(p, no_of_edges));
408 } else if (type == IGRAPH_ATTRIBUTE_STRING) {
409 igraph_strvector_t *p = IGRAPH_CALLOC(1, igraph_strvector_t);
410 atrec->value = p;
411 IGRAPH_CHECK(igraph_strvector_init(p, no_of_edges));
412 } else {
413 IGRAPH_WARNING("A composite attribute was ignored in the GML file.");
414 }
415 }
416
417 /* Ok, now the edges, attributes too */
418 IGRAPH_CHECK(igraph_vector_resize(&edges, no_of_edges * 2));
419 p = -1;
420 while ( (p = igraph_gml_tree_find(gtree, "edge", p + 1)) != -1) {
421 igraph_gml_tree_t *edge;
422 long int from, to, fromidx = 0, toidx = 0;
423 char name[100];
424 long int j;
425 edge = igraph_gml_tree_get_tree(gtree, p);
426 for (j = 0; j < igraph_gml_tree_length(edge); j++) {
427 const char *n = igraph_gml_tree_name(edge, j);
428 if (!strcmp(n, "source")) {
429 fromidx = igraph_gml_tree_find(edge, "source", 0);
430 } else if (!strcmp(n, "target")) {
431 toidx = igraph_gml_tree_find(edge, "target", 0);
432 } else {
433 long int edgeid = edgeptr / 2;
434 long int trieidx;
435 igraph_attribute_record_t *atrec;
436 int type;
437 igraph_trie_get(&eattrnames, n, &trieidx);
438 atrec = VECTOR(eattrs)[trieidx];
439 type = atrec->type;
440 if (type == IGRAPH_ATTRIBUTE_NUMERIC) {
441 igraph_vector_t *v = (igraph_vector_t *)atrec->value;
442 IGRAPH_CHECK(igraph_i_gml_toreal(edge, j, VECTOR(*v) + edgeid));
443 } else if (type == IGRAPH_ATTRIBUTE_STRING) {
444 igraph_strvector_t *v = (igraph_strvector_t *)atrec->value;
445 const char *value = igraph_i_gml_tostring(edge, j);
446 IGRAPH_CHECK(igraph_strvector_set(v, edgeid, value));
447 }
448 }
449 }
450 from = igraph_gml_tree_get_integer(edge, fromidx);
451 to = igraph_gml_tree_get_integer(edge, toidx);
452 snprintf(name, sizeof(name) / sizeof(char) -1, "%li", from);
453 IGRAPH_CHECK(igraph_trie_get(&trie, name, &from));
454 snprintf(name, sizeof(name) / sizeof(char) -1, "%li", to);
455 IGRAPH_CHECK(igraph_trie_get(&trie, name, &to));
456 if (igraph_trie_size(&trie) != no_of_nodes) {
457 IGRAPH_ERROR("Unknown node id found in an edge in GML file.", IGRAPH_PARSEERROR);
458 }
459 VECTOR(edges)[edgeptr++] = from;
460 VECTOR(edges)[edgeptr++] = to;
461 }
462
463 /* and add vertex attributes */
464 for (i = 0; i < igraph_gml_tree_length(gtree); i++) {
465 const char *n;
466 char name[100];
467 long int j, k;
468 n = igraph_gml_tree_name(gtree, i);
469 if (!strcmp(n, "node")) {
470 igraph_gml_tree_t *node = igraph_gml_tree_get_tree(gtree, i);
471 long int iidx = igraph_gml_tree_find(node, "id", 0);
472 long int id = igraph_gml_tree_get_integer(node, iidx);
473 snprintf(name, sizeof(name) / sizeof(char) -1, "%li", id);
474 igraph_trie_get(&trie, name, &id);
475 for (j = 0; j < igraph_gml_tree_length(node); j++) {
476 const char *aname = igraph_gml_tree_name(node, j);
477 igraph_attribute_record_t *atrec;
478 int type;
479 igraph_trie_get(&vattrnames, aname, &k);
480 atrec = VECTOR(vattrs)[k];
481 type = atrec->type;
482 if (type == IGRAPH_ATTRIBUTE_NUMERIC) {
483 igraph_vector_t *v = (igraph_vector_t *)atrec->value;
484 IGRAPH_CHECK(igraph_i_gml_toreal(node, j, VECTOR(*v) + id));
485 } else if (type == IGRAPH_ATTRIBUTE_STRING) {
486 igraph_strvector_t *v = (igraph_strvector_t *)atrec->value;
487 const char *value = igraph_i_gml_tostring(node, j);
488 IGRAPH_CHECK(igraph_strvector_set(v, id, value));
489 }
490 }
491 }
492 }
493
494 igraph_trie_destroy(&trie);
495 igraph_trie_destroy(&gattrnames);
496 igraph_trie_destroy(&vattrnames);
497 igraph_trie_destroy(&eattrnames);
498 IGRAPH_FINALLY_CLEAN(4);
499
500 IGRAPH_CHECK(igraph_empty_attrs(graph, 0, directed, 0)); /* TODO */
501 IGRAPH_CHECK(igraph_add_vertices(graph, (igraph_integer_t) no_of_nodes,
502 &vattrs));
503 IGRAPH_CHECK(igraph_add_edges(graph, &edges, &eattrs));
504
505 igraph_i_gml_destroy_attrs(attrs);
506 igraph_vector_destroy(&edges);
507 igraph_i_gml_parsedata_destroy(&context);
508 IGRAPH_FINALLY_CLEAN(3);
509
510 return IGRAPH_SUCCESS;
511 }
512
igraph_i_gml_convert_to_key(const char * orig,char ** key)513 static int igraph_i_gml_convert_to_key(const char *orig, char **key) {
514 int no = 1;
515 char strno[50];
516 size_t i, len = strlen(orig), newlen = 0, plen = 0;
517
518 /* do we need a prefix? */
519 if (len == 0 || !isalpha(orig[0])) {
520 no++;
521 snprintf(strno, sizeof(strno) - 1, "igraph");
522 plen = newlen = strlen(strno);
523 }
524 for (i = 0; i < len; i++) {
525 if (isalnum(orig[i])) {
526 newlen++;
527 }
528 }
529 *key = IGRAPH_CALLOC(newlen + 1, char);
530 if (! *key) {
531 IGRAPH_ERROR("Writing GML format failed.", IGRAPH_ENOMEM);
532 }
533 memcpy(*key, strno, plen * sizeof(char));
534 for (i = 0; i < len; i++) {
535 if (isalnum(orig[i])) {
536 (*key)[plen++] = orig[i];
537 }
538 }
539 (*key)[newlen] = '\0';
540
541 return IGRAPH_SUCCESS;
542 }
543
544 #define CHECK(cmd) do { ret=cmd; if (ret<0) IGRAPH_ERROR("Writing GML format failed.", IGRAPH_EFILE); } while (0)
545
546 /**
547 * \function igraph_write_graph_gml
548 * \brief Write the graph to a stream in GML format
549 *
550 * GML is a quite general textual format, see
551 * http://www.fim.uni-passau.de/en/fim/faculty/chairs/theoretische-informatik/projects.html for details.
552 *
553 * </para><para> The graph, vertex and edges attributes are written to the
554 * file as well, if they are numeric or string.
555 *
556 * </para><para> As igraph is more forgiving about attribute names, it might
557 * be necessary to simplify the them before writing to the GML file.
558 * This way we'll have a syntactically correct GML file. The following
559 * simple procedure is performed on each attribute name: first the alphanumeric
560 * characters are extracted, the others are ignored. Then if the first character
561 * is not a letter then the attribute name is prefixed with <quote>igraph</quote>.
562 * Note that this might result identical names for two attributes, igraph
563 * does not check this.
564 *
565 * </para><para> The <quote>id</quote> vertex attribute is treated specially.
566 * If the <parameter>id</parameter> argument is not 0 then it should be a numeric
567 * vector with the vertex ids and the <quote>id</quote> vertex attribute is
568 * ignored (if there is one). If <parameter>id</parameter> is 0 and there is a
569 * numeric <quote>id</quote> vertex attribute that is used instead. If ids
570 * are not specified in either way then the regular igraph vertex ids are used.
571 *
572 * </para><para> Note that whichever way vertex ids are specified, their
573 * uniqueness is not checked.
574 *
575 * </para><para> If the graph has edge attributes named <quote>source</quote>
576 * or <quote>target</quote> they're silently ignored. GML uses these attributes
577 * to specify the edges, so we cannot write them to the file. Rename them
578 * before calling this function if you want to preserve them.
579 * \param graph The graph to write to the stream.
580 * \param outstream The stream to write the file to.
581 * \param id Either <code>NULL</code> or a numeric vector with the vertex ids.
582 * See details above.
583 * \param creator An optional string to write to the stream in the creator line.
584 * If this is 0 then the current date and time is added.
585 * \return Error code.
586 *
587 * Time complexity: should be proportional to the number of characters written
588 * to the file.
589 *
590 * \sa \ref igraph_read_graph_gml() for reading GML files,
591 * \ref igraph_read_graph_graphml() for a more modern format.
592 *
593 * \example examples/simple/gml.c
594 */
595
igraph_write_graph_gml(const igraph_t * graph,FILE * outstream,const igraph_vector_t * id,const char * creator)596 int igraph_write_graph_gml(const igraph_t *graph, FILE *outstream,
597 const igraph_vector_t *id, const char *creator) {
598 int ret;
599 igraph_strvector_t gnames, vnames, enames;
600 igraph_vector_t gtypes, vtypes, etypes;
601 igraph_vector_t numv;
602 igraph_strvector_t strv;
603 igraph_vector_bool_t boolv;
604 long int i;
605 long int no_of_nodes = igraph_vcount(graph);
606 long int no_of_edges = igraph_ecount(graph);
607
608 igraph_vector_t v_myid;
609 const igraph_vector_t *myid = id;
610
611 time_t curtime = time(0);
612 char *timestr = ctime(&curtime);
613 timestr[strlen(timestr) - 1] = '\0'; /* nicely remove \n */
614
615 CHECK(fprintf(outstream,
616 "Creator \"igraph version %s %s\"\nVersion 1\ngraph\n[\n",
617 IGRAPH_VERSION, creator ? creator : timestr));
618
619 IGRAPH_STRVECTOR_INIT_FINALLY(&gnames, 0);
620 IGRAPH_STRVECTOR_INIT_FINALLY(&vnames, 0);
621 IGRAPH_STRVECTOR_INIT_FINALLY(&enames, 0);
622 IGRAPH_VECTOR_INIT_FINALLY(>ypes, 0);
623 IGRAPH_VECTOR_INIT_FINALLY(&vtypes, 0);
624 IGRAPH_VECTOR_INIT_FINALLY(&etypes, 0);
625 IGRAPH_CHECK(igraph_i_attribute_get_info(graph,
626 &gnames, >ypes,
627 &vnames, &vtypes,
628 &enames, &etypes));
629
630 IGRAPH_VECTOR_INIT_FINALLY(&numv, 1);
631 IGRAPH_STRVECTOR_INIT_FINALLY(&strv, 1);
632 IGRAPH_VECTOR_BOOL_INIT_FINALLY(&boolv, 1);
633
634 /* Check whether there is an 'id' node attribute if the supplied is 0 */
635 if (!id) {
636 igraph_bool_t found = 0;
637 for (i = 0; i < igraph_vector_size(&vtypes); i++) {
638 char *n;
639 igraph_strvector_get(&vnames, i, &n);
640 if (!strcmp(n, "id") && VECTOR(vtypes)[i] == IGRAPH_ATTRIBUTE_NUMERIC) {
641 found = 1; break;
642 }
643 }
644 if (found) {
645 IGRAPH_VECTOR_INIT_FINALLY(&v_myid, no_of_nodes);
646 IGRAPH_CHECK(igraph_i_attribute_get_numeric_vertex_attr(graph, "id",
647 igraph_vss_all(),
648 &v_myid));
649 myid = &v_myid;
650 }
651 }
652
653 /* directedness */
654 CHECK(fprintf(outstream, " directed %i\n", igraph_is_directed(graph) ? 1 : 0));
655
656 /* Graph attributes first */
657 for (i = 0; i < igraph_vector_size(>ypes); i++) {
658 char *name, *newname;
659 igraph_strvector_get(&gnames, i, &name);
660 IGRAPH_CHECK(igraph_i_gml_convert_to_key(name, &newname));
661 IGRAPH_FINALLY(igraph_free, newname);
662 if (VECTOR(gtypes)[i] == IGRAPH_ATTRIBUTE_NUMERIC) {
663 IGRAPH_CHECK(igraph_i_attribute_get_numeric_graph_attr(graph, name, &numv));
664 CHECK(fprintf(outstream, " %s ", newname));
665 CHECK(igraph_real_fprintf_precise(outstream, VECTOR(numv)[0]));
666 CHECK(fputc('\n', outstream));
667 } else if (VECTOR(gtypes)[i] == IGRAPH_ATTRIBUTE_STRING) {
668 char *s;
669 IGRAPH_CHECK(igraph_i_attribute_get_string_graph_attr(graph, name, &strv));
670 igraph_strvector_get(&strv, 0, &s);
671 CHECK(fprintf(outstream, " %s \"%s\"\n", newname, s));
672 } else if (VECTOR(gtypes)[i] == IGRAPH_ATTRIBUTE_BOOLEAN) {
673 IGRAPH_CHECK(igraph_i_attribute_get_bool_graph_attr(graph, name, &boolv));
674 CHECK(fprintf(outstream, " %s %d\n", newname, VECTOR(boolv)[0] ? 1 : 0));
675 IGRAPH_WARNING("A boolean graph attribute was converted to numeric");
676 } else {
677 IGRAPH_WARNING("A non-numeric, non-string, non-boolean graph attribute ignored");
678 }
679 IGRAPH_FREE(newname);
680 IGRAPH_FINALLY_CLEAN(1);
681 }
682
683 /* Now come the vertices */
684 for (i = 0; i < no_of_nodes; i++) {
685 long int j;
686 CHECK(fprintf(outstream, " node\n [\n"));
687 /* id */
688 CHECK(fprintf(outstream, " id %li\n", myid ? (long int)VECTOR(*myid)[i] : i));
689 /* other attributes */
690 for (j = 0; j < igraph_vector_size(&vtypes); j++) {
691 int type = (int) VECTOR(vtypes)[j];
692 char *name, *newname;
693 igraph_strvector_get(&vnames, j, &name);
694 if (!strcmp(name, "id")) {
695 continue;
696 }
697 IGRAPH_CHECK(igraph_i_gml_convert_to_key(name, &newname));
698 IGRAPH_FINALLY(igraph_free, newname);
699 if (type == IGRAPH_ATTRIBUTE_NUMERIC) {
700 IGRAPH_CHECK(igraph_i_attribute_get_numeric_vertex_attr(graph, name,
701 igraph_vss_1((igraph_integer_t) i), &numv));
702 CHECK(fprintf(outstream, " %s ", newname));
703 CHECK(igraph_real_fprintf_precise(outstream, VECTOR(numv)[0]));
704 CHECK(fputc('\n', outstream));
705 } else if (type == IGRAPH_ATTRIBUTE_STRING) {
706 char *s;
707 IGRAPH_CHECK(igraph_i_attribute_get_string_vertex_attr(graph, name,
708 igraph_vss_1((igraph_integer_t) i), &strv));
709 igraph_strvector_get(&strv, 0, &s);
710 CHECK(fprintf(outstream, " %s \"%s\"\n", newname, s));
711 } else if (type == IGRAPH_ATTRIBUTE_BOOLEAN) {
712 IGRAPH_CHECK(igraph_i_attribute_get_bool_vertex_attr(graph, name,
713 igraph_vss_1((igraph_integer_t) i), &boolv));
714 CHECK(fprintf(outstream, " %s %d\n", newname, VECTOR(boolv)[0] ? 1 : 0));
715 IGRAPH_WARNING("A boolean vertex attribute was converted to numeric");
716 } else {
717 IGRAPH_WARNING("A non-numeric, non-string, non-boolean edge attribute was ignored");
718 }
719 IGRAPH_FREE(newname);
720 IGRAPH_FINALLY_CLEAN(1);
721 }
722 CHECK(fprintf(outstream, " ]\n"));
723 }
724
725 /* The edges too */
726 for (i = 0; i < no_of_edges; i++) {
727 long int from = IGRAPH_FROM(graph, i);
728 long int to = IGRAPH_TO(graph, i);
729 long int j;
730 CHECK(fprintf(outstream, " edge\n [\n"));
731 /* source and target */
732 CHECK(fprintf(outstream, " source %li\n",
733 myid ? (long int)VECTOR(*myid)[from] : from));
734 CHECK(fprintf(outstream, " target %li\n",
735 myid ? (long int)VECTOR(*myid)[to] : to));
736
737 /* other attributes */
738 for (j = 0; j < igraph_vector_size(&etypes); j++) {
739 int type = (int) VECTOR(etypes)[j];
740 char *name, *newname;
741 igraph_strvector_get(&enames, j, &name);
742 if (!strcmp(name, "source") || !strcmp(name, "target")) {
743 continue;
744 }
745 IGRAPH_CHECK(igraph_i_gml_convert_to_key(name, &newname));
746 IGRAPH_FINALLY(igraph_free, newname);
747 if (type == IGRAPH_ATTRIBUTE_NUMERIC) {
748 IGRAPH_CHECK(igraph_i_attribute_get_numeric_edge_attr(graph, name,
749 igraph_ess_1((igraph_integer_t) i), &numv));
750 CHECK(fprintf(outstream, " %s ", newname));
751 CHECK(igraph_real_fprintf_precise(outstream, VECTOR(numv)[0]));
752 CHECK(fputc('\n', outstream));
753 } else if (type == IGRAPH_ATTRIBUTE_STRING) {
754 char *s;
755 IGRAPH_CHECK(igraph_i_attribute_get_string_edge_attr(graph, name,
756 igraph_ess_1((igraph_integer_t) i), &strv));
757 igraph_strvector_get(&strv, 0, &s);
758 CHECK(fprintf(outstream, " %s \"%s\"\n", newname, s));
759 } else if (type == IGRAPH_ATTRIBUTE_BOOLEAN) {
760 IGRAPH_CHECK(igraph_i_attribute_get_bool_edge_attr(graph, name,
761 igraph_ess_1((igraph_integer_t) i), &boolv));
762 CHECK(fprintf(outstream, " %s %d\n", newname, VECTOR(boolv)[0] ? 1 : 0));
763 IGRAPH_WARNING("A boolean edge attribute was converted to numeric");
764 } else {
765 IGRAPH_WARNING("A non-numeric, non-string, non-boolean edge attribute was ignored");
766 }
767 IGRAPH_FREE(newname);
768 IGRAPH_FINALLY_CLEAN(1);
769 }
770 CHECK(fprintf(outstream, " ]\n"));
771 }
772
773 CHECK(fprintf(outstream, "]\n"));
774
775 if (&v_myid == myid) {
776 igraph_vector_destroy(&v_myid);
777 IGRAPH_FINALLY_CLEAN(1);
778 }
779
780 igraph_vector_bool_destroy(&boolv);
781 igraph_strvector_destroy(&strv);
782 igraph_vector_destroy(&numv);
783 igraph_vector_destroy(&etypes);
784 igraph_vector_destroy(&vtypes);
785 igraph_vector_destroy(>ypes);
786 igraph_strvector_destroy(&enames);
787 igraph_strvector_destroy(&vnames);
788 igraph_strvector_destroy(&gnames);
789 IGRAPH_FINALLY_CLEAN(9);
790
791 return IGRAPH_SUCCESS;
792 }
793
794 #undef CHECK
795