1 /* -*- mode: C -*-  */
2 /*
3    IGraph library.
4    Copyright (C) 2005-2020  The igraph development team
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software
18    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19    02110-1301 USA
20 
21 */
22 
23 #include "igraph_foreign.h"
24 
25 #include "igraph_attributes.h"
26 #include "igraph_interface.h"
27 #include "igraph_memory.h"
28 #include "igraph_version.h"
29 
30 #include "core/trie.h"
31 #include "graph/attributes.h"
32 #include "io/gml-header.h"
33 
34 #include <ctype.h>
35 #include <time.h>
36 #include <string.h>
37 
38 int igraph_gml_yylex_init_extra (igraph_i_gml_parsedata_t* user_defined,
39                                  void* scanner);
40 void igraph_gml_yylex_destroy (void *scanner );
41 int igraph_gml_yyparse (igraph_i_gml_parsedata_t* context);
42 void igraph_gml_yyset_in  (FILE * in_str, void* yyscanner );
43 
igraph_i_gml_destroy_attrs(igraph_vector_ptr_t ** ptr)44 static void igraph_i_gml_destroy_attrs(igraph_vector_ptr_t **ptr) {
45     long int i;
46     igraph_vector_ptr_t *vec;
47     for (i = 0; i < 3; i++) {
48         long int j;
49         vec = ptr[i];
50         for (j = 0; j < igraph_vector_ptr_size(vec); j++) {
51             igraph_attribute_record_t *atrec = VECTOR(*vec)[j];
52             if (atrec->type == IGRAPH_ATTRIBUTE_NUMERIC) {
53                 igraph_vector_t *value = (igraph_vector_t*)atrec->value;
54                 if (value != 0) {
55                     igraph_vector_destroy(value);
56                     IGRAPH_FREE(value);
57                 }
58             } else {
59                 igraph_strvector_t *value = (igraph_strvector_t*)atrec->value;
60                 if (value != 0) {
61                     igraph_strvector_destroy(value);
62                     IGRAPH_FREE(value);
63                 }
64             }
65             IGRAPH_FREE(atrec->name);
66             IGRAPH_FREE(atrec);
67         }
68         igraph_vector_ptr_destroy(vec);
69     }
70 }
71 
igraph_i_gml_toreal(igraph_gml_tree_t * node,long int pos,igraph_real_t * result)72 static int igraph_i_gml_toreal(igraph_gml_tree_t *node, long int pos, igraph_real_t *result) {
73 
74     igraph_real_t value = 0.0;
75     int type = igraph_gml_tree_type(node, pos);
76 
77     switch (type) {
78     case IGRAPH_I_GML_TREE_INTEGER:
79         value = igraph_gml_tree_get_integer(node, pos);
80         break;
81     case IGRAPH_I_GML_TREE_REAL:
82         value = igraph_gml_tree_get_real(node, pos);
83         break;
84     default:
85         IGRAPH_ERROR("Internal error while parsing GML file.", IGRAPH_FAILURE);
86         break;
87     }
88 
89     *result = value;
90     return IGRAPH_SUCCESS;
91 }
92 
igraph_i_gml_tostring(igraph_gml_tree_t * node,long int pos)93 static const char *igraph_i_gml_tostring(igraph_gml_tree_t *node, long int pos) {
94 
95     int type = igraph_gml_tree_type(node, pos);
96     static char tmp[256];
97     const char *p = tmp;
98     long int i;
99     igraph_real_t d;
100 
101     switch (type) {
102     case IGRAPH_I_GML_TREE_INTEGER:
103         i = igraph_gml_tree_get_integer(node, pos);
104         snprintf(tmp, sizeof(tmp) / sizeof(char), "%li", i);
105         break;
106     case IGRAPH_I_GML_TREE_REAL:
107         d = igraph_gml_tree_get_real(node, pos);
108         igraph_real_snprintf_precise(tmp, sizeof(tmp) / sizeof(char), d);
109         break;
110     case IGRAPH_I_GML_TREE_STRING:
111         p = igraph_gml_tree_get_string(node, pos);
112         break;
113     default:
114         break;
115     }
116 
117     return p;
118 }
119 
igraph_i_gml_parsedata_init(igraph_i_gml_parsedata_t * context)120 int igraph_i_gml_parsedata_init(igraph_i_gml_parsedata_t* context) {
121     context->eof = 0;
122     context->depth = 0;
123     context->scanner = 0;
124     context->tree = 0;
125     context->errmsg[0] = 0;
126 
127     return IGRAPH_SUCCESS;
128 }
129 
igraph_i_gml_parsedata_destroy(igraph_i_gml_parsedata_t * context)130 void igraph_i_gml_parsedata_destroy(igraph_i_gml_parsedata_t* context) {
131     if (context->tree != 0) {
132         igraph_gml_tree_destroy(context->tree);
133         context->tree = 0;
134     }
135 
136     if (context->scanner != 0) {
137         igraph_gml_yylex_destroy(context->scanner);
138         context->scanner = 0;
139     }
140 }
141 
142 /**
143  * \function igraph_read_graph_gml
144  * \brief Read a graph in GML format.
145  *
146  * GML is a simple textual format, see
147  * http://www.fim.uni-passau.de/en/fim/faculty/chairs/theoretische-informatik/projects.html for details.
148  *
149  * </para><para>
150  * Although all syntactically correct GML can be parsed,
151  * we implement only a subset of this format, some attributes might be
152  * ignored. Here is a list of all the differences:
153  * \olist
154  * \oli Only <code>node</code> and <code>edge</code> attributes are
155  *      used, and only if they have a simple type: integer, real or
156  *      string. So if an attribute is an array or a record, then it is
157  *      ignored. This is also true if only some values of the
158  *      attribute are complex.
159  * \oli Top level attributes except for <code>Version</code> and the
160  *      first <code>graph</code> attribute are completely ignored.
161  * \oli Graph attributes except for <code>node</code> and
162  *      <code>edge</code> are completely ignored.
163  * \oli There is no maximum line length.
164  * \oli There is no maximum keyword length.
165  * \oli Character entities in strings are not interpreted.
166  * \oli We allow <code>inf</code> (infinity) and <code>nan</code>
167  *      (not a number) as a real number. This is case insensitive, so
168  *      <code>nan</code>, <code>NaN</code> and <code>NAN</code> are equal.
169  * \endolist
170  *
171  * </para><para> Please contact us if you cannot live with these
172  * limitations of the GML parser.
173  * \param graph Pointer to an uninitialized graph object.
174  * \param instream The stream to read the GML file from.
175  * \return Error code.
176  *
177  * Time complexity: should be proportional to the length of the file.
178  *
179  * \sa \ref igraph_read_graph_graphml() for a more modern format,
180  * \ref igraph_write_graph_gml() for writing GML files.
181  *
182  * \example examples/simple/gml.c
183  */
igraph_read_graph_gml(igraph_t * graph,FILE * instream)184 int igraph_read_graph_gml(igraph_t *graph, FILE *instream) {
185 
186     long int i, p;
187     long int no_of_nodes = 0, no_of_edges = 0;
188     igraph_trie_t trie;
189     igraph_vector_t edges;
190     igraph_bool_t directed = IGRAPH_UNDIRECTED;
191     igraph_gml_tree_t *gtree;
192     long int gidx;
193     igraph_trie_t vattrnames;
194     igraph_trie_t eattrnames;
195     igraph_trie_t gattrnames;
196     igraph_vector_ptr_t gattrs = IGRAPH_VECTOR_PTR_NULL,
197                         vattrs = IGRAPH_VECTOR_PTR_NULL, eattrs = IGRAPH_VECTOR_PTR_NULL;
198     igraph_vector_ptr_t *attrs[3];
199     long int edgeptr = 0;
200     igraph_i_gml_parsedata_t context;
201 
202     attrs[0] = &gattrs; attrs[1] = &vattrs; attrs[2] = &eattrs;
203 
204     IGRAPH_CHECK(igraph_i_gml_parsedata_init(&context));
205     IGRAPH_FINALLY(igraph_i_gml_parsedata_destroy, &context);
206 
207     igraph_gml_yylex_init_extra(&context, &context.scanner);
208 
209     igraph_gml_yyset_in(instream, context.scanner);
210 
211     i = igraph_gml_yyparse(&context);
212     if (i != 0) {
213         if (context.errmsg[0] != 0) {
214             IGRAPH_ERROR(context.errmsg, IGRAPH_PARSEERROR);
215         } else {
216             IGRAPH_ERROR("Cannot read GML file.", IGRAPH_PARSEERROR);
217         }
218     }
219 
220     IGRAPH_VECTOR_INIT_FINALLY(&edges, 0);
221 
222     /* Check version, if present, integer and not '1' then ignored */
223     i = igraph_gml_tree_find(context.tree, "Version", 0);
224     if (i >= 0 &&
225         igraph_gml_tree_type(context.tree, i) == IGRAPH_I_GML_TREE_INTEGER &&
226         igraph_gml_tree_get_integer(context.tree, i) != 1) {
227         IGRAPH_ERROR("Unknown GML version.", IGRAPH_UNIMPLEMENTED);
228         /* RETURN HERE!!!! */
229     }
230 
231     /* get the graph */
232     gidx = igraph_gml_tree_find(context.tree, "graph", 0);
233     if (gidx == -1) {
234         IGRAPH_ERROR("No 'graph' object in GML file.", IGRAPH_PARSEERROR);
235     }
236     if (igraph_gml_tree_type(context.tree, gidx) !=
237         IGRAPH_I_GML_TREE_TREE) {
238         IGRAPH_ERROR("Invalid type for 'graph' object in GML file.", IGRAPH_PARSEERROR);
239     }
240     gtree = igraph_gml_tree_get_tree(context.tree, gidx);
241 
242     IGRAPH_FINALLY(igraph_i_gml_destroy_attrs, attrs);
243     igraph_vector_ptr_init(&gattrs, 0);
244     igraph_vector_ptr_init(&vattrs, 0);
245     igraph_vector_ptr_init(&eattrs, 0);
246 
247     IGRAPH_TRIE_INIT_FINALLY(&trie, 0);
248     IGRAPH_TRIE_INIT_FINALLY(&vattrnames, 0);
249     IGRAPH_TRIE_INIT_FINALLY(&eattrnames, 0);
250     IGRAPH_TRIE_INIT_FINALLY(&gattrnames, 0);
251 
252     /* Is is directed? */
253     i = igraph_gml_tree_find(gtree, "directed", 0);
254     if (i >= 0 && igraph_gml_tree_type(gtree, i) == IGRAPH_I_GML_TREE_INTEGER) {
255         if (igraph_gml_tree_get_integer(gtree, i) == 1) {
256             directed = IGRAPH_DIRECTED;
257         }
258     }
259 
260     /* Now we go over all objects in the graph and collect the attribute names and
261        types. Plus we collect node ids. We also do some checks. */
262     for (i = 0; i < igraph_gml_tree_length(gtree); i++) {
263         long int j;
264         char cname[100];
265         const char *name = igraph_gml_tree_name(gtree, i);
266         if (!strcmp(name, "node")) {
267             igraph_gml_tree_t *node;
268             igraph_bool_t hasid;
269             no_of_nodes++;
270             if (igraph_gml_tree_type(gtree, i) != IGRAPH_I_GML_TREE_TREE) {
271                 IGRAPH_ERROR("'node' is not a list in GML file.", IGRAPH_PARSEERROR);
272             }
273             node = igraph_gml_tree_get_tree(gtree, i);
274             hasid = 0;
275             for (j = 0; j < igraph_gml_tree_length(node); j++) {
276                 const char *name = igraph_gml_tree_name(node, j);
277                 long int trieid, triesize = igraph_trie_size(&vattrnames);
278                 IGRAPH_CHECK(igraph_trie_get(&vattrnames, name, &trieid));
279                 if (trieid == triesize) {
280                     /* new attribute */
281                     igraph_attribute_record_t *atrec = IGRAPH_CALLOC(1, igraph_attribute_record_t);
282                     int type = igraph_gml_tree_type(node, j);
283                     if (!atrec) {
284                         IGRAPH_ERROR("Cannot read GML file.", IGRAPH_ENOMEM);
285                     }
286                     IGRAPH_CHECK(igraph_vector_ptr_push_back(&vattrs, atrec));
287                     atrec->name = strdup(name);
288                     if (type == IGRAPH_I_GML_TREE_INTEGER || type == IGRAPH_I_GML_TREE_REAL) {
289                         atrec->type = IGRAPH_ATTRIBUTE_NUMERIC;
290                     } else {
291                         atrec->type = IGRAPH_ATTRIBUTE_STRING;
292                     }
293                 } else {
294                     /* already seen, should we update type? */
295                     igraph_attribute_record_t *atrec = VECTOR(vattrs)[trieid];
296                     int type1 = atrec->type;
297                     int type2 = igraph_gml_tree_type(node, j);
298                     if (type1 == IGRAPH_ATTRIBUTE_NUMERIC && type2 == IGRAPH_I_GML_TREE_STRING) {
299                         atrec->type = IGRAPH_ATTRIBUTE_STRING;
300                     }
301                 }
302                 /* check id */
303                 if (!hasid && !strcmp(name, "id")) {
304                     long int id;
305                     if (igraph_gml_tree_type(node, j) != IGRAPH_I_GML_TREE_INTEGER) {
306                         IGRAPH_ERROR("Non-integer node id in GML file.", IGRAPH_PARSEERROR);
307                     }
308                     id = igraph_gml_tree_get_integer(node, j);
309                     snprintf(cname, sizeof(cname) / sizeof(char) -1, "%li", id);
310                     IGRAPH_CHECK(igraph_trie_get(&trie, cname, &id));
311                     hasid = 1;
312                 }
313             }
314             if (!hasid) {
315                 IGRAPH_ERROR("Node without 'id' while parsing GML file.", IGRAPH_PARSEERROR);
316             }
317         } else if (!strcmp(name, "edge")) {
318             igraph_gml_tree_t *edge;
319             igraph_bool_t has_source = 0, has_target = 0;
320             no_of_edges++;
321             if (igraph_gml_tree_type(gtree, i) != IGRAPH_I_GML_TREE_TREE) {
322                 IGRAPH_ERROR("'edge' is not a list in GML file.", IGRAPH_PARSEERROR);
323             }
324             edge = igraph_gml_tree_get_tree(gtree, i);
325             has_source = has_target = 0;
326             for (j = 0; j < igraph_gml_tree_length(edge); j++) {
327                 const char *name = igraph_gml_tree_name(edge, j);
328                 if (!strcmp(name, "source")) {
329                     has_source = 1;
330                     if (igraph_gml_tree_type(edge, j) != IGRAPH_I_GML_TREE_INTEGER) {
331                         IGRAPH_ERROR("Non-integer 'source' for an edge in GML file.",
332                                      IGRAPH_PARSEERROR);
333                     }
334                 } else if (!strcmp(name, "target")) {
335                     has_target = 1;
336                     if (igraph_gml_tree_type(edge, j) != IGRAPH_I_GML_TREE_INTEGER) {
337                         IGRAPH_ERROR("Non-integer 'source' for an edge in GML file.",
338                                      IGRAPH_PARSEERROR);
339                     }
340                 } else {
341                     long int trieid, triesize = igraph_trie_size(&eattrnames);
342                     IGRAPH_CHECK(igraph_trie_get(&eattrnames, name, &trieid));
343                     if (trieid == triesize) {
344                         /* new attribute */
345                         igraph_attribute_record_t *atrec = IGRAPH_CALLOC(1, igraph_attribute_record_t);
346                         int type = igraph_gml_tree_type(edge, j);
347                         if (!atrec) {
348                             IGRAPH_ERROR("Cannot read GML file.", IGRAPH_ENOMEM);
349                         }
350                         IGRAPH_CHECK(igraph_vector_ptr_push_back(&eattrs, atrec));
351                         atrec->name = strdup(name);
352                         if (type == IGRAPH_I_GML_TREE_INTEGER || type == IGRAPH_I_GML_TREE_REAL) {
353                             atrec->type = IGRAPH_ATTRIBUTE_NUMERIC;
354                         } else {
355                             atrec->type = IGRAPH_ATTRIBUTE_STRING;
356                         }
357                     } else {
358                         /* already seen, should we update type? */
359                         igraph_attribute_record_t *atrec = VECTOR(eattrs)[trieid];
360                         int type1 = atrec->type;
361                         int type2 = igraph_gml_tree_type(edge, j);
362                         if (type1 == IGRAPH_ATTRIBUTE_NUMERIC && type2 == IGRAPH_I_GML_TREE_STRING) {
363                             atrec->type = IGRAPH_ATTRIBUTE_STRING;
364                         }
365                     }
366                 }
367             } /* for */
368             if (!has_source) {
369                 IGRAPH_ERROR("No 'source' for edge in GML file.", IGRAPH_PARSEERROR);
370             }
371             if (!has_target) {
372                 IGRAPH_ERROR("No 'target' for edge in GML file.", IGRAPH_PARSEERROR);
373             }
374         } else {
375             /* anything to do? Maybe add as graph attribute.... */
376         }
377     }
378 
379     /* check vertex id uniqueness */
380     if (igraph_trie_size(&trie) != no_of_nodes) {
381         IGRAPH_ERROR("Node 'id' not unique in GML file.", IGRAPH_PARSEERROR);
382     }
383 
384     /* now we allocate the vectors and strvectors for the attributes */
385     for (i = 0; i < igraph_vector_ptr_size(&vattrs); i++) {
386         igraph_attribute_record_t *atrec = VECTOR(vattrs)[i];
387         int type = atrec->type;
388         if (type == IGRAPH_ATTRIBUTE_NUMERIC) {
389             igraph_vector_t *p = IGRAPH_CALLOC(1, igraph_vector_t);
390             atrec->value = p;
391             IGRAPH_CHECK(igraph_vector_init(p, no_of_nodes));
392         } else if (type == IGRAPH_ATTRIBUTE_STRING) {
393             igraph_strvector_t *p = IGRAPH_CALLOC(1, igraph_strvector_t);
394             atrec->value = p;
395             IGRAPH_CHECK(igraph_strvector_init(p, no_of_nodes));
396         } else {
397             IGRAPH_WARNING("A composite attribute was ignored in the GML file.");
398         }
399     }
400 
401     for (i = 0; i < igraph_vector_ptr_size(&eattrs); i++) {
402         igraph_attribute_record_t *atrec = VECTOR(eattrs)[i];
403         int type = atrec->type;
404         if (type == IGRAPH_ATTRIBUTE_NUMERIC) {
405             igraph_vector_t *p = IGRAPH_CALLOC(1, igraph_vector_t);
406             atrec->value = p;
407             IGRAPH_CHECK(igraph_vector_init(p, no_of_edges));
408         } else if (type == IGRAPH_ATTRIBUTE_STRING) {
409             igraph_strvector_t *p = IGRAPH_CALLOC(1, igraph_strvector_t);
410             atrec->value = p;
411             IGRAPH_CHECK(igraph_strvector_init(p, no_of_edges));
412         } else {
413             IGRAPH_WARNING("A composite attribute was ignored in the GML file.");
414         }
415     }
416 
417     /* Ok, now the edges, attributes too */
418     IGRAPH_CHECK(igraph_vector_resize(&edges, no_of_edges * 2));
419     p = -1;
420     while ( (p = igraph_gml_tree_find(gtree, "edge", p + 1)) != -1) {
421         igraph_gml_tree_t *edge;
422         long int from, to, fromidx = 0, toidx = 0;
423         char name[100];
424         long int j;
425         edge = igraph_gml_tree_get_tree(gtree, p);
426         for (j = 0; j < igraph_gml_tree_length(edge); j++) {
427             const char *n = igraph_gml_tree_name(edge, j);
428             if (!strcmp(n, "source")) {
429                 fromidx = igraph_gml_tree_find(edge, "source", 0);
430             } else if (!strcmp(n, "target")) {
431                 toidx = igraph_gml_tree_find(edge, "target", 0);
432             } else {
433                 long int edgeid = edgeptr / 2;
434                 long int trieidx;
435                 igraph_attribute_record_t *atrec;
436                 int type;
437                 igraph_trie_get(&eattrnames, n, &trieidx);
438                 atrec = VECTOR(eattrs)[trieidx];
439                 type = atrec->type;
440                 if (type == IGRAPH_ATTRIBUTE_NUMERIC) {
441                     igraph_vector_t *v = (igraph_vector_t *)atrec->value;
442                     IGRAPH_CHECK(igraph_i_gml_toreal(edge, j, VECTOR(*v) + edgeid));
443                 } else if (type == IGRAPH_ATTRIBUTE_STRING) {
444                     igraph_strvector_t *v = (igraph_strvector_t *)atrec->value;
445                     const char *value = igraph_i_gml_tostring(edge, j);
446                     IGRAPH_CHECK(igraph_strvector_set(v, edgeid, value));
447                 }
448             }
449         }
450         from = igraph_gml_tree_get_integer(edge, fromidx);
451         to = igraph_gml_tree_get_integer(edge, toidx);
452         snprintf(name, sizeof(name) / sizeof(char) -1, "%li", from);
453         IGRAPH_CHECK(igraph_trie_get(&trie, name, &from));
454         snprintf(name, sizeof(name) / sizeof(char) -1, "%li", to);
455         IGRAPH_CHECK(igraph_trie_get(&trie, name, &to));
456         if (igraph_trie_size(&trie) != no_of_nodes) {
457             IGRAPH_ERROR("Unknown node id found in an edge in GML file.", IGRAPH_PARSEERROR);
458         }
459         VECTOR(edges)[edgeptr++] = from;
460         VECTOR(edges)[edgeptr++] = to;
461     }
462 
463     /* and add vertex attributes */
464     for (i = 0; i < igraph_gml_tree_length(gtree); i++) {
465         const char *n;
466         char name[100];
467         long int j, k;
468         n = igraph_gml_tree_name(gtree, i);
469         if (!strcmp(n, "node")) {
470             igraph_gml_tree_t *node = igraph_gml_tree_get_tree(gtree, i);
471             long int iidx = igraph_gml_tree_find(node, "id", 0);
472             long int id = igraph_gml_tree_get_integer(node, iidx);
473             snprintf(name, sizeof(name) / sizeof(char) -1, "%li", id);
474             igraph_trie_get(&trie, name, &id);
475             for (j = 0; j < igraph_gml_tree_length(node); j++) {
476                 const char *aname = igraph_gml_tree_name(node, j);
477                 igraph_attribute_record_t *atrec;
478                 int type;
479                 igraph_trie_get(&vattrnames, aname, &k);
480                 atrec = VECTOR(vattrs)[k];
481                 type = atrec->type;
482                 if (type == IGRAPH_ATTRIBUTE_NUMERIC) {
483                     igraph_vector_t *v = (igraph_vector_t *)atrec->value;
484                     IGRAPH_CHECK(igraph_i_gml_toreal(node, j, VECTOR(*v) + id));
485                 } else if (type == IGRAPH_ATTRIBUTE_STRING) {
486                     igraph_strvector_t *v = (igraph_strvector_t *)atrec->value;
487                     const char *value = igraph_i_gml_tostring(node, j);
488                     IGRAPH_CHECK(igraph_strvector_set(v, id, value));
489                 }
490             }
491         }
492     }
493 
494     igraph_trie_destroy(&trie);
495     igraph_trie_destroy(&gattrnames);
496     igraph_trie_destroy(&vattrnames);
497     igraph_trie_destroy(&eattrnames);
498     IGRAPH_FINALLY_CLEAN(4);
499 
500     IGRAPH_CHECK(igraph_empty_attrs(graph, 0, directed, 0)); /* TODO */
501     IGRAPH_CHECK(igraph_add_vertices(graph, (igraph_integer_t) no_of_nodes,
502                                      &vattrs));
503     IGRAPH_CHECK(igraph_add_edges(graph, &edges, &eattrs));
504 
505     igraph_i_gml_destroy_attrs(attrs);
506     igraph_vector_destroy(&edges);
507     igraph_i_gml_parsedata_destroy(&context);
508     IGRAPH_FINALLY_CLEAN(3);
509 
510     return IGRAPH_SUCCESS;
511 }
512 
igraph_i_gml_convert_to_key(const char * orig,char ** key)513 static int igraph_i_gml_convert_to_key(const char *orig, char **key) {
514     int no = 1;
515     char strno[50];
516     size_t i, len = strlen(orig), newlen = 0, plen = 0;
517 
518     /* do we need a prefix? */
519     if (len == 0 || !isalpha(orig[0])) {
520         no++;
521         snprintf(strno, sizeof(strno) - 1, "igraph");
522         plen = newlen = strlen(strno);
523     }
524     for (i = 0; i < len; i++) {
525         if (isalnum(orig[i])) {
526             newlen++;
527         }
528     }
529     *key = IGRAPH_CALLOC(newlen + 1, char);
530     if (! *key) {
531         IGRAPH_ERROR("Writing GML format failed.", IGRAPH_ENOMEM);
532     }
533     memcpy(*key, strno, plen * sizeof(char));
534     for (i = 0; i < len; i++) {
535         if (isalnum(orig[i])) {
536             (*key)[plen++] = orig[i];
537         }
538     }
539     (*key)[newlen] = '\0';
540 
541     return IGRAPH_SUCCESS;
542 }
543 
544 #define CHECK(cmd) do { ret=cmd; if (ret<0) IGRAPH_ERROR("Writing GML format failed.", IGRAPH_EFILE); } while (0)
545 
546 /**
547  * \function igraph_write_graph_gml
548  * \brief Write the graph to a stream in GML format
549  *
550  * GML is a quite general textual format, see
551  * http://www.fim.uni-passau.de/en/fim/faculty/chairs/theoretische-informatik/projects.html for details.
552  *
553  * </para><para> The graph, vertex and edges attributes are written to the
554  * file as well, if they are numeric or string.
555  *
556  * </para><para> As igraph is more forgiving about attribute names, it might
557  * be necessary to simplify the them before writing to the GML file.
558  * This way we'll have a syntactically correct GML file. The following
559  * simple procedure is performed on each attribute name: first the alphanumeric
560  * characters are extracted, the others are ignored. Then if the first character
561  * is not a letter then the attribute name is prefixed with <quote>igraph</quote>.
562  * Note that this might result identical names for two attributes, igraph
563  * does not check this.
564  *
565  * </para><para> The <quote>id</quote> vertex attribute is treated specially.
566  * If the <parameter>id</parameter> argument is not 0 then it should be a numeric
567  * vector with the vertex ids and the <quote>id</quote> vertex attribute is
568  * ignored (if there is one). If <parameter>id</parameter> is 0 and there is a
569  * numeric <quote>id</quote> vertex attribute that is used instead. If ids
570  * are not specified in either way then the regular igraph vertex ids are used.
571  *
572  * </para><para> Note that whichever way vertex ids are specified, their
573  * uniqueness is not checked.
574  *
575  * </para><para> If the graph has edge attributes named <quote>source</quote>
576  * or <quote>target</quote> they're silently ignored. GML uses these attributes
577  * to specify the edges, so we cannot write them to the file. Rename them
578  * before calling this function if you want to preserve them.
579  * \param graph The graph to write to the stream.
580  * \param outstream The stream to write the file to.
581  * \param id Either <code>NULL</code> or a numeric vector with the vertex ids.
582  *        See details above.
583  * \param creator An optional string to write to the stream in the creator line.
584  *        If this is 0 then the current date and time is added.
585  * \return Error code.
586  *
587  * Time complexity: should be proportional to the number of characters written
588  * to the file.
589  *
590  * \sa \ref igraph_read_graph_gml() for reading GML files,
591  * \ref igraph_read_graph_graphml() for a more modern format.
592  *
593  * \example examples/simple/gml.c
594  */
595 
igraph_write_graph_gml(const igraph_t * graph,FILE * outstream,const igraph_vector_t * id,const char * creator)596 int igraph_write_graph_gml(const igraph_t *graph, FILE *outstream,
597                            const igraph_vector_t *id, const char *creator) {
598     int ret;
599     igraph_strvector_t gnames, vnames, enames;
600     igraph_vector_t gtypes, vtypes, etypes;
601     igraph_vector_t numv;
602     igraph_strvector_t strv;
603     igraph_vector_bool_t boolv;
604     long int i;
605     long int no_of_nodes = igraph_vcount(graph);
606     long int no_of_edges = igraph_ecount(graph);
607 
608     igraph_vector_t v_myid;
609     const igraph_vector_t *myid = id;
610 
611     time_t curtime = time(0);
612     char *timestr = ctime(&curtime);
613     timestr[strlen(timestr) - 1] = '\0'; /* nicely remove \n */
614 
615     CHECK(fprintf(outstream,
616                   "Creator \"igraph version %s %s\"\nVersion 1\ngraph\n[\n",
617                   IGRAPH_VERSION, creator ? creator : timestr));
618 
619     IGRAPH_STRVECTOR_INIT_FINALLY(&gnames, 0);
620     IGRAPH_STRVECTOR_INIT_FINALLY(&vnames, 0);
621     IGRAPH_STRVECTOR_INIT_FINALLY(&enames, 0);
622     IGRAPH_VECTOR_INIT_FINALLY(&gtypes, 0);
623     IGRAPH_VECTOR_INIT_FINALLY(&vtypes, 0);
624     IGRAPH_VECTOR_INIT_FINALLY(&etypes, 0);
625     IGRAPH_CHECK(igraph_i_attribute_get_info(graph,
626                  &gnames, &gtypes,
627                  &vnames, &vtypes,
628                  &enames, &etypes));
629 
630     IGRAPH_VECTOR_INIT_FINALLY(&numv, 1);
631     IGRAPH_STRVECTOR_INIT_FINALLY(&strv, 1);
632     IGRAPH_VECTOR_BOOL_INIT_FINALLY(&boolv, 1);
633 
634     /* Check whether there is an 'id' node attribute if the supplied is 0 */
635     if (!id) {
636         igraph_bool_t found = 0;
637         for (i = 0; i < igraph_vector_size(&vtypes); i++) {
638             char *n;
639             igraph_strvector_get(&vnames, i, &n);
640             if (!strcmp(n, "id") && VECTOR(vtypes)[i] == IGRAPH_ATTRIBUTE_NUMERIC) {
641                 found = 1; break;
642             }
643         }
644         if (found) {
645             IGRAPH_VECTOR_INIT_FINALLY(&v_myid, no_of_nodes);
646             IGRAPH_CHECK(igraph_i_attribute_get_numeric_vertex_attr(graph, "id",
647                          igraph_vss_all(),
648                          &v_myid));
649             myid = &v_myid;
650         }
651     }
652 
653     /* directedness */
654     CHECK(fprintf(outstream, "  directed %i\n", igraph_is_directed(graph) ? 1 : 0));
655 
656     /* Graph attributes first */
657     for (i = 0; i < igraph_vector_size(&gtypes); i++) {
658         char *name, *newname;
659         igraph_strvector_get(&gnames, i, &name);
660         IGRAPH_CHECK(igraph_i_gml_convert_to_key(name, &newname));
661         IGRAPH_FINALLY(igraph_free, newname);
662         if (VECTOR(gtypes)[i] == IGRAPH_ATTRIBUTE_NUMERIC) {
663             IGRAPH_CHECK(igraph_i_attribute_get_numeric_graph_attr(graph, name, &numv));
664             CHECK(fprintf(outstream, "  %s ", newname));
665             CHECK(igraph_real_fprintf_precise(outstream, VECTOR(numv)[0]));
666             CHECK(fputc('\n', outstream));
667         } else if (VECTOR(gtypes)[i] == IGRAPH_ATTRIBUTE_STRING) {
668             char *s;
669             IGRAPH_CHECK(igraph_i_attribute_get_string_graph_attr(graph, name, &strv));
670             igraph_strvector_get(&strv, 0, &s);
671             CHECK(fprintf(outstream, "  %s \"%s\"\n", newname, s));
672         } else if (VECTOR(gtypes)[i] == IGRAPH_ATTRIBUTE_BOOLEAN) {
673             IGRAPH_CHECK(igraph_i_attribute_get_bool_graph_attr(graph, name, &boolv));
674             CHECK(fprintf(outstream, "  %s %d\n", newname, VECTOR(boolv)[0] ? 1 : 0));
675             IGRAPH_WARNING("A boolean graph attribute was converted to numeric");
676         } else {
677             IGRAPH_WARNING("A non-numeric, non-string, non-boolean graph attribute ignored");
678         }
679         IGRAPH_FREE(newname);
680         IGRAPH_FINALLY_CLEAN(1);
681     }
682 
683     /* Now come the vertices */
684     for (i = 0; i < no_of_nodes; i++) {
685         long int j;
686         CHECK(fprintf(outstream, "  node\n  [\n"));
687         /* id */
688         CHECK(fprintf(outstream, "    id %li\n", myid ? (long int)VECTOR(*myid)[i] : i));
689         /* other attributes */
690         for (j = 0; j < igraph_vector_size(&vtypes); j++) {
691             int type = (int) VECTOR(vtypes)[j];
692             char *name, *newname;
693             igraph_strvector_get(&vnames, j, &name);
694             if (!strcmp(name, "id")) {
695                 continue;
696             }
697             IGRAPH_CHECK(igraph_i_gml_convert_to_key(name, &newname));
698             IGRAPH_FINALLY(igraph_free, newname);
699             if (type == IGRAPH_ATTRIBUTE_NUMERIC) {
700                 IGRAPH_CHECK(igraph_i_attribute_get_numeric_vertex_attr(graph, name,
701                              igraph_vss_1((igraph_integer_t) i), &numv));
702                 CHECK(fprintf(outstream, "    %s ", newname));
703                 CHECK(igraph_real_fprintf_precise(outstream, VECTOR(numv)[0]));
704                 CHECK(fputc('\n', outstream));
705             } else if (type == IGRAPH_ATTRIBUTE_STRING) {
706                 char *s;
707                 IGRAPH_CHECK(igraph_i_attribute_get_string_vertex_attr(graph, name,
708                              igraph_vss_1((igraph_integer_t) i), &strv));
709                 igraph_strvector_get(&strv, 0, &s);
710                 CHECK(fprintf(outstream, "    %s \"%s\"\n", newname, s));
711             } else if (type == IGRAPH_ATTRIBUTE_BOOLEAN) {
712                 IGRAPH_CHECK(igraph_i_attribute_get_bool_vertex_attr(graph, name,
713                              igraph_vss_1((igraph_integer_t) i), &boolv));
714                 CHECK(fprintf(outstream, "    %s %d\n", newname, VECTOR(boolv)[0] ? 1 : 0));
715                 IGRAPH_WARNING("A boolean vertex attribute was converted to numeric");
716             } else {
717                 IGRAPH_WARNING("A non-numeric, non-string, non-boolean edge attribute was ignored");
718             }
719             IGRAPH_FREE(newname);
720             IGRAPH_FINALLY_CLEAN(1);
721         }
722         CHECK(fprintf(outstream, "  ]\n"));
723     }
724 
725     /* The edges too */
726     for (i = 0; i < no_of_edges; i++) {
727         long int from = IGRAPH_FROM(graph, i);
728         long int to = IGRAPH_TO(graph, i);
729         long int j;
730         CHECK(fprintf(outstream, "  edge\n  [\n"));
731         /* source and target */
732         CHECK(fprintf(outstream, "    source %li\n",
733                       myid ? (long int)VECTOR(*myid)[from] : from));
734         CHECK(fprintf(outstream, "    target %li\n",
735                       myid ? (long int)VECTOR(*myid)[to] : to));
736 
737         /* other attributes */
738         for (j = 0; j < igraph_vector_size(&etypes); j++) {
739             int type = (int) VECTOR(etypes)[j];
740             char *name, *newname;
741             igraph_strvector_get(&enames, j, &name);
742             if (!strcmp(name, "source") || !strcmp(name, "target")) {
743                 continue;
744             }
745             IGRAPH_CHECK(igraph_i_gml_convert_to_key(name, &newname));
746             IGRAPH_FINALLY(igraph_free, newname);
747             if (type == IGRAPH_ATTRIBUTE_NUMERIC) {
748                 IGRAPH_CHECK(igraph_i_attribute_get_numeric_edge_attr(graph, name,
749                              igraph_ess_1((igraph_integer_t) i), &numv));
750                 CHECK(fprintf(outstream, "    %s ", newname));
751                 CHECK(igraph_real_fprintf_precise(outstream, VECTOR(numv)[0]));
752                 CHECK(fputc('\n', outstream));
753             } else if (type == IGRAPH_ATTRIBUTE_STRING) {
754                 char *s;
755                 IGRAPH_CHECK(igraph_i_attribute_get_string_edge_attr(graph, name,
756                              igraph_ess_1((igraph_integer_t) i), &strv));
757                 igraph_strvector_get(&strv, 0, &s);
758                 CHECK(fprintf(outstream, "    %s \"%s\"\n", newname, s));
759             } else if (type == IGRAPH_ATTRIBUTE_BOOLEAN) {
760                 IGRAPH_CHECK(igraph_i_attribute_get_bool_edge_attr(graph, name,
761                              igraph_ess_1((igraph_integer_t) i), &boolv));
762                 CHECK(fprintf(outstream, "    %s %d\n", newname, VECTOR(boolv)[0] ? 1 : 0));
763                 IGRAPH_WARNING("A boolean edge attribute was converted to numeric");
764             } else {
765                 IGRAPH_WARNING("A non-numeric, non-string, non-boolean edge attribute was ignored");
766             }
767             IGRAPH_FREE(newname);
768             IGRAPH_FINALLY_CLEAN(1);
769         }
770         CHECK(fprintf(outstream, "  ]\n"));
771     }
772 
773     CHECK(fprintf(outstream, "]\n"));
774 
775     if (&v_myid == myid) {
776         igraph_vector_destroy(&v_myid);
777         IGRAPH_FINALLY_CLEAN(1);
778     }
779 
780     igraph_vector_bool_destroy(&boolv);
781     igraph_strvector_destroy(&strv);
782     igraph_vector_destroy(&numv);
783     igraph_vector_destroy(&etypes);
784     igraph_vector_destroy(&vtypes);
785     igraph_vector_destroy(&gtypes);
786     igraph_strvector_destroy(&enames);
787     igraph_strvector_destroy(&vnames);
788     igraph_strvector_destroy(&gnames);
789     IGRAPH_FINALLY_CLEAN(9);
790 
791     return IGRAPH_SUCCESS;
792 }
793 
794 #undef CHECK
795