1 /** \file
2  * \brief Implementation of GEXF format parsing utilities.
3  *
4  * \author Łukasz Hanuszczak, Tilo Wiedera
5  *
6  * \par License:
7  * This file is part of the Open Graph Drawing Framework (OGDF).
8  *
9  * \par
10  * Copyright (C)<br>
11  * See README.md in the OGDF root directory for details.
12  *
13  * \par
14  * This program is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU General Public License
16  * Version 2 or 3 as published by the Free Software Foundation;
17  * see the file LICENSE.txt included in the packaging of this file
18  * for details.
19  *
20  * \par
21  * This program is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24  * GNU General Public License for more details.
25  *
26  * \par
27  * You should have received a copy of the GNU General Public
28  * License along with this program; if not, see
29  * http://www.gnu.org/copyleft/gpl.html
30  */
31 
32 #include <ogdf/fileformats/GEXF.h>
33 #include <ogdf/fileformats/GexfParser.h>
34 #include <ogdf/fileformats/GraphML.h>
35 #include <ogdf/fileformats/GraphIO.h>
36 
37 
38 namespace ogdf {
39 
40 namespace gexf {
41 
42 
Parser(std::istream & is)43 Parser::Parser(std::istream &is) : m_is(is)
44 {
45 }
46 
47 
readAttrDefs(std::unordered_map<std::string,std::string> & attrMap,const pugi::xml_node attrsTag)48 static inline bool readAttrDefs(
49 	std::unordered_map<std::string,
50 	std::string> &attrMap,
51 	const pugi::xml_node attrsTag)
52 {
53 	for (auto attrTag : attrsTag.children("attribute")) {
54 		pugi::xml_attribute idAttr = attrTag.attribute("id");
55 		pugi::xml_attribute idTitle = attrTag.attribute("title");
56 
57 		if(!idAttr || !idTitle)
58 		{
59 			GraphIO::logger.lout() << "\"id\" or \"title\" attribute missing." << std::endl;
60 			return false;
61 		}
62 
63 
64 		attrMap[idAttr.value()] = idTitle.value();
65 	}
66 
67 	return true;
68 }
69 
init()70 bool Parser::init()
71 {
72 	pugi::xml_parse_result result = m_xml.load(m_is);
73 
74 	if (!result) {
75 		GraphIO::logger.lout() << "XML parser error: " << result.description() << std::endl;
76 		return false;
77 	}
78 
79 	m_nodeId.clear();
80 	m_clusterId.clear();
81 	m_nodeAttr.clear();
82 	m_edgeAttr.clear();
83 
84 	pugi::xml_node rootNode = m_xml.child("gexf");
85 
86 	if(!rootNode) {
87 		GraphIO::logger.lout() << "Root tag must be \"gexf\"." << std::endl;
88 		return false;
89 	}
90 
91 	m_graphTag = rootNode.child("graph");
92 	if (!m_graphTag) {
93 		GraphIO::logger.lout() << "Expected \"graph\" tag." << std::endl;
94 		return false;
95 	}
96 
97 	m_nodesTag = m_graphTag.child("nodes");
98 	if(!m_nodesTag) {
99 		GraphIO::logger.lout() << "No \"nodes\" tag found in graph." << std::endl;
100 		return false;
101 	}
102 
103 	m_edgesTag = m_graphTag.child("edges");
104 	if(!m_edgesTag) {
105 		GraphIO::logger.lout() << "No \"edges\" tag found in graph." << std::endl;
106 		return false;
107 	}
108 
109 	// Read attributes definitions. Could be lazily read later only
110 	// if GraphAttributes is given.
111 	for(pugi::xml_node attrsTag : m_graphTag.children("attributes")) {
112 		pugi::xml_attribute classAttr = attrsTag.attribute("class");
113 
114 		if(!classAttr) {
115 			GraphIO::logger.lout() << "attributes tag is missing a class." << std::endl;
116 			return false;
117 		}
118 
119 		std::unordered_map<std::string, std::string> *attrMap;
120 
121 		if(string(classAttr.value()) == "node") {
122 			attrMap = &m_nodeAttr;
123 		} else if(string(classAttr.value()) == "edge") {
124 			attrMap = &m_edgeAttr;
125 		} else {
126 			GraphIO::logger.lout() << "unknown attributes tag class ('"
127 			           << classAttr.value() << "')." << std::endl;
128 			return false;
129 		}
130 
131 		if(!readAttrDefs(*attrMap, attrsTag)) {
132 			return false;
133 		}
134 	}
135 
136 	return true;
137 }
138 
139 
readNodes(Graph & G,GraphAttributes * GA)140 bool Parser::readNodes(Graph &G, GraphAttributes *GA)
141 {
142 	for(pugi::xml_node nodeTag : m_nodesTag.children("node")) {
143 		pugi::xml_attribute idAttr = nodeTag.attribute("id");
144 
145 		if(!idAttr) {
146 			GraphIO::logger.lout() << "node is missing an id attribute." << std::endl;
147 			return false;
148 		}
149 
150 		const node v = G.newNode();
151 		m_nodeId[idAttr.value()] = v;
152 
153 		if(GA) {
154 			readAttributes(*GA, v, nodeTag);
155 		}
156 	}
157 
158 	return true;
159 }
160 
161 
readCluster(Graph & G,ClusterGraph & C,ClusterGraphAttributes * CA,cluster rootCluster,const pugi::xml_node rootTag)162 bool Parser::readCluster(
163 	Graph &G, ClusterGraph &C,
164 	ClusterGraphAttributes *CA,
165 	cluster rootCluster,
166 	const pugi::xml_node rootTag)
167 {
168 	for(pugi::xml_node nodeTag : rootTag.children("node")) {
169 		pugi::xml_attribute idAttr = nodeTag.attribute("id");
170 
171 		if(!idAttr) {
172 			GraphIO::logger.lout() << "node is missing an id attribute." << std::endl;
173 			return false;
174 		}
175 
176 		// Node is a cluster iff it contains other nodes.
177 		pugi::xml_node nodesTag = nodeTag.child("nodes");
178 		if(nodesTag) {
179 			// Node tag found, therefore it is a cluster.
180 			const cluster c = C.newCluster(rootCluster);
181 			m_clusterId[idAttr.value()] = c;
182 
183 			if(!readCluster(G, C, CA, c, nodesTag)) {
184 				return false;
185 			}
186 		} else {
187 			// Node tag not found, therefore it is "normal" node.
188 			const node v = G.newNode();
189 			C.reassignNode(v, rootCluster);
190 			m_nodeId[idAttr.value()] = v;
191 
192 			if(CA) {
193 				readAttributes(*CA, v, nodeTag);
194 			}
195 		}
196 	}
197 
198 	return true;
199 }
200 
201 
202 /*
203  * Just a helper method to avoid ugly code in Parser#readEdges method. It just
204  * populates \p nodes list with either a given \p v node (if not \c nullptr) or all
205  * nodes in certain cluster found by performing a lookup with given \p id in
206  * \p clusterId association.
207  */
edgeNodes(node v,const std::string & id,const std::unordered_map<std::string,cluster> & clusterId,List<node> & nodes)208 static inline bool edgeNodes(
209 	node v,
210 	const std::string &id,
211 	const std::unordered_map<std::string, cluster> &clusterId,
212 	List<node> &nodes)
213 {
214 	if(v) {
215 		nodes.clear();
216 		nodes.pushBack(v);
217 	} else {
218 		auto c = clusterId.find(id);
219 		if(c == std::end(clusterId)) {
220 			return false;
221 		}
222 
223 		(c->second)->getClusterNodes(nodes);
224 	}
225 
226 	return true;
227 }
228 
229 
readEdges(Graph & G,ClusterGraph * C,GraphAttributes * GA)230 bool Parser::readEdges(Graph &G, ClusterGraph *C, GraphAttributes *GA)
231 {
232 	List<node> sourceNodes, targetNodes;
233 
234 	for(pugi::xml_node edgeTag : m_edgesTag.children("edge")) {
235 		pugi::xml_attribute sourceAttr = edgeTag.attribute("source");
236 		if(!sourceAttr) {
237 			GraphIO::logger.lout() << "edge is missing a source attribute." << std::endl;
238 			return false;
239 		}
240 
241 		pugi::xml_attribute targetAttr = edgeTag.attribute("target");
242 		if(!targetAttr) {
243 			GraphIO::logger.lout() << "edge is missing a target attribute." << std::endl;
244 			return false;
245 		}
246 
247 		const node source = m_nodeId[sourceAttr.value()];
248 		const node target = m_nodeId[targetAttr.value()];
249 
250 		if(source && target) {
251 			const edge e = G.newEdge(source, target);
252 			if(GA) {
253 				readAttributes(*GA, e, edgeTag);
254 			}
255 		} else if(C && edgeNodes(source, sourceAttr.value(), m_clusterId, sourceNodes)
256 		            && edgeNodes(target, targetAttr.value(), m_clusterId, targetNodes)) {
257 			// So, we perform cartesian product on two sets with Graph#newEdge.
258 			for(node s : sourceNodes) {
259 				for(node t : targetNodes) {
260 					const edge e = G.newEdge(s, t);
261 					if(GA) {
262 						readAttributes(*GA, e, edgeTag);
263 					}
264 				}
265 			}
266 		} else {
267 			GraphIO::logger.lout() << "source or target node doesn't exist." << std::endl;
268 			return false;
269 		}
270 	}
271 
272 	return true;
273 }
274 
275 
readColor(Color & color,const pugi::xml_node tag)276 static inline bool readColor(Color &color, const pugi::xml_node tag)
277 {
278 	pugi::xml_attribute redAttr = tag.attribute("red");
279 	pugi::xml_attribute greenAttr = tag.attribute("green");
280 	pugi::xml_attribute blueAttr = tag.attribute("blue");
281 	pugi::xml_attribute alphaAttr = tag.attribute("alpha");
282 
283 	if(!redAttr || !greenAttr || !blueAttr)
284 	{
285 		GraphIO::logger.lout() << "Missing compound attribute on color tag." << std::endl;
286 		return false;
287 	}
288 
289 	bool success = true;
290 	success &= GraphIO::setColorValue(redAttr.as_int(), [&](uint8_t val) { color.red(val); });
291 	success &= GraphIO::setColorValue(greenAttr.as_int(), [&](uint8_t val) { color.green(val); });
292 	success &= GraphIO::setColorValue(blueAttr.as_int(), [&](uint8_t val) { color.blue(val); });
293 	success &= !alphaAttr || GraphIO::setColorValue(alphaAttr.as_int(), [&](uint8_t val) { color.alpha(val); });
294 	return success;
295 }
296 
297 
readVizAttribute(GraphAttributes & GA,node v,const pugi::xml_node tag)298 static inline bool readVizAttribute(
299 	GraphAttributes &GA,
300 	node v,
301 	const pugi::xml_node tag)
302 {
303 	const long attrs = GA.attributes();
304 
305 	if(string(tag.name()) == "viz:position") {
306 		if(attrs & GraphAttributes::nodeGraphics) {
307 			pugi::xml_attribute xAttr = tag.attribute("x");
308 			pugi::xml_attribute yAttr = tag.attribute("y");
309 			pugi::xml_attribute zAttr = tag.attribute("z");
310 
311 			if(!xAttr || !yAttr) {
312 				GraphIO::logger.lout() << "Missing \"x\" or \"y\" in position tag." << std::endl;
313 				return false;
314 			}
315 
316 			GA.x(v) = xAttr.as_double();
317 			GA.y(v) = yAttr.as_double();
318 
319 			// z attribute is optional and avaliable only in \a threeD mode
320 			if (zAttr && (attrs & GraphAttributes::threeD)) {
321 				GA.z(v) = zAttr.as_double();
322 			}
323 		}
324 	} else if(string(tag.name()) == "viz:size") {
325 		if(attrs & GraphAttributes::nodeGraphics) {
326 			pugi::xml_attribute valueAttr = tag.attribute("value");
327 			if (!valueAttr) {
328 				GraphIO::logger.lout() << "\"size\" attribute is missing a value." << std::endl;
329 				return false;
330 			}
331 
332 			double size = valueAttr.as_double();
333 			GA.width(v) = size * LayoutStandards::defaultNodeWidth();
334 			GA.height(v) = size * LayoutStandards::defaultNodeHeight();
335 		}
336 	} else if(string(tag.name()) == "viz:shape") {
337 		if(attrs & GraphAttributes::nodeGraphics) {
338 			pugi::xml_attribute valueAttr = tag.attribute("value");
339 			if(!valueAttr) {
340 				GraphIO::logger.lout() << "\"shape\" attribute is missing a value." << std::endl;
341 				return false;
342 			}
343 
344 			GA.shape(v) = toShape(valueAttr.value());
345 		}
346 	} else if(string(tag.name()) == "viz:color") {
347 		if(attrs & GraphAttributes::nodeStyle) {
348 			return readColor(GA.fillColor(v), tag);
349 		}
350 	} else {
351 		GraphIO::logger.lout() << "Incorrect tag: \"" << tag.name() << "\"." << std::endl;
352 		return false;
353 	}
354 
355 	return true;
356 }
357 
358 
readVizAttribute(GraphAttributes & GA,edge e,const pugi::xml_node tag)359 static inline bool readVizAttribute(
360 	GraphAttributes &GA,
361 	edge e,
362 	const pugi::xml_node tag)
363 {
364 	const long attrs = GA.attributes();
365 
366 	if(string(tag.name()) == "viz:color") {
367 		if(attrs & GraphAttributes::edgeStyle) {
368 			return readColor(GA.strokeColor(e), tag);
369 		}
370 	} else if(string(tag.name()) == "viz:thickness") {
371 		auto thickAttr = tag.attribute("value");
372 		if(!thickAttr) {
373 			GraphIO::logger.lout() << "Missing \"value\" on thickness tag." << std::endl;
374 			return false;
375 		}
376 
377 		GA.strokeWidth(e) = thickAttr.as_double();
378 	} else if(string(tag.name()) == "viz:shape") {
379 		// GEXF supports solid, dotted, dashed, double.
380 		// We don't support double, but dashdot and dashdotdot instead.
381 		if(attrs & GraphAttributes::edgeStyle) {
382 			pugi::xml_attribute valueAttr = tag.attribute("value");
383 			if(!valueAttr) {
384 				GraphIO::logger.lout() << "Missing \"value\" on shape tag." << std::endl;
385 				return false;
386 			}
387 
388 			GA.strokeType(e) = toStrokeType(valueAttr.value());
389 		}
390 	} else {
391 		GraphIO::logger.lout() << "Incorrect tag \"" << tag.name() << "\"." << std::endl;
392 		return false;
393 	}
394 
395 	return true;
396 }
397 
398 
readAttValue(GraphAttributes & GA,node v,const std::string & name,const std::string & value)399 static inline void readAttValue(
400 	GraphAttributes &GA,
401 	node v,
402 	const std::string &name,
403 	const std::string &value)
404 {
405 	const long attrs = GA.attributes();
406 
407 	// For not "viz" attributes, we use GraphML ones.
408 	switch(graphml::toAttribute(name)) {
409 	case graphml::Attribute::NodeId:
410 		if(attrs & GraphAttributes::nodeId) {
411 			std::istringstream ss(value);
412 			ss >> GA.idNode(v);
413 		}
414 		break;
415 	case graphml::Attribute::NodeType:
416 		if(attrs & GraphAttributes::nodeType) {
417 			GA.type(v) = graphml::toNodeType(value);
418 		}
419 		break;
420 	case graphml::Attribute::Template:
421 		if(attrs & GraphAttributes::nodeTemplate) {
422 			GA.templateNode(v) = value;
423 		}
424 		break;
425 	case graphml::Attribute::NodeWeight:
426 		if(attrs & GraphAttributes::nodeWeight) {
427 			std::istringstream ss(value);
428 			ss >> GA.weight(v);
429 		}
430 		break;
431 	case graphml::Attribute::NodeStrokeType:
432 		if(attrs & GraphAttributes::nodeStyle) {
433 			GA.strokeType(v) = fromString<StrokeType>(value);
434 		}
435 		break;
436 	case graphml::Attribute::NodeFillPattern:
437 		if(attrs & GraphAttributes::nodeStyle) {
438 			GA.fillPattern(v) = fromString<FillPattern>(value);
439 		}
440 		break;
441 	case graphml::Attribute::NodeFillBackground:
442 		if(attrs & GraphAttributes::nodeStyle) {
443 			GA.fillBgColor(v) = value;
444 		}
445 		break;
446 	case graphml::Attribute::NodeStrokeWidth:
447 		if(attrs & GraphAttributes::nodeWeight) {
448 			std::istringstream ss(value);
449 			ss >> GA.strokeWidth(v);
450 		}
451 		break;
452 	case graphml::Attribute::NodeStrokeColor:
453 		if(attrs & GraphAttributes::nodeStyle) {
454 			GA.strokeColor(v) = value;
455 		}
456 		break;
457 	case graphml::Attribute::NodeLabelX:
458 		if(attrs & GraphAttributes::nodeLabelPosition) {
459 			std::istringstream ss(value);
460 			ss >> GA.xLabel(v);
461 		}
462 		break;
463 	case graphml::Attribute::NodeLabelY:
464 		if(attrs & GraphAttributes::nodeLabelPosition) {
465 			std::istringstream ss(value);
466 			ss >> GA.yLabel(v);
467 		}
468 		break;
469 	case graphml::Attribute::NodeLabelZ:
470 		if(attrs & GraphAttributes::nodeLabelPosition && attrs & GraphAttributes::threeD) {
471 			std::istringstream ss(value);
472 			ss >> GA.zLabel(v);
473 		}
474 		break;
475 	default:
476 		GraphIO::logger.slout() << "unsupported GraphML attr " << name << "\n";
477 		break;
478 	}
479 }
480 
481 
readAttValue(GraphAttributes & GA,edge e,const std::string & name,const std::string & value)482 static inline void readAttValue(
483 	GraphAttributes &GA,
484 	edge e,
485 	const std::string &name,
486 	const std::string &value)
487 {
488 	const long attrs = GA.attributes();
489 
490 	// For not "viz" attributes, we use GraphML ones.
491 	switch(graphml::toAttribute(name)) {
492 	case graphml::Attribute::EdgeType:
493 		if(attrs & GraphAttributes::edgeType) {
494 			GA.type(e) = graphml::toEdgeType(value);
495 		}
496 		break;
497 	case graphml::Attribute::EdgeArrow:
498 		if(attrs & GraphAttributes::edgeArrow) {
499 			GA.arrowType(e) = graphml::toArrow(value);
500 		}
501 		break;
502 	case graphml::Attribute::EdgeBends:
503 		if(attrs & GraphAttributes::edgeGraphics) {
504 			std::stringstream is(value);
505 			double x, y;
506 			DPolyline& polyline = GA.bends(e);
507 			polyline.clear();
508 			while(is >> x && is >> y) {
509 				polyline.pushBack(DPoint(x, y));
510 			}
511 		}
512 		break;
513 	case graphml::Attribute::EdgeSubGraph:
514 		if(attrs & GraphAttributes::edgeSubGraphs) {
515 			std::stringstream sstream(value);
516 			int sg;
517 			while(sstream >> sg) {
518 				GA.addSubGraph(e, sg);
519 			}
520 		}
521 		break;
522 	default:
523 		// Not supported attribute, just ignore.
524 		break;
525 	}
526 }
527 
528 
529 template <typename T>
readAttValues(GraphAttributes & GA,T element,const pugi::xml_node tag,std::unordered_map<std::string,std::string> & attrMap)530 static inline bool readAttValues(
531 	GraphAttributes &GA,
532 	T element,
533 	const pugi::xml_node tag,
534 	std::unordered_map<std::string, std::string> &attrMap)
535 {
536 	for(pugi::xml_node attVal : tag.children("attvalue")) {
537 		pugi::xml_attribute forAttr = attVal.attribute("for");
538 		pugi::xml_attribute valueAttr = attVal.attribute("value");
539 
540 		if(!forAttr || !valueAttr)
541 		{
542 			GraphIO::logger.lout() << "\"for\" or \"value\" not found for attvalue tag." << std::endl;
543 			return false;
544 		}
545 
546 		const std::string &attrName = attrMap[forAttr.value()];
547 		readAttValue(GA, element, attrName, valueAttr.value());
548 	}
549 
550 	return true;
551 }
552 
553 
readAttributes(GraphAttributes & GA,node v,const pugi::xml_node nodeTag)554 bool Parser::readAttributes(
555 	GraphAttributes &GA, node v,
556 	const pugi::xml_node nodeTag)
557 {
558 	if (GA.has(GraphAttributes::nodeLabel)) {
559 		pugi::xml_attribute labelAttr = nodeTag.attribute("label");
560 		if (labelAttr) {
561 			GA.label(v) = labelAttr.as_string();
562 		}
563 	}
564 	for(const pugi::xml_node tag : nodeTag.children()) {
565 		if(string(tag.name()) == "nodes") {
566 			continue;
567 		} else if(string(tag.name()) == "attvalues") {
568 			if (!readAttValues(GA, v, tag, m_nodeAttr)) {
569 				return false;
570 			}
571 		} else if(!readVizAttribute(GA, v, tag)) {
572 			return false;
573 		}
574 	}
575 
576 	return true;
577 }
578 
579 
readAttributes(GraphAttributes & GA,edge e,const pugi::xml_node edgeTag)580 bool Parser::readAttributes(
581 	GraphAttributes &GA, edge e,
582 	const pugi::xml_node edgeTag)
583 {
584 	if (GA.has(GraphAttributes::edgeLabel)) {
585 		pugi::xml_attribute labelAttr = edgeTag.attribute("label");
586 		if (labelAttr) {
587 			GA.label(e) = labelAttr.as_string();
588 		}
589 	}
590 	if (GA.has(GraphAttributes::edgeDoubleWeight)) {
591 		pugi::xml_attribute weightAttr = edgeTag.attribute("weight");
592 		GA.doubleWeight(e) = weightAttr.as_double();
593 	} else if (GA.has(GraphAttributes::edgeIntWeight)) {
594 		pugi::xml_attribute weightAttr = edgeTag.attribute("weight");
595 		GA.intWeight(e) = weightAttr.as_int();
596 	}
597 
598 	for(const pugi::xml_node tag : edgeTag.children()) {
599 		if(string(tag.name()) == "attvalues") {
600 			return readAttValues(GA, e, tag, m_edgeAttr);
601 		} else if(!readVizAttribute(GA, e, tag)) {
602 			return false;
603 		}
604 	}
605 
606 	return true;
607 }
608 
609 
read(Graph & G)610 bool Parser::read(Graph &G)
611 {
612 	if(!init()) {
613 		return false;
614 	}
615 	OGDF_ASSERT(m_graphTag);
616 
617 	G.clear();
618 
619 	return readNodes(G, nullptr) && readEdges(G, nullptr, nullptr);
620 }
621 
622 
read(Graph & G,GraphAttributes & GA)623 bool Parser::read(Graph &G, GraphAttributes &GA)
624 {
625 	if(!init()) {
626 		return false;
627 	}
628 	OGDF_ASSERT(m_graphTag);
629 
630 	G.clear();
631 
632 	// Check whether graph is directed or not (undirected by default).
633 	pugi::xml_attribute edgeDirAttr = m_graphTag.attribute("defaultedgetype");
634 	GA.directed() = !(edgeDirAttr && string(edgeDirAttr.value()) == "undirected");
635 
636 	return readNodes(G, &GA) && readEdges(G, nullptr, &GA);
637 }
638 
639 
read(Graph & G,ClusterGraph & C)640 bool Parser::read(Graph &G, ClusterGraph &C)
641 {
642 	if(!init()) {
643 		return false;
644 	}
645 	OGDF_ASSERT(m_graphTag);
646 
647 	G.clear();
648 
649 	return readCluster(G, C, nullptr, C.rootCluster(), m_nodesTag) &&
650 	       readEdges(G, &C, nullptr);
651 }
652 
653 
read(Graph & G,ClusterGraph & C,ClusterGraphAttributes & CA)654 bool Parser::read(Graph &G, ClusterGraph &C, ClusterGraphAttributes &CA)
655 {
656 	if(!init()) {
657 		return false;
658 	}
659 	OGDF_ASSERT(m_graphTag);
660 
661 	G.clear();
662 
663 	return readCluster(G, C, &CA, C.rootCluster(), m_nodesTag) &&
664 	       readEdges(G, &C, &CA);
665 }
666 
667 }
668 }
669