1 /** \file
2  * \brief Implementation of UCINET DL format parser class.
3  *
4  * \author Łukasz Hanuszczak
5  *
6  * \par License:
7  * This file is part of the Open Graph Drawing Framework (OGDF).
8  *
9  * \par
10  * Copyright (C)<br>
11  * See README.md in the OGDF root directory for details.
12  *
13  * \par
14  * This program is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU General Public License
16  * Version 2 or 3 as published by the Free Software Foundation;
17  * see the file LICENSE.txt included in the packaging of this file
18  * for details.
19  *
20  * \par
21  * This program is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24  * GNU General Public License for more details.
25  *
26  * \par
27  * You should have received a copy of the GNU General Public
28  * License along with this program; if not, see
29  * http://www.gnu.org/copyleft/gpl.html
30  */
31 
32 #include <ogdf/fileformats/DLParser.h>
33 #include <ogdf/fileformats/GraphIO.h>
34 
35 
36 namespace ogdf {
37 
38 
DLParser(std::istream & is)39 DLParser::DLParser(std::istream &is) : m_istream(is)
40 {
41 	init();
42 }
43 
44 
init()45 void DLParser::init()
46 {
47 	m_initialized = false;
48 	m_nodeId.resize(1, nullptr);
49 
50 	m_embedded = false;
51 	m_nodes = -1;
52 	m_format = Format::FullMatrix;
53 }
54 
55 
initGraph(Graph & G)56 bool DLParser::initGraph(Graph &G)
57 {
58 	G.clear();
59 
60 	if(m_nodes < 0) {
61 		GraphIO::logger.lout() << "Node count not specified or incorrect." << std::endl;
62 		return false;
63 	}
64 
65 	for(int i = 0; i < m_nodes; i++) {
66 		m_nodeId.push_back(G.newNode());
67 	}
68 	m_initialized = true;
69 
70 	return true;
71 }
72 
73 
74 // Common for both embedded and non-embedded mode.
readMatrixRow(std::istream & is,Graph & G,GraphAttributes * GA,node v)75 static inline bool readMatrixRow(
76 	std::istream &is,
77 	Graph &G, GraphAttributes *GA, node v)
78 {
79 	const long attrs = GA ? GA->attributes() : 0;
80 	const bool iweight = (attrs & GraphAttributes::edgeIntWeight) != 0;
81 	const bool dweight = (attrs & GraphAttributes::edgeDoubleWeight) != 0;
82 
83 	for(node u : G.nodes) {
84 		double weight;
85 		if(!(is >> weight)) {
86 			GraphIO::logger.lout() << "Expected matrix value." << std::endl;
87 			return false;
88 		}
89 
90 		edge e = nullptr;
91 		if(weight != 0) {
92 			e = G.newEdge(v, u);
93 		}
94 
95 		if(e && iweight) {
96 			GA->doubleWeight(e) = weight;
97 		} else if(e && dweight) {
98 			GA->intWeight(e) = static_cast<int>(weight);
99 		}
100 	}
101 
102 	return true;
103 }
104 
105 
readMatrix(Graph & G,GraphAttributes * GA)106 bool DLParser::readMatrix(Graph &G, GraphAttributes *GA)
107 {
108 	for(node v : G.nodes) {
109 		if(!readMatrixRow(m_istream, G, GA, v)) {
110 			return false;
111 		}
112 	}
113 
114 	std::string extra;
115 	if(m_istream >> extra) {
116 		GraphIO::logger.lout() << "Expected EOF, but \"" << extra << "\" found." << std::endl;
117 		return false;
118 	}
119 
120 	return true;
121 }
122 
123 
readEmbeddedMatrix(Graph & G,GraphAttributes * GA)124 bool DLParser::readEmbeddedMatrix(Graph &G, GraphAttributes *GA)
125 {
126 	// First, top-label line.
127 	for(node v : G.nodes) {
128 		std::string label;
129 		if(!(m_istream >> label)) {
130 			GraphIO::logger.lout() << "Expected node embedded label." << std::endl;
131 			return false;
132 		}
133 		toLower(label);
134 
135 		if(GA && GA->has(GraphAttributes::nodeLabel)) {
136 			GA->label(v) = label;
137 		}
138 		m_nodeLabel[label] = v;
139 	}
140 
141 	// Now, each row have a label and then "normal" row.
142 	for(int i = 0; i < G.numberOfNodes(); i++) {
143 		std::string label;
144 		if(!(m_istream >> label)) {
145 			GraphIO::logger.lout() << "Expected node embedded label." << std::endl;
146 			return false;
147 		}
148 		toLower(label);
149 
150 		node v = m_nodeLabel[label];
151 		if(!v) {
152 			GraphIO::logger.lout() << "Node with given label." << label << "\" not found." << std::endl;
153 			return false;
154 		}
155 
156 		if(!readMatrixRow(m_istream, G, GA, v)) {
157 			return false;
158 		}
159 	}
160 
161 	return true;
162 }
163 
164 
165 // Common for both embedded and non-emedded mode.
readEdgeListRow(std::istringstream & is,Graph & G,GraphAttributes * GA,node v,node u)166 static inline bool readEdgeListRow(
167 	std::istringstream &is,
168 	Graph &G, GraphAttributes *GA, node v, node u)
169 {
170 	edge e = G.newEdge(v, u);
171 	double weight;
172 	is >> weight;
173 	if(GA && !is.bad()) {
174 		if(GA->has(GraphAttributes::edgeDoubleWeight)) {
175 			GA->doubleWeight(e) = weight;
176 		} else if(GA->has(GraphAttributes::edgeIntWeight)) {
177 			GA->intWeight(e) = static_cast<int>(weight);
178 		}
179 	}
180 
181 	if (is.rdbuf()->in_avail()) {
182 		GraphIO::logger.lout() << "Could not parse entire row of edge list." << std::endl;
183 			return false;
184 	}
185 
186 	return true;
187 }
188 
189 
requestLabel(GraphAttributes * GA,node & nextFree,const std::string & label)190 inline node DLParser::requestLabel(
191 	GraphAttributes *GA, node &nextFree,
192 	const std::string &label)
193 {
194 	node v = m_nodeLabel[label];
195 
196 	if(!v) {
197 		if(nextFree == nullptr) {
198 			GraphIO::logger.lout() << "Cannot assign label \"" << label << "\", "
199 			          << "node count in the graph is too low." << std::endl;
200 			return nullptr;
201 		}
202 		m_nodeLabel[label] = v = nextFree;
203 		if(GA && GA->has(GraphAttributes::nodeLabel)) {
204 			GA->label(v) = label;
205 		}
206 		nextFree = nextFree->succ();
207 	}
208 
209 	return v;
210 }
211 
212 
readEdgeList(Graph & G,GraphAttributes * GA)213 bool DLParser::readEdgeList(Graph &G, GraphAttributes *GA)
214 {
215 	std::string buffer;
216 	for(size_t line = 1; std::getline(m_istream, buffer); line++) {
217 		buffer.erase(buffer.find_last_not_of(" \n\r\t") + 1);
218 
219 		// Not necessary I guess, but does not do any harm.
220 		if(buffer.empty()) {
221 			continue;
222 		}
223 
224 		std::istringstream is(buffer);
225 		int vid, uid;
226 
227 		if(!(is >> vid >> uid) || !fineId(vid) || !fineId(uid)) {
228 			GraphIO::logger.lout() << "Node id incorrect (data line "
229 					  << line << "), maximum value is "
230 					  << m_nodeId.size() - 1 << "." << std::endl;
231 			return false;
232 		}
233 
234 		if (!readEdgeListRow(is, G, GA, m_nodeId[vid], m_nodeId[uid])) {
235 			return false;
236 		}
237 	}
238 
239 	return true;
240 }
241 
242 
readEmbeddedEdgeList(Graph & G,GraphAttributes * GA)243 bool DLParser::readEmbeddedEdgeList(Graph &G, GraphAttributes *GA)
244 {
245 	std::string buffer;
246 
247 	node nextFree = G.firstNode();
248 	for(size_t line = 1; std::getline(m_istream, buffer); line++) {
249 		buffer.erase(buffer.find_last_not_of(" \n\r\t") + 1);
250 
251 		if(buffer.empty()) {
252 			continue;
253 		}
254 		std::istringstream is(buffer);
255 
256 		std::string vlabel, ulabel;
257 		if(!(is >> vlabel >> ulabel)) {
258 			GraphIO::logger.lout() << "Expected embedded node labels (data line "
259 					  << line << "), got \"" << is.str() << "\"." << std::endl;
260 			return false;
261 		}
262 
263 		node v = requestLabel(GA, nextFree, vlabel);
264 		node u = requestLabel(GA, nextFree, ulabel);
265 		if(v == nullptr || u == nullptr) {
266 			return false;
267 		} else {
268 			if(!readEdgeListRow(is, G, GA, v, u)) {
269 				return false;
270 			}
271 		}
272 	}
273 
274 	return true;
275 }
276 
277 
readNodeList(Graph & G)278 bool DLParser::readNodeList(Graph &G)
279 {
280 	std::string buffer;
281 	for(size_t line = 1; std::getline(m_istream, buffer); line++) {
282 		std::istringstream is(buffer);
283 
284 		// As always, either ingore incorrect line or throw error.
285 		int vid;
286 		if(!(is >> vid)) {
287 			continue;
288 		}
289 
290 		if(!fineId(vid)) {
291 			GraphIO::logger.lout() << "Node id incorrect (data line "
292 					  << line << "." << std::endl;
293 			return false;
294 		}
295 		node v = m_nodeId[vid];
296 
297 		int uid;
298 		while(is >> uid) {
299 			if(!fineId(uid)) {
300 				GraphIO::logger.lout() << "Node id incorrect (data line "
301 						  << line << ")." << std::endl;
302 				return false;
303 			}
304 
305 			G.newEdge(v, m_nodeId[uid]);
306 		}
307 	}
308 
309 	return true;
310 }
311 
312 
readEmbeddedNodeList(Graph & G,GraphAttributes * GA)313 bool DLParser::readEmbeddedNodeList(Graph &G, GraphAttributes *GA)
314 {
315 	std::string buffer;
316 
317 	node nextFree = G.firstNode();
318 	for(size_t line = 1; std::getline(m_istream, buffer); line++) {
319 		std::istringstream is(buffer);
320 
321 		std::string vlabel;
322 		if(!(is >> vlabel)) {
323 			continue;
324 		}
325 
326 		node v = requestLabel(GA, nextFree, vlabel);
327 		if(v == nullptr) {
328 			return false;
329 		}
330 
331 		std::string ulabel;
332 		while(is >> ulabel) {
333 			node u = requestLabel(GA, nextFree, ulabel);
334 			if(u == nullptr) {
335 				return false;
336 			}
337 			G.newEdge(v, u);
338 		}
339 	}
340 
341 	return true;
342 }
343 
344 
readData(Graph & G,GraphAttributes * GA)345 bool DLParser::readData(Graph &G, GraphAttributes *GA)
346 {
347 	if(m_nodes < 0) {
348 		GraphIO::logger.lout() << "Number of nodes not specified or incorrect." << std::endl;
349 		return false;
350 	}
351 
352 	if(!m_initialized) {
353 		initGraph(G);
354 	}
355 
356 	// Now, depending on the method choosen we actually read the graph.
357 	switch(m_format) {
358 	case Format::FullMatrix:
359 		return m_embedded ? readEmbeddedMatrix(G, GA) : readMatrix(G, GA);
360 	case Format::EdgeList:
361 		return m_embedded ? readEmbeddedEdgeList(G, GA) : readEdgeList(G, GA);
362 	case Format::NodeList:
363 		return m_embedded ? readEmbeddedNodeList(G, GA) : readNodeList(G);
364 	}
365 
366 	return false;
367 }
368 
369 
370 /*
371  * This function is quite ugly (sphagetti code all the way). That is because
372  * it is trying to mirror shitty DL format design. It is terrible, seriously.
373  */
readWithLabels(Graph & G,GraphAttributes * GA)374 bool DLParser::readWithLabels(Graph &G, GraphAttributes *GA)
375 {
376 
377 	std::string buffer;
378 
379 	initGraph(G);
380 	for(node v = G.firstNode(); v;) {
381 		if(!(m_istream >> buffer)) {
382 			GraphIO::logger.lout() << "Expected node labels." << std::endl;
383 			return false;
384 		}
385 		toLower(buffer); // Labels should be lowercase.
386 
387 		// We check whether we need to end reading labels.
388 		if(buffer == "data:") {
389 			return readData(G, GA);
390 		} else if(buffer == "labels") {
391 			// Or we have "labels embedded" information.
392 			m_istream >> buffer;
393 			toLower(buffer);
394 			if(buffer != "embedded:" && buffer != "embedded") {
395 				GraphIO::logger.lout() << "Expected embedded keyword, got \""
396 						  << buffer << "\"." << std::endl;
397 				return false;
398 			}
399 
400 			m_embedded = true;
401 			break;
402 		}
403 
404 		// We split input via comma and read labels for succesive nodes.
405 		std::istringstream is(buffer);
406 		while(std::getline(is, buffer, ',')) {
407 			// There is no need parsing labels if GA is not given.
408 			if(GA && GA->has(GraphAttributes::nodeLabel)) {
409 				GA->label(v) = buffer;
410 			}
411 			m_nodeLabel[buffer] = v;
412 			v = v->succ();
413 		}
414 	}
415 
416 	m_istream >> buffer;
417 	toUpper(buffer);
418 
419 	if(buffer == "LABELS") {
420 		m_istream >> buffer;
421 		toUpper(buffer);
422 		if(buffer != "EMBEDDED:" && buffer != "EMBEDDED") {
423 			GraphIO::logger.lout() << "Expected \"EMBEDDED\" keyword, got \""
424 					  << buffer << "\"." << std::endl;
425 			return false;
426 		}
427 
428 		m_embedded = true;
429 		m_istream >> buffer;
430 		toUpper(buffer);
431 	}
432 
433 	if(buffer != "DATA:") {
434 		GraphIO::logger.lout() << "Expected \"DATA:\" statement, got \""
435 				  << buffer << "\"." << std::endl;
436 		return false;
437 	}
438 
439 	return readData(G, GA);
440 }
441 
442 
readAssignment(Graph & G,const std::string & lhs,const std::string & rhs)443 bool DLParser::readAssignment(
444 	Graph &G,
445 	const std::string &lhs, const std::string &rhs)
446 {
447 
448 	if(lhs == "N") {
449 		std::istringstream is(rhs);
450 		if(!(is >> m_nodes)) {
451 			GraphIO::logger.lout() << "Incorrect number of nodes." << std::endl;
452 			return false;
453 		}
454 	} else if(lhs == "FORMAT") {
455 		if(rhs == "FULLMATRIX" || rhs == "FM") {
456 			m_format = Format::FullMatrix;
457 		} else if(rhs == "EDGELIST1" || rhs == "EL1") {
458 			m_format = Format::EdgeList;
459 		} else if(rhs == "NODELIST1" || rhs == "NL1") {
460 			m_format = Format::NodeList;
461 		} else {
462 			GraphIO::logger.lout() << "Unknown data format \"" << rhs << "\"."
463 			                       << "Supported formats are: FM, EL1 and NL1" << std::endl;
464 			return false;
465 		}
466 	} else {
467 		GraphIO::logger.lout() << "Unkown assignment statement: "
468 				  << "\"" << lhs << "\"." << std::endl;
469 		return false;
470 	}
471 
472 	return true;
473 }
474 
475 
readStatements(Graph & G,GraphAttributes * GA)476 bool DLParser::readStatements(Graph &G, GraphAttributes *GA)
477 {
478 	std::string buffer;
479 
480 	if(!(m_istream >> buffer)) {
481 		GraphIO::logger.lout() << "Expected statement." << std::endl;
482 		return false;
483 	}
484 	toUpper(buffer);
485 
486 	if(buffer == "DATA:") {
487 		return readData(G, GA);
488 	}
489 
490 	if(buffer == "LABELS:") {
491 		return readWithLabels(G, GA);
492 	}
493 
494 	if(buffer == "LABELS") {
495 		m_istream >> buffer;
496 		toUpper(buffer);
497 		if(buffer != "EMBEDDED" && buffer != "EMBEDDED:") {
498 			GraphIO::logger.lout() << "Unknown statement "
499 					  << "\"LABELS " << buffer << "\". "
500 					  << "Did you mean \"LABELS:\" or \"LABELS EMBEDDED\"?" << std::endl;
501 			return false;
502 		}
503 
504 		m_embedded = true;
505 
506 		// ... and here we go again.
507 		return readStatements(G, GA);
508 	}
509 
510 	// If none of the above, try interpreting this as assignment statement.
511 	size_t eq = buffer.find('=');
512 	std::string lhs, rhs;
513 	if(eq == std::string::npos) {
514 		// '=' not found inside, therefore buffer has to be left side.
515 		lhs = buffer;
516 		char c;
517 		if(!(m_istream >> c) || c != '=') {
518 			GraphIO::logger.lout() << "Expected definition or assignment "
519 					  << "statement, got: \"" << lhs << "\"." << std::endl;
520 			return false;
521 		}
522 
523 		if(!(m_istream >> rhs)) {
524 			GraphIO::logger.lout() << "Expected assignment right side." << std::endl;
525 			return false;
526 		}
527 	} else if(eq == buffer.size() - 1) {
528 		// 'lhs= rhs' case.
529 		if(!(m_istream >> rhs)) {
530 			GraphIO::logger.lout() << "Expected assignment right side." << std::endl;
531 			return false;
532 		}
533 		lhs = buffer.substr(0, eq);
534 	} else {
535 		// 'lhs=rhs' case.
536 		lhs = buffer.substr(0, eq);
537 		rhs = buffer.substr(eq + 1);
538 	}
539 	toUpper(lhs);
540 	toUpper(rhs);
541 
542 	return readAssignment(G, lhs, rhs) && readStatements(G, GA);
543 
544 }
545 
546 
readGraph(Graph & G,GraphAttributes * GA)547 bool DLParser::readGraph(Graph &G, GraphAttributes *GA)
548 {
549 	init();
550 	std::string buffer;
551 
552 	m_istream >> buffer;
553 	toUpper(buffer);
554 
555 	if(buffer != "DL") {
556 		GraphIO::logger.lout() << "Expected the \"DL\" header, got: \""
557 				  << buffer << "\"." << std::endl;
558 	}
559 
560 	return readStatements(G, GA);
561 }
562 
563 
564 }
565