1// Copyright ©2017 The Gonum Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package dot
6
7import (
8	"fmt"
9	"strconv"
10	"strings"
11
12	"gonum.org/v1/gonum/graph"
13	"gonum.org/v1/gonum/graph/encoding"
14	"gonum.org/v1/gonum/graph/formats/dot"
15	"gonum.org/v1/gonum/graph/formats/dot/ast"
16	"gonum.org/v1/gonum/graph/internal/set"
17)
18
19// AttributeSetters is implemented by graph values that can set global
20// DOT attributes.
21type AttributeSetters interface {
22	// DOTAttributeSetters returns the global attribute setters.
23	DOTAttributeSetters() (graph, node, edge encoding.AttributeSetter)
24}
25
26// DOTIDSetter is implemented by types that can set a DOT ID.
27type DOTIDSetter interface {
28	SetDOTID(id string)
29}
30
31// PortSetter is implemented by graph.Edge and graph.Line that can set
32// the DOT port and compass directions of an edge.
33type PortSetter interface {
34	// SetFromPort sets the From port and
35	// compass direction of the receiver.
36	SetFromPort(port, compass string) error
37
38	// SetToPort sets the To port and compass
39	// direction of the receiver.
40	SetToPort(port, compass string) error
41}
42
43// Unmarshal parses the Graphviz DOT-encoded data and stores the result in dst.
44// If the number of graphs encoded in data is not one, an error is returned and
45// dst will hold the first graph in data.
46//
47// Attributes and IDs are unquoted during unmarshalling if appropriate.
48func Unmarshal(data []byte, dst encoding.Builder) error {
49	file, err := dot.ParseBytes(data)
50	if err != nil {
51		return err
52	}
53	err = copyGraph(dst, file.Graphs[0])
54	if err == nil && len(file.Graphs) != 1 {
55		err = fmt.Errorf("invalid number of graphs; expected 1, got %d", len(file.Graphs))
56	}
57	return err
58}
59
60// UnmarshalMulti parses the Graphviz DOT-encoded data as a multigraph and
61// stores the result in dst.
62// If the number of graphs encoded in data is not one, an error is returned and
63// dst will hold the first graph in data.
64//
65// Attributes and IDs are unquoted during unmarshalling if appropriate.
66func UnmarshalMulti(data []byte, dst encoding.MultiBuilder) error {
67	file, err := dot.ParseBytes(data)
68	if err != nil {
69		return err
70	}
71	err = copyMultigraph(dst, file.Graphs[0])
72	if err == nil && len(file.Graphs) != 1 {
73		err = fmt.Errorf("invalid number of graphs; expected 1, got %d", len(file.Graphs))
74	}
75	return err
76}
77
78// copyGraph copies the nodes and edges from the Graphviz AST source graph to
79// the destination graph. Edge direction is maintained if present.
80func copyGraph(dst encoding.Builder, src *ast.Graph) (err error) {
81	defer func() {
82		switch e := recover().(type) {
83		case nil:
84		case error:
85			err = e
86		default:
87			panic(e)
88		}
89	}()
90	gen := &simpleGraph{
91		generator: generator{
92			directed: src.Directed,
93			ids:      make(map[string]graph.Node),
94		},
95	}
96	if dst, ok := dst.(DOTIDSetter); ok {
97		dst.SetDOTID(unquoteID(src.ID))
98	}
99	if a, ok := dst.(AttributeSetters); ok {
100		gen.graphAttr, gen.nodeAttr, gen.edgeAttr = a.DOTAttributeSetters()
101	}
102	for _, stmt := range src.Stmts {
103		gen.addStmt(dst, stmt)
104	}
105	return err
106}
107
108// copyMultigraph copies the nodes and edges from the Graphviz AST source graph to
109// the destination graph. Edge direction is maintained if present.
110func copyMultigraph(dst encoding.MultiBuilder, src *ast.Graph) (err error) {
111	defer func() {
112		switch e := recover().(type) {
113		case nil:
114		case error:
115			err = e
116		default:
117			panic(e)
118		}
119	}()
120	gen := &multiGraph{
121		generator: generator{
122			directed: src.Directed,
123			ids:      make(map[string]graph.Node),
124		},
125	}
126	if dst, ok := dst.(DOTIDSetter); ok {
127		dst.SetDOTID(unquoteID(src.ID))
128	}
129	if a, ok := dst.(AttributeSetters); ok {
130		gen.graphAttr, gen.nodeAttr, gen.edgeAttr = a.DOTAttributeSetters()
131	}
132	for _, stmt := range src.Stmts {
133		gen.addStmt(dst, stmt)
134	}
135	return err
136}
137
138// A generator keeps track of the information required for generating a gonum
139// graph from a dot AST graph.
140type generator struct {
141	// Directed graph.
142	directed bool
143	// Map from dot AST node ID to gonum node.
144	ids map[string]graph.Node
145	// Nodes processed within the context of a subgraph, that is to be used as a
146	// vertex of an edge.
147	subNodes []graph.Node
148	// Stack of start indices into the subgraph node slice. The top element
149	// corresponds to the start index of the active (or inner-most) subgraph.
150	subStart []int
151	// graphAttr, nodeAttr and edgeAttr are global graph attributes.
152	graphAttr, nodeAttr, edgeAttr encoding.AttributeSetter
153}
154
155// node returns the gonum node corresponding to the given dot AST node ID,
156// generating a new such node if none exist.
157func (gen *generator) node(dst graph.NodeAdder, id string) graph.Node {
158	if n, ok := gen.ids[id]; ok {
159		return n
160	}
161	n := dst.NewNode()
162	if n, ok := n.(DOTIDSetter); ok {
163		n.SetDOTID(unquoteID(id))
164	}
165	dst.AddNode(n)
166	gen.ids[id] = n
167	// Check if within the context of a subgraph, that is to be used as a vertex
168	// of an edge.
169	if gen.isInSubgraph() {
170		// Append node processed within the context of a subgraph, that is to be
171		// used as a vertex of an edge
172		gen.appendSubgraphNode(n)
173	}
174	return n
175}
176
177type simpleGraph struct{ generator }
178
179// addStmt adds the given statement to the graph.
180func (gen *simpleGraph) addStmt(dst encoding.Builder, stmt ast.Stmt) {
181	switch stmt := stmt.(type) {
182	case *ast.NodeStmt:
183		n, ok := gen.node(dst, stmt.Node.ID).(encoding.AttributeSetter)
184		if !ok {
185			return
186		}
187		for _, attr := range stmt.Attrs {
188			a := encoding.Attribute{
189				Key:   unquoteID(attr.Key),
190				Value: unquoteID(attr.Val),
191			}
192			if err := n.SetAttribute(a); err != nil {
193				panic(fmt.Errorf("unable to unmarshal node DOT attribute (%s=%s): %v", a.Key, a.Value, err))
194			}
195		}
196	case *ast.EdgeStmt:
197		gen.addEdgeStmt(dst, stmt)
198	case *ast.AttrStmt:
199		var n encoding.AttributeSetter
200		var dst string
201		switch stmt.Kind {
202		case ast.GraphKind:
203			if gen.graphAttr == nil {
204				return
205			}
206			n = gen.graphAttr
207			dst = "graph"
208		case ast.NodeKind:
209			if gen.nodeAttr == nil {
210				return
211			}
212			n = gen.nodeAttr
213			dst = "node"
214		case ast.EdgeKind:
215			if gen.edgeAttr == nil {
216				return
217			}
218			n = gen.edgeAttr
219			dst = "edge"
220		default:
221			panic("unreachable")
222		}
223		for _, attr := range stmt.Attrs {
224			a := encoding.Attribute{
225				Key:   unquoteID(attr.Key),
226				Value: unquoteID(attr.Val),
227			}
228			if err := n.SetAttribute(a); err != nil {
229				panic(fmt.Errorf("unable to unmarshal global %s DOT attribute (%s=%s): %v", dst, a.Key, a.Value, err))
230			}
231		}
232	case *ast.Attr:
233		// ignore.
234	case *ast.Subgraph:
235		for _, stmt := range stmt.Stmts {
236			gen.addStmt(dst, stmt)
237		}
238	default:
239		panic(fmt.Sprintf("unknown statement type %T", stmt))
240	}
241}
242
243// basicEdge is an edge without the Reverse method to
244// allow satisfaction by both graph.Edge and graph.Line.
245type basicEdge interface {
246	From() graph.Node
247	To() graph.Node
248}
249
250// applyPortsToEdge applies the available port metadata from an ast.Edge
251// to a graph.Edge
252func applyPortsToEdge(from ast.Vertex, to *ast.Edge, edge basicEdge) {
253	if ps, isPortSetter := edge.(PortSetter); isPortSetter {
254		if n, vertexIsNode := from.(*ast.Node); vertexIsNode {
255			if n.Port != nil {
256				err := ps.SetFromPort(unquoteID(n.Port.ID), n.Port.CompassPoint.String())
257				if err != nil {
258					panic(fmt.Errorf("unable to unmarshal edge port (:%s:%s)", n.Port.ID, n.Port.CompassPoint.String()))
259				}
260			}
261		}
262
263		if n, vertexIsNode := to.Vertex.(*ast.Node); vertexIsNode {
264			if n.Port != nil {
265				err := ps.SetToPort(unquoteID(n.Port.ID), n.Port.CompassPoint.String())
266				if err != nil {
267					panic(fmt.Errorf("unable to unmarshal edge DOT port (:%s:%s)", n.Port.ID, n.Port.CompassPoint.String()))
268				}
269			}
270		}
271	}
272}
273
274// addEdgeStmt adds the given edge statement to the graph.
275func (gen *simpleGraph) addEdgeStmt(dst encoding.Builder, stmt *ast.EdgeStmt) {
276	fs := gen.addVertex(dst, stmt.From)
277	ts := gen.addEdge(dst, stmt.To, stmt.Attrs)
278	for _, f := range fs {
279		for _, t := range ts {
280			edge := dst.NewEdge(f, t)
281			dst.SetEdge(edge)
282			applyPortsToEdge(stmt.From, stmt.To, edge)
283			addEdgeAttrs(edge, stmt.Attrs)
284		}
285	}
286}
287
288// addVertex adds the given vertex to the graph, and returns its set of nodes.
289func (gen *simpleGraph) addVertex(dst encoding.Builder, v ast.Vertex) []graph.Node {
290	switch v := v.(type) {
291	case *ast.Node:
292		n := gen.node(dst, v.ID)
293		return []graph.Node{n}
294	case *ast.Subgraph:
295		gen.pushSubgraph()
296		for _, stmt := range v.Stmts {
297			gen.addStmt(dst, stmt)
298		}
299		return gen.popSubgraph()
300	default:
301		panic(fmt.Sprintf("unknown vertex type %T", v))
302	}
303}
304
305// addEdge adds the given edge to the graph, and returns its set of nodes.
306func (gen *simpleGraph) addEdge(dst encoding.Builder, to *ast.Edge, attrs []*ast.Attr) []graph.Node {
307	if !gen.directed && to.Directed {
308		panic(fmt.Errorf("directed edge to %v in undirected graph", to.Vertex))
309	}
310	fs := gen.addVertex(dst, to.Vertex)
311	if to.To != nil {
312		ts := gen.addEdge(dst, to.To, attrs)
313		for _, f := range fs {
314			for _, t := range ts {
315				edge := dst.NewEdge(f, t)
316				dst.SetEdge(edge)
317				applyPortsToEdge(to.Vertex, to.To, edge)
318				addEdgeAttrs(edge, attrs)
319			}
320		}
321	}
322	return fs
323}
324
325// pushSubgraph pushes the node start index of the active subgraph onto the
326// stack.
327func (gen *generator) pushSubgraph() {
328	gen.subStart = append(gen.subStart, len(gen.subNodes))
329}
330
331// popSubgraph pops the node start index of the active subgraph from the stack,
332// and returns the nodes processed since.
333func (gen *generator) popSubgraph() []graph.Node {
334	// Get nodes processed since the subgraph became active.
335	start := gen.subStart[len(gen.subStart)-1]
336	// TODO: Figure out a better way to store subgraph nodes, so that duplicates
337	// may not occur.
338	nodes := unique(gen.subNodes[start:])
339	// Remove subgraph from stack.
340	gen.subStart = gen.subStart[:len(gen.subStart)-1]
341	if len(gen.subStart) == 0 {
342		// Remove subgraph nodes when the bottom-most subgraph has been processed.
343		gen.subNodes = gen.subNodes[:0]
344	}
345	return nodes
346}
347
348// unique returns the set of unique nodes contained within ns.
349func unique(ns []graph.Node) []graph.Node {
350	var nodes []graph.Node
351	seen := make(set.Int64s)
352	for _, n := range ns {
353		id := n.ID()
354		if seen.Has(id) {
355			// skip duplicate node
356			continue
357		}
358		seen.Add(id)
359		nodes = append(nodes, n)
360	}
361	return nodes
362}
363
364// isInSubgraph reports whether the active context is within a subgraph, that is
365// to be used as a vertex of an edge.
366func (gen *generator) isInSubgraph() bool {
367	return len(gen.subStart) > 0
368}
369
370// appendSubgraphNode appends the given node to the slice of nodes processed
371// within the context of a subgraph.
372func (gen *generator) appendSubgraphNode(n graph.Node) {
373	gen.subNodes = append(gen.subNodes, n)
374}
375
376type multiGraph struct{ generator }
377
378// addStmt adds the given statement to the multigraph.
379func (gen *multiGraph) addStmt(dst encoding.MultiBuilder, stmt ast.Stmt) {
380	switch stmt := stmt.(type) {
381	case *ast.NodeStmt:
382		n, ok := gen.node(dst, stmt.Node.ID).(encoding.AttributeSetter)
383		if !ok {
384			return
385		}
386		for _, attr := range stmt.Attrs {
387			a := encoding.Attribute{
388				Key:   unquoteID(attr.Key),
389				Value: unquoteID(attr.Val),
390			}
391			if err := n.SetAttribute(a); err != nil {
392				panic(fmt.Errorf("unable to unmarshal node DOT attribute (%s=%s): %v", a.Key, a.Value, err))
393			}
394		}
395	case *ast.EdgeStmt:
396		gen.addEdgeStmt(dst, stmt)
397	case *ast.AttrStmt:
398		var n encoding.AttributeSetter
399		var dst string
400		switch stmt.Kind {
401		case ast.GraphKind:
402			if gen.graphAttr == nil {
403				return
404			}
405			n = gen.graphAttr
406			dst = "graph"
407		case ast.NodeKind:
408			if gen.nodeAttr == nil {
409				return
410			}
411			n = gen.nodeAttr
412			dst = "node"
413		case ast.EdgeKind:
414			if gen.edgeAttr == nil {
415				return
416			}
417			n = gen.edgeAttr
418			dst = "edge"
419		default:
420			panic("unreachable")
421		}
422		for _, attr := range stmt.Attrs {
423			a := encoding.Attribute{
424				Key:   unquoteID(attr.Key),
425				Value: unquoteID(attr.Val),
426			}
427			if err := n.SetAttribute(a); err != nil {
428				panic(fmt.Errorf("unable to unmarshal global %s DOT attribute (%s=%s): %v", dst, a.Key, a.Value, err))
429			}
430		}
431	case *ast.Attr:
432		// ignore.
433	case *ast.Subgraph:
434		for _, stmt := range stmt.Stmts {
435			gen.addStmt(dst, stmt)
436		}
437	default:
438		panic(fmt.Sprintf("unknown statement type %T", stmt))
439	}
440}
441
442// addEdgeStmt adds the given edge statement to the multigraph.
443func (gen *multiGraph) addEdgeStmt(dst encoding.MultiBuilder, stmt *ast.EdgeStmt) {
444	fs := gen.addVertex(dst, stmt.From)
445	ts := gen.addLine(dst, stmt.To, stmt.Attrs)
446	for _, f := range fs {
447		for _, t := range ts {
448			edge := dst.NewLine(f, t)
449			dst.SetLine(edge)
450			applyPortsToEdge(stmt.From, stmt.To, edge)
451			addEdgeAttrs(edge, stmt.Attrs)
452		}
453	}
454}
455
456// addVertex adds the given vertex to the multigraph, and returns its set of nodes.
457func (gen *multiGraph) addVertex(dst encoding.MultiBuilder, v ast.Vertex) []graph.Node {
458	switch v := v.(type) {
459	case *ast.Node:
460		n := gen.node(dst, v.ID)
461		return []graph.Node{n}
462	case *ast.Subgraph:
463		gen.pushSubgraph()
464		for _, stmt := range v.Stmts {
465			gen.addStmt(dst, stmt)
466		}
467		return gen.popSubgraph()
468	default:
469		panic(fmt.Sprintf("unknown vertex type %T", v))
470	}
471}
472
473// addLine adds the given edge to the multigraph, and returns its set of nodes.
474func (gen *multiGraph) addLine(dst encoding.MultiBuilder, to *ast.Edge, attrs []*ast.Attr) []graph.Node {
475	if !gen.directed && to.Directed {
476		panic(fmt.Errorf("directed edge to %v in undirected graph", to.Vertex))
477	}
478	fs := gen.addVertex(dst, to.Vertex)
479	if to.To != nil {
480		ts := gen.addLine(dst, to.To, attrs)
481		for _, f := range fs {
482			for _, t := range ts {
483				edge := dst.NewLine(f, t)
484				dst.SetLine(edge)
485				applyPortsToEdge(to.Vertex, to.To, edge)
486				addEdgeAttrs(edge, attrs)
487			}
488		}
489	}
490	return fs
491}
492
493// addEdgeAttrs adds the attributes to the given edge.
494func addEdgeAttrs(edge basicEdge, attrs []*ast.Attr) {
495	e, ok := edge.(encoding.AttributeSetter)
496	if !ok {
497		return
498	}
499	for _, attr := range attrs {
500		a := encoding.Attribute{
501			Key:   unquoteID(attr.Key),
502			Value: unquoteID(attr.Val),
503		}
504		if err := e.SetAttribute(a); err != nil {
505			panic(fmt.Errorf("unable to unmarshal edge DOT attribute (%s=%s): %v", a.Key, a.Value, err))
506		}
507	}
508}
509
510// unquoteID unquotes the given string if needed in the context of an ID. If s
511// is not already quoted the original string is returned.
512func unquoteID(s string) string {
513	// To make round-trips idempotent, don't unquote quoted HTML-like strings
514	//
515	//    /^"<.*>"$/
516	if len(s) >= 4 && strings.HasPrefix(s, `"<`) && strings.HasSuffix(s, `>"`) {
517		return s
518	}
519	// Unquote quoted string if possible.
520	if t, err := strconv.Unquote(s); err == nil {
521		return t
522	}
523	// On error, either s is not quoted or s is quoted but contains invalid
524	// characters, in both cases we return the original string rather than
525	// panicking.
526	return s
527}
528