1#############################################################################
2# Copyright (c) 2019 Balabit
3#
4# This program is free software; you can redistribute it and/or modify it
5# under the terms of the GNU General Public License version 2 as published
6# by the Free Software Foundation, or (at your option) any later version.
7#
8# This program is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11# GNU General Public License for more details.
12#
13# You should have received a copy of the GNU General Public License
14# along with this program; if not, write to the Free Software
15# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
16#
17# As an additional exemption you are allowed to compile & link against the
18# OpenSSL libraries as published by the OpenSSL project. See the file
19# COPYING for details.
20#
21#############################################################################
22
23import xml.etree.ElementTree as xml_parser
24from os import remove
25from subprocess import DEVNULL, Popen
26from tempfile import NamedTemporaryFile
27
28import networkx
29
30
31class Rule():
32    def __init__(self, number, parent, symbols):
33        self.number = number
34        self.parent = parent
35        self.symbols = symbols
36
37
38def _run_in_shell(command):
39    proc = Popen(command, stderr=DEVNULL, stdout=DEVNULL)
40    proc.wait()
41    return proc.returncode == 0
42
43
44def _write_to_file(content):
45    file = NamedTemporaryFile(mode='w')
46    file.write(content)
47    file.flush()
48    file.seek(0)
49    return file
50
51
52def _yacc2xml(yacc_content):
53    with _write_to_file(yacc_content) as file:
54        xml_filepath = NamedTemporaryFile().name
55        try:
56            if not _run_in_shell(['bison', '--xml=' + xml_filepath, '--output=/dev/null', file.name]):
57                raise Exception('Failed to convert to xml:\n{}\n'.format(yacc_content))
58        except FileNotFoundError:
59            raise Exception('bison executable not found')
60        return xml_filepath
61
62
63def _xml2rules(filename):
64    rules = []
65    root = xml_parser.parse(filename).getroot()
66    for rule in root.iter('rule'):
67        number = int(rule.get('number'))
68        parent = rule.find('lhs').text
69        symbols = [symbol.text for symbol in rule.find('rhs') if symbol.tag != 'empty']
70        rules.append(Rule(number, parent, symbols))
71    return rules
72
73
74def _yacc2rules(yacc):
75    xml = _yacc2xml(yacc)
76    rules = _xml2rules(xml)
77    remove(xml)
78    return rules
79
80
81def _rules2graph(rules):
82    graph = networkx.MultiDiGraph()
83    for rule in rules:
84        rule_node = str(rule.number)
85        graph.add_edge(rule.parent, rule_node)
86        for index, symbol in enumerate(rule.symbols):
87            graph.add_edge(rule_node, symbol, index=index)
88    return graph
89
90
91# yacc2graph:
92# The structure of the graph is the following:
93#  * The graph is directed
94#  * A node can either be a symbol (terminal/nonterminal) or a rule number
95#  * The edges connect the parent nodes to its children nodes
96#  * Rule numbers always have symbols as children
97#  * Symbols always have rule numbers as children
98#  * Edges can be indexed
99#  * When we are traversing through the graph, if a node's edges
100#    are indexed, the edges must be traversed in ascending order,
101#    if they are not indexed, the order does not matter
102#  * Rule numbers' edges to their children are indexed
103#  * Symbols' edges to their children are not indexed
104#
105# In simple words, take the following yacc syntax:
106# %token test1
107# %token test1next
108# %token test2
109# %token test2next
110# %%
111# start
112#     : test                      # rule number 0
113#     ;
114# test
115#     : test1 test1next test1next # rule number 1
116#     | test2 test2next test      # rule number 2
117#     ;
118#
119# * 'start' and 'test' are nonterminal symbols
120# * test1, test1next, test2, test2next are terminal symbols
121# * Every line, starting with a ':' or a '|' are rules, and they are numbered
122#
123# * The child of 'start' is 'rule number 0', and it is not indexed
124# * The child of 'rule number 0' is 'test', and it is indexed
125# * The children of 'test' are 'rule number 0' and 'rule number 1', and they are not indexed
126# * The children of 'rule number 1' are 'test1', 'test1next' and 'test1next' and they are indexed
127# * The children of 'rule number 2' are 'test2', 'test2next' and 'test', and they are indexed
128#
129# (See the unit tests for more examples)
130def yacc2graph(yacc):
131    return _rules2graph(_yacc2rules(yacc))
132