1############################################################################# 2# Copyright (c) 2019 Balabit 3# 4# This program is free software; you can redistribute it and/or modify it 5# under the terms of the GNU General Public License version 2 as published 6# by the Free Software Foundation, or (at your option) any later version. 7# 8# This program is distributed in the hope that it will be useful, 9# but WITHOUT ANY WARRANTY; without even the implied warranty of 10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11# GNU General Public License for more details. 12# 13# You should have received a copy of the GNU General Public License 14# along with this program; if not, write to the Free Software 15# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16# 17# As an additional exemption you are allowed to compile & link against the 18# OpenSSL libraries as published by the OpenSSL project. See the file 19# COPYING for details. 20# 21############################################################################# 22 23import xml.etree.ElementTree as xml_parser 24from os import remove 25from subprocess import DEVNULL, Popen 26from tempfile import NamedTemporaryFile 27 28import networkx 29 30 31class Rule(): 32 def __init__(self, number, parent, symbols): 33 self.number = number 34 self.parent = parent 35 self.symbols = symbols 36 37 38def _run_in_shell(command): 39 proc = Popen(command, stderr=DEVNULL, stdout=DEVNULL) 40 proc.wait() 41 return proc.returncode == 0 42 43 44def _write_to_file(content): 45 file = NamedTemporaryFile(mode='w') 46 file.write(content) 47 file.flush() 48 file.seek(0) 49 return file 50 51 52def _yacc2xml(yacc_content): 53 with _write_to_file(yacc_content) as file: 54 xml_filepath = NamedTemporaryFile().name 55 try: 56 if not _run_in_shell(['bison', '--xml=' + xml_filepath, '--output=/dev/null', file.name]): 57 raise Exception('Failed to convert to xml:\n{}\n'.format(yacc_content)) 58 except FileNotFoundError: 59 raise Exception('bison executable not found') 60 return xml_filepath 61 62 63def _xml2rules(filename): 64 rules = [] 65 root = xml_parser.parse(filename).getroot() 66 for rule in root.iter('rule'): 67 number = int(rule.get('number')) 68 parent = rule.find('lhs').text 69 symbols = [symbol.text for symbol in rule.find('rhs') if symbol.tag != 'empty'] 70 rules.append(Rule(number, parent, symbols)) 71 return rules 72 73 74def _yacc2rules(yacc): 75 xml = _yacc2xml(yacc) 76 rules = _xml2rules(xml) 77 remove(xml) 78 return rules 79 80 81def _rules2graph(rules): 82 graph = networkx.MultiDiGraph() 83 for rule in rules: 84 rule_node = str(rule.number) 85 graph.add_edge(rule.parent, rule_node) 86 for index, symbol in enumerate(rule.symbols): 87 graph.add_edge(rule_node, symbol, index=index) 88 return graph 89 90 91# yacc2graph: 92# The structure of the graph is the following: 93# * The graph is directed 94# * A node can either be a symbol (terminal/nonterminal) or a rule number 95# * The edges connect the parent nodes to its children nodes 96# * Rule numbers always have symbols as children 97# * Symbols always have rule numbers as children 98# * Edges can be indexed 99# * When we are traversing through the graph, if a node's edges 100# are indexed, the edges must be traversed in ascending order, 101# if they are not indexed, the order does not matter 102# * Rule numbers' edges to their children are indexed 103# * Symbols' edges to their children are not indexed 104# 105# In simple words, take the following yacc syntax: 106# %token test1 107# %token test1next 108# %token test2 109# %token test2next 110# %% 111# start 112# : test # rule number 0 113# ; 114# test 115# : test1 test1next test1next # rule number 1 116# | test2 test2next test # rule number 2 117# ; 118# 119# * 'start' and 'test' are nonterminal symbols 120# * test1, test1next, test2, test2next are terminal symbols 121# * Every line, starting with a ':' or a '|' are rules, and they are numbered 122# 123# * The child of 'start' is 'rule number 0', and it is not indexed 124# * The child of 'rule number 0' is 'test', and it is indexed 125# * The children of 'test' are 'rule number 0' and 'rule number 1', and they are not indexed 126# * The children of 'rule number 1' are 'test1', 'test1next' and 'test1next' and they are indexed 127# * The children of 'rule number 2' are 'test2', 'test2next' and 'test', and they are indexed 128# 129# (See the unit tests for more examples) 130def yacc2graph(yacc): 131 return _rules2graph(_yacc2rules(yacc)) 132