1"""
2Handling of tree structures given in a special 'dotted' syntax.
3This represents trees of nodes with strings as tags,
4in a readable and writable and easy to parse syntax.
5
6There are two main functions, unparse_sexpr and parse_string.
7When parsing, the result is by default given in 'sexpr' format:
8each node is a tuple of the form
9
10    (tag, ) or (tag, node) or (tag, node, node) ...
11
12The following invariant is intended to hold for every node x,
13
14    parse_string(unparse_sexpr(x)) == x
15
16Currently the following invariant has been tested for some strings:
17
18    unparse_sexpr(parse_string(s)).strip() == s.strip()
19
20[It only holds on stripped results but may be fixed sometime.]
21
22"""
23
24
25class Node(object):
26    __slots__ = 'tag', 'children', 'index',
27
28    def __init__(self, tag, children, index):
29        self.tag = tag
30        self.children = children
31        self.index = index
32
33    def as_sexpr(self):
34        return (self.tag,) + tuple([c.as_sexpr() for c in self.children])
35
36    def __repr__(self):
37        return '%s(%r, %r, %r)' % (
38            self.__class__.__name__,
39            self.tag,
40            self.children,
41            self.index)
42
43
44class _GLUECLAMP_:
45
46    _imports_ = (
47        '_parent.FileIO:IO',
48    )
49
50    ##
51    # The name of attributes that are configurable in instances.
52    #
53    _chgable_ = 'node', 'dotchar'
54
55    ##
56    # The character that begins the 'dotted' indentation.
57    dotchar = '.'
58
59    ##
60    # The character that quotes lines beginning with dots and itself.
61    quotechar = '\\'
62
63    ##
64    # Construct a new node.
65    # @return
66    # This variant returns a tuple in s-expression form.
67    # @param tag a string
68    # @param children a sequence of nodes
69    # @param lineindex line index of tag, not used in s-expressions
70
71    def node_sexpr(self, tag, children, lineindex):
72        return (tag,) + tuple(children)
73
74    ##
75    # Construct a new node.
76    # @return
77    # This variant returns a Node object.
78    # @param tag a string
79    # @param children a sequence of nodes
80    # @param lineindex line index of beginning tag, first line = 0
81
82    def node_node(self, tag, children, lineindex):
83        return Node(tag, tuple(children), lineindex)
84
85    node = node_node
86
87    def parse_file(self, file, src=None):
88        return self.parse_string(self.IO.read_file(file), src)
89
90    ##
91    # Parse a dotted tree text given as a sequence of lines.
92    # @param pos
93    # @param tag     list with first line of tag, if any
94    # @param lineindex line index of tag
95    # @param it      iterator yielding remaining lines
96    # @return a triple (index, nextvar, node) where
97    # index is the index of line 'nextvar',
98    # nextvar is the first line of next node to parse, and
99    # node is the resulting node of this parse.
100
101    def parse_iter(self, pos, tag, lineindex, it, src=None):
102        dotchar = self.dotchar
103        quotechar = self.quotechar
104        children = []
105        firstline = lineindex
106
107        while 1:
108            try:
109                lineindex, nextvar = next(it)
110            except StopIteration:
111                nextvar = None
112                break
113            if not nextvar.startswith(dotchar):
114                tag.append(nextvar)
115            else:
116                break
117        for (i, t) in enumerate(tag):
118            if (t.startswith(quotechar+dotchar) or
119                    t.startswith(quotechar+quotechar+dotchar)):
120                tag[i] = t[len(quotechar):]
121        if tag == ['']:
122            tag = '\n'
123        else:
124            tag = '\n'.join(tag)
125        while 1:
126            if (nextvar is None or len(nextvar) <= pos
127                or nextvar[pos] != dotchar or
128                    not nextvar.startswith(dotchar*(pos+1))):
129                return lineindex, nextvar, self.node(tag, children, firstline)
130            if len(nextvar) > pos+1 and nextvar[pos+1] == dotchar:
131                if src is None:
132                    raise SyntaxError('Level must increase with 1 max')
133                else:
134                    src.error('Level must increase with 1 max', lineindex)
135            lineindex, nextvar, child = self.parse_iter(pos+1, [nextvar[pos+1:]],
136                                                        lineindex, it, src)
137            children.append(child)
138
139    def parse_lines(self, lines, src=None):
140        it = enumerate(lines)
141        lineindex, nextvar, node = self.parse_iter(0, [], 0, it, src)
142        assert nextvar is None
143        return node
144
145    def parse_string(self, string, src=None):
146        if string:
147            lines = string.split('\n')
148        else:
149            lines = []
150        return self.parse_lines(lines, src)
151
152    ##
153    # Unparse a tree given on Node form
154
155    def unparse_node(self, node):
156        return self.unparse_sexpr(node.as_sexpr())
157
158    ##
159    # Unparse a tree given on sexpr form
160    # @return a string in dotted-tree form
161    def unparse_sexpr(self, sexpr):
162        li = []
163
164        def unparse(depth, sexpr):
165            li.append(self.unparse_tag(depth, sexpr[0]))
166            for x in sexpr[1:]:
167                unparse(depth+1, x)
168
169        unparse(0, sexpr)
170        return '\n'.join(li)
171
172    def unparse_tag(self, depth, tag):
173        dotchar, quotechar = self.dotchar, self.quotechar
174        tag = tag.split('\n')
175        for (i, t) in enumerate(tag):
176            if (t.startswith(dotchar) or
177                    t.startswith(quotechar + dotchar)):
178                tag[i] = quotechar + t
179        tag = '\n'.join(tag)
180        tag = dotchar*depth+tag
181        return tag
182
183
184def test_1():
185    # Test parsing to sexpr's and back
186    # for a variety of cases
187    from guppy import Root
188    dt = Root().guppy.gsl.DottedTree
189    dt.node = dt.node_sexpr
190    parse = dt.parse_string
191    unparse = dt.unparse_sexpr
192
193    for x, y in [
194        ['', ('',)],
195        ['a', ('a',)],
196        ['.a', ('', ('a',))],
197        ['a\n.b', ('a', ('b',))],
198        ['a\nb\n.c', ('a\nb', ('c',))],
199        ["""\n.a\n..a""", ('\n', ('a', ('a',)))],
200        ["""hello\n.a\n.b\n..ba\nx\n..bb""",
201            ('hello', ('a',), ('b', ('ba\nx',), ('bb',)))],
202        # Quoting dots
203        [r'\.', ('.',)],
204        [r'.\.', ('', ('.',))],
205        # Preserving quote
206        ['\\', ('\\',)],
207        ['.\n\\', ('', ('\n\\',))],
208        # Quoting quote-dots
209        [r'\\.', (r'\.',)],
210        # Preserving whitespace starting a tag
211        # Or should it be stripped? I think better not, it would complicate transparency.
212        [r'. tag', ('', (' tag', ))],
213
214        # Preserving initial whitespace
215        [' ', (' ',)],
216        # Preserving initial newline
217        ['\n', ('\n',)],
218        ['\na', ('\na',)],
219
220        # A n intended usage example
221        ['''
222initial
223text
224.aspect for guppy.hsp
225..returns
226...type A
227...latex
228~\\
229\..~|begincolorbox|~raw::~LaTeX~\\
230~\\
231~~~{\textbackslash}{\textbackslash}begin{\{}center{\}}~\\
232.aspect for guppy.gsl
233..contains DottedTree
234''',
235
236         ('\ninitial\ntext',
237          ('aspect for guppy.hsp',
238           ('returns',
239            ('type A',),
240            ('latex\n~\\\n..~|begincolorbox|~raw::~LaTeX~\\\n~\\\n~~~{\textbackslash}{\textbackslash}begin{\\{}center{\\}}~\\',))),
241             ('aspect for guppy.gsl',
242              ('contains DottedTree\n',)))]
243
244    ]:
245        z = parse(x)
246        if y is not None:
247            assert z == y
248        assert unparse(z).strip() == x.strip()
249
250    # Unparsing x and then parsing should give back x transparently
251    # for any tree x, involving any combination of dots, quotes and other characters.
252
253    # List of special chars and one normal
254    chars = [dt.quotechar, dt.dotchar, '\n', ' ', 'a']
255
256    import random
257
258    ##
259    # Generate a random node with random number of children.
260    # Shuffles the chars list to make the tag string.
261    # @param maxchild maximum number of children
262    def randnode(maxchild):
263        numchild = random.randint(0, maxchild)
264        random.shuffle(chars)
265        tag = ''.join(chars)
266        children = [randnode(maxchild-1) for i in range(numchild)]
267        return dt.node(tag, children, 0)
268
269    for i in range(10):
270        y = randnode(3)
271        x = unparse(y)
272        z = parse(x)
273        assert z == y
274
275
276def test_2():
277    # Test parsing to Node
278    # that the line lineindex are correct
279    # They start from 0, since enumerate() generates them,
280    # It seemed inconsistent to change them to start from 1.
281    # Which will be made in error prints.
282
283    from guppy import Root
284    dt = Root().guppy.gsl.DottedTree
285    parse = dt.parse_string
286    unparse = dt.unparse_node
287
288    node = parse("""\
289line 0
290.line 1
291..line 2
292line 3
293.line 4
294""")
295
296    exp = Node('line 0', (
297        Node('line 1',
298             (Node('line 2\nline 3', (), 2),), 1),
299        Node('line 4\n', (), 4)), 0)
300    assert repr(node) == repr(exp)
301
302
303def test_main():
304    test_1()
305    test_2()
306