1""" 2Handling of tree structures given in a special 'dotted' syntax. 3This represents trees of nodes with strings as tags, 4in a readable and writable and easy to parse syntax. 5 6There are two main functions, unparse_sexpr and parse_string. 7When parsing, the result is by default given in 'sexpr' format: 8each node is a tuple of the form 9 10 (tag, ) or (tag, node) or (tag, node, node) ... 11 12The following invariant is intended to hold for every node x, 13 14 parse_string(unparse_sexpr(x)) == x 15 16Currently the following invariant has been tested for some strings: 17 18 unparse_sexpr(parse_string(s)).strip() == s.strip() 19 20[It only holds on stripped results but may be fixed sometime.] 21 22""" 23 24 25class Node(object): 26 __slots__ = 'tag', 'children', 'index', 27 28 def __init__(self, tag, children, index): 29 self.tag = tag 30 self.children = children 31 self.index = index 32 33 def as_sexpr(self): 34 return (self.tag,) + tuple([c.as_sexpr() for c in self.children]) 35 36 def __repr__(self): 37 return '%s(%r, %r, %r)' % ( 38 self.__class__.__name__, 39 self.tag, 40 self.children, 41 self.index) 42 43 44class _GLUECLAMP_: 45 46 _imports_ = ( 47 '_parent.FileIO:IO', 48 ) 49 50 ## 51 # The name of attributes that are configurable in instances. 52 # 53 _chgable_ = 'node', 'dotchar' 54 55 ## 56 # The character that begins the 'dotted' indentation. 57 dotchar = '.' 58 59 ## 60 # The character that quotes lines beginning with dots and itself. 61 quotechar = '\\' 62 63 ## 64 # Construct a new node. 65 # @return 66 # This variant returns a tuple in s-expression form. 67 # @param tag a string 68 # @param children a sequence of nodes 69 # @param lineindex line index of tag, not used in s-expressions 70 71 def node_sexpr(self, tag, children, lineindex): 72 return (tag,) + tuple(children) 73 74 ## 75 # Construct a new node. 76 # @return 77 # This variant returns a Node object. 78 # @param tag a string 79 # @param children a sequence of nodes 80 # @param lineindex line index of beginning tag, first line = 0 81 82 def node_node(self, tag, children, lineindex): 83 return Node(tag, tuple(children), lineindex) 84 85 node = node_node 86 87 def parse_file(self, file, src=None): 88 return self.parse_string(self.IO.read_file(file), src) 89 90 ## 91 # Parse a dotted tree text given as a sequence of lines. 92 # @param pos 93 # @param tag list with first line of tag, if any 94 # @param lineindex line index of tag 95 # @param it iterator yielding remaining lines 96 # @return a triple (index, nextvar, node) where 97 # index is the index of line 'nextvar', 98 # nextvar is the first line of next node to parse, and 99 # node is the resulting node of this parse. 100 101 def parse_iter(self, pos, tag, lineindex, it, src=None): 102 dotchar = self.dotchar 103 quotechar = self.quotechar 104 children = [] 105 firstline = lineindex 106 107 while 1: 108 try: 109 lineindex, nextvar = next(it) 110 except StopIteration: 111 nextvar = None 112 break 113 if not nextvar.startswith(dotchar): 114 tag.append(nextvar) 115 else: 116 break 117 for (i, t) in enumerate(tag): 118 if (t.startswith(quotechar+dotchar) or 119 t.startswith(quotechar+quotechar+dotchar)): 120 tag[i] = t[len(quotechar):] 121 if tag == ['']: 122 tag = '\n' 123 else: 124 tag = '\n'.join(tag) 125 while 1: 126 if (nextvar is None or len(nextvar) <= pos 127 or nextvar[pos] != dotchar or 128 not nextvar.startswith(dotchar*(pos+1))): 129 return lineindex, nextvar, self.node(tag, children, firstline) 130 if len(nextvar) > pos+1 and nextvar[pos+1] == dotchar: 131 if src is None: 132 raise SyntaxError('Level must increase with 1 max') 133 else: 134 src.error('Level must increase with 1 max', lineindex) 135 lineindex, nextvar, child = self.parse_iter(pos+1, [nextvar[pos+1:]], 136 lineindex, it, src) 137 children.append(child) 138 139 def parse_lines(self, lines, src=None): 140 it = enumerate(lines) 141 lineindex, nextvar, node = self.parse_iter(0, [], 0, it, src) 142 assert nextvar is None 143 return node 144 145 def parse_string(self, string, src=None): 146 if string: 147 lines = string.split('\n') 148 else: 149 lines = [] 150 return self.parse_lines(lines, src) 151 152 ## 153 # Unparse a tree given on Node form 154 155 def unparse_node(self, node): 156 return self.unparse_sexpr(node.as_sexpr()) 157 158 ## 159 # Unparse a tree given on sexpr form 160 # @return a string in dotted-tree form 161 def unparse_sexpr(self, sexpr): 162 li = [] 163 164 def unparse(depth, sexpr): 165 li.append(self.unparse_tag(depth, sexpr[0])) 166 for x in sexpr[1:]: 167 unparse(depth+1, x) 168 169 unparse(0, sexpr) 170 return '\n'.join(li) 171 172 def unparse_tag(self, depth, tag): 173 dotchar, quotechar = self.dotchar, self.quotechar 174 tag = tag.split('\n') 175 for (i, t) in enumerate(tag): 176 if (t.startswith(dotchar) or 177 t.startswith(quotechar + dotchar)): 178 tag[i] = quotechar + t 179 tag = '\n'.join(tag) 180 tag = dotchar*depth+tag 181 return tag 182 183 184def test_1(): 185 # Test parsing to sexpr's and back 186 # for a variety of cases 187 from guppy import Root 188 dt = Root().guppy.gsl.DottedTree 189 dt.node = dt.node_sexpr 190 parse = dt.parse_string 191 unparse = dt.unparse_sexpr 192 193 for x, y in [ 194 ['', ('',)], 195 ['a', ('a',)], 196 ['.a', ('', ('a',))], 197 ['a\n.b', ('a', ('b',))], 198 ['a\nb\n.c', ('a\nb', ('c',))], 199 ["""\n.a\n..a""", ('\n', ('a', ('a',)))], 200 ["""hello\n.a\n.b\n..ba\nx\n..bb""", 201 ('hello', ('a',), ('b', ('ba\nx',), ('bb',)))], 202 # Quoting dots 203 [r'\.', ('.',)], 204 [r'.\.', ('', ('.',))], 205 # Preserving quote 206 ['\\', ('\\',)], 207 ['.\n\\', ('', ('\n\\',))], 208 # Quoting quote-dots 209 [r'\\.', (r'\.',)], 210 # Preserving whitespace starting a tag 211 # Or should it be stripped? I think better not, it would complicate transparency. 212 [r'. tag', ('', (' tag', ))], 213 214 # Preserving initial whitespace 215 [' ', (' ',)], 216 # Preserving initial newline 217 ['\n', ('\n',)], 218 ['\na', ('\na',)], 219 220 # A n intended usage example 221 [''' 222initial 223text 224.aspect for guppy.hsp 225..returns 226...type A 227...latex 228~\\ 229\..~|begincolorbox|~raw::~LaTeX~\\ 230~\\ 231~~~{\textbackslash}{\textbackslash}begin{\{}center{\}}~\\ 232.aspect for guppy.gsl 233..contains DottedTree 234''', 235 236 ('\ninitial\ntext', 237 ('aspect for guppy.hsp', 238 ('returns', 239 ('type A',), 240 ('latex\n~\\\n..~|begincolorbox|~raw::~LaTeX~\\\n~\\\n~~~{\textbackslash}{\textbackslash}begin{\\{}center{\\}}~\\',))), 241 ('aspect for guppy.gsl', 242 ('contains DottedTree\n',)))] 243 244 ]: 245 z = parse(x) 246 if y is not None: 247 assert z == y 248 assert unparse(z).strip() == x.strip() 249 250 # Unparsing x and then parsing should give back x transparently 251 # for any tree x, involving any combination of dots, quotes and other characters. 252 253 # List of special chars and one normal 254 chars = [dt.quotechar, dt.dotchar, '\n', ' ', 'a'] 255 256 import random 257 258 ## 259 # Generate a random node with random number of children. 260 # Shuffles the chars list to make the tag string. 261 # @param maxchild maximum number of children 262 def randnode(maxchild): 263 numchild = random.randint(0, maxchild) 264 random.shuffle(chars) 265 tag = ''.join(chars) 266 children = [randnode(maxchild-1) for i in range(numchild)] 267 return dt.node(tag, children, 0) 268 269 for i in range(10): 270 y = randnode(3) 271 x = unparse(y) 272 z = parse(x) 273 assert z == y 274 275 276def test_2(): 277 # Test parsing to Node 278 # that the line lineindex are correct 279 # They start from 0, since enumerate() generates them, 280 # It seemed inconsistent to change them to start from 1. 281 # Which will be made in error prints. 282 283 from guppy import Root 284 dt = Root().guppy.gsl.DottedTree 285 parse = dt.parse_string 286 unparse = dt.unparse_node 287 288 node = parse("""\ 289line 0 290.line 1 291..line 2 292line 3 293.line 4 294""") 295 296 exp = Node('line 0', ( 297 Node('line 1', 298 (Node('line 2\nline 3', (), 2),), 1), 299 Node('line 4\n', (), 4)), 0) 300 assert repr(node) == repr(exp) 301 302 303def test_main(): 304 test_1() 305 test_2() 306