# coding: utf-8

"""
Exposes a parse() function to parse template strings.

"""

import re

from pystache import defaults
from pystache.parsed import ParsedTemplate


END_OF_LINE_CHARACTERS = ['\r', '\n']
NON_BLANK_RE = re.compile(r'^(.)', re.M)


# TODO: add some unit tests for this.
# TODO: add a test case that checks for spurious spaces.
# TODO: add test cases for delimiters.
def parse(template, delimiters=None):
    """
    Parse a unicode template string and return a ParsedTemplate instance.

    Arguments:

      template: a unicode template string.

      delimiters: a 2-tuple of delimiters.  Defaults to the package default.

    Raises:

      TypeError: if template is not a str instance.

      ParsingError: if the template's section tags are unbalanced.

    Examples:

    >>> parsed = parse("Hey {{#who}}{{name}}!{{/who}}")
    >>> print(str(parsed).replace('u', ''))  # This is an old hack.
    ['Hey ', _SectionNode(key='who', index_begin=12, index_end=21, parsed=[_EscapeNode(key='name'), '!'])]

    """
    # isinstance() rather than an exact type comparison so that str
    # subclasses are accepted.  TypeError is still an Exception, so callers
    # that catch Exception keep working.
    if not isinstance(template, str):
        raise TypeError('Template is not unicode: %s' % type(template))
    parser = _Parser(delimiters)
    return parser.parse(template)


def _compile_template_re(delimiters):
    """
    Return a compiled regular expression that matches a single tag.

    Arguments:

      delimiters: a sequence whose first two items are the opening and
        closing tag delimiters.  Both are escaped before being embedded
        in the pattern.

    """
    # The possible tag type characters following the opening tag,
    # excluding "=" and "{".
    tag_types = '!>&/#^'

    # TODO: are we following this in the spec?
    #
    #   The tag's content MUST be a non-whitespace character sequence
    #   NOT containing the current closing delimiter.
    #
    tag = r"""
        (?P<whitespace>[\ \t]*)
        %(otag)s \s*
        (?:
          (?P<change>=) \s* (?P<delims>.+?) \s* = |
          (?P<raw>{) \s* (?P<raw_name>.+?) \s* } |
          (?P<tag>[%(tag_types)s]?) \s* (?P<tag_key>[\s\S]+?)
        )
        \s* %(ctag)s
    """ % {
        'tag_types': tag_types,
        'otag': re.escape(delimiters[0]),
        'ctag': re.escape(delimiters[1]),
    }

    return re.compile(tag, re.VERBOSE)


class ParsingError(Exception):

    pass


## Node types


def _format(obj, exclude=None):
    """
    Return a repr-style string for a parse-tree node.

    The 'key' attribute, when the node has one, is listed first; the
    remaining instance attributes follow in sorted order.

    Arguments:

      obj: the node to format.

      exclude: an optional iterable of attribute names to omit.  It is
        not modified.

    """
    attrs = obj.__dict__
    # Work on a copy so the caller's list is never mutated.
    hidden = set(exclude or ())
    hidden.add('key')
    names = sorted(set(attrs) - hidden)
    # Not every node has a key (comment and change-delimiter nodes do not);
    # only list it when present so that repr() never raises KeyError.
    if 'key' in attrs:
        names.insert(0, 'key')
    args = ['%s=%s' % (name, repr(attrs[name])) for name in names]
    return '%s(%s)' % (obj.__class__.__name__, ', '.join(args))


class _CommentNode(object):
    """Parse-tree node for a comment tag; renders as the empty string."""

    def __repr__(self):
        return _format(self)

    def render(self, engine, context):
        return ''


class _ChangeNode(object):
    """Parse-tree node for a set-delimiter tag; renders as the empty string."""

    def __init__(self, delimiters):
        self.delimiters = delimiters

    def __repr__(self):
        return _format(self)

    def render(self, engine, context):
        return ''


class _EscapeNode(object):
    """Parse-tree node for a variable tag whose value is escaped."""

    def __init__(self, key):
        self.key = key

    def __repr__(self):
        return _format(self)

    def render(self, engine, context):
        s = engine.fetch_string(context, self.key)
        return engine.escape(s)


class _LiteralNode(object):
    """Parse-tree node for a variable tag rendered via engine.literal()."""

    def __init__(self, key):
        self.key = key

    def __repr__(self):
        return _format(self)

    def render(self, engine, context):
        s = engine.fetch_string(context, self.key)
        return engine.literal(s)


class _PartialNode(object):
    """Parse-tree node for a partial tag, remembering its indentation."""

    def __init__(self, key, indent):
        self.key = key
        self.indent = indent

    def __repr__(self):
        return _format(self)

    def render(self, engine, context):
        template = engine.resolve_partial(self.key)
        # Indent before rendering.
        template = re.sub(NON_BLANK_RE, self.indent + r'\1', template)

        return engine.render(template, context)


class _InvertedNode(object):
    """Parse-tree node for an inverted section."""

    def __init__(self, key, parsed_section):
        self.key = key
        self.parsed_section = parsed_section

    def __repr__(self):
        return _format(self)

    def render(self, engine, context):
        # TODO: is there a bug because we are not using the same
        #   logic as in fetch_string()?
        data = engine.resolve_context(context, self.key)
        # Note that lambdas are considered truthy for inverted sections
        # per the spec.
        if data:
            return ''
        return self.parsed_section.render(engine, context)


class _SectionNode(object):
    """Parse-tree node for a (non-inverted) section."""

    # TODO: the template_ and parsed_template_ arguments don't both seem
    # to be necessary.  Can we remove one of them?  For example, if
    # callable(data) is True, then the initial parsed_template isn't used.
    def __init__(self, key, parsed, delimiters, template, index_begin, index_end):
        self.delimiters = delimiters
        self.key = key
        self.parsed = parsed
        self.template = template
        self.index_begin = index_begin
        self.index_end = index_end

    def __repr__(self):
        return _format(self, exclude=['delimiters', 'template'])

    def render(self, engine, context):
        values = engine.fetch_section_data(context, self.key)

        parts = []
        for val in values:
            if callable(val):
                # Lambdas special case section rendering and bypass pushing
                # the data value onto the context stack.  From the spec--
                #
                #   When used as the data value for a Section tag, the
                #   lambda MUST be treatable as an arity 1 function, and
                #   invoked as such (passing a String containing the
                #   unprocessed section contents).  The returned value
                #   MUST be rendered against the current delimiters, then
                #   interpolated in place of the section.
                #
                #  Also see--
                #
                #   https://github.com/defunkt/pystache/issues/113
                #
                # TODO: should we check the arity?
                val = val(self.template[self.index_begin : self.index_end])
                val = engine._render_value(val, context, delimiters=self.delimiters)
                parts.append(val)
                continue

            context.push(val)
            parts.append(self.parsed.render(engine, context))
            context.pop()

        return ''.join(parts)


class _Parser(object):
    """Stateful parser that turns a template string into a ParsedTemplate."""

    _delimiters = None
    _template_re = None

    def __init__(self, delimiters=None):
        if delimiters is None:
            delimiters = defaults.DELIMITERS

        self._delimiters = delimiters

    def _compile_delimiters(self):
        self._template_re = _compile_template_re(self._delimiters)

    def _change_delimiters(self, delimiters):
        self._delimiters = delimiters
        self._compile_delimiters()

    def parse(self, template):
        """
        Parse a template string from its beginning.

        This method starts with the parser's current tag delimiters; a
        set-delimiter tag encountered while parsing changes them for the
        remainder of the template.

        Arguments:

          template: a unicode string that is the template to parse.

        Returns:

          a ParsedTemplate instance.

        Raises:

          ParsingError: if a section end tag does not match the innermost
            open section, or if a section is still open at the end of
            the template.

        """
        self._compile_delimiters()

        start_index = 0
        parsed_section, section_key = None, None
        parsed_template = ParsedTemplate()

        # Stack of saved outer states, one entry per currently open section.
        states = []

        while True:
            match = self._template_re.search(template, start_index)

            if match is None:
                break

            match_index = match.start()
            end_index = match.end()

            matches = match.groupdict()

            # Normalize the matches dictionary.
            if matches['change'] is not None:
                matches.update(tag='=', tag_key=matches['delims'])
            elif matches['raw'] is not None:
                matches.update(tag='&', tag_key=matches['raw_name'])

            tag_type = matches['tag']
            tag_key = matches['tag_key']
            leading_whitespace = matches['whitespace']

            # Standalone (non-interpolation) tags consume the entire line,
            # both leading whitespace and trailing newline.
            did_tag_begin_line = match_index == 0 or template[match_index - 1] in END_OF_LINE_CHARACTERS
            did_tag_end_line = end_index == len(template) or template[end_index] in END_OF_LINE_CHARACTERS
            is_tag_interpolating = tag_type in ['', '&']

            if did_tag_begin_line and did_tag_end_line and not is_tag_interpolating:
                # Swallow an optional '\r' followed by an optional '\n'.
                if end_index < len(template) and template[end_index] == '\r':
                    end_index += 1
                if end_index < len(template) and template[end_index] == '\n':
                    end_index += 1
            elif leading_whitespace:
                # The tag is not standalone, so the whitespace in front of
                # it is ordinary template text.
                match_index += len(leading_whitespace)
                leading_whitespace = ''

            # Avoid adding spurious empty strings to the parse tree.
            if start_index != match_index:
                parsed_template.add(template[start_index:match_index])

            start_index = end_index

            if tag_type in ('#', '^'):
                # Cache current state.
                state = (tag_type, end_index, section_key, parsed_template)
                states.append(state)

                # Initialize new state
                section_key, parsed_template = tag_key, ParsedTemplate()
                continue

            if tag_type == '/':
                # With no open section, section_key is None and this check
                # fails, so states.pop() below never sees an empty stack.
                if tag_key != section_key:
                    raise ParsingError('Section end tag mismatch: %s != %s' % (tag_key, section_key))

                # Restore previous state with newly found section data.
                parsed_section = parsed_template

                (
                    tag_type,
                    section_start_index,
                    section_key,
                    parsed_template,
                ) = states.pop()
                node = self._make_section_node(
                    template,
                    tag_type,
                    tag_key,
                    parsed_section,
                    section_start_index,
                    match_index,
                )

            else:
                node = self._make_interpolation_node(tag_type, tag_key, leading_whitespace)

            parsed_template.add(node)

        # A section that is still open here would otherwise make us return
        # only the innermost fragment, silently dropping everything that
        # preceded the opening tag.
        if states:
            raise ParsingError('Section not closed: %s' % section_key)

        # Avoid adding spurious empty strings to the parse tree.
        if start_index != len(template):
            parsed_template.add(template[start_index:])

        return parsed_template

    def _make_interpolation_node(self, tag_type, tag_key, leading_whitespace):
        """
        Create and return a non-section node for the parse tree.

        A set-delimiter tag ('=') also switches the parser to the new
        delimiters as a side effect.

        Raises:

          ParsingError: if a set-delimiter tag holds fewer than two
            whitespace-separated delimiters.

        """
        # TODO: switch to using a dictionary instead of a bunch of ifs and elifs.
        if tag_type == '!':
            return _CommentNode()

        if tag_type == '=':
            delimiters = tag_key.split()
            # Fail with a parsing error here rather than with an IndexError
            # from inside the regular-expression compilation.
            if len(delimiters) < 2:
                raise ParsingError('Invalid delimiter change tag: %s' % repr(tag_key))
            self._change_delimiters(delimiters)
            return _ChangeNode(delimiters)

        if tag_type == '':
            return _EscapeNode(tag_key)

        if tag_type == '&':
            return _LiteralNode(tag_key)

        if tag_type == '>':
            return _PartialNode(tag_key, leading_whitespace)

        raise Exception('Invalid symbol for interpolation tag: %s' % repr(tag_type))

    def _make_section_node(
        self,
        template,
        tag_type,
        tag_key,
        parsed_section,
        section_start_index,
        section_end_index,
    ):
        """
        Create and return a section node for the parse tree.

        The start and end indices delimit the raw section contents within
        template; only '#' sections keep them (and the current delimiters).

        """
        if tag_type == '#':
            return _SectionNode(
                tag_key,
                parsed_section,
                self._delimiters,
                template,
                section_start_index,
                section_end_index,
            )

        if tag_type == '^':
            return _InvertedNode(tag_key, parsed_section)

        raise Exception('Invalid symbol for section tag: %s' % repr(tag_type))
