# coding: utf-8

"""
Exposes a parse() function to parse template strings.

"""

import re

from pystache import defaults
from pystache.parsed import ParsedTemplate


END_OF_LINE_CHARACTERS = [u'\r', u'\n']
# Captures the first character of every non-empty line.  Used by
# _PartialNode to prefix each such line of a partial with an indent.
NON_BLANK_RE = re.compile(u'^(.)', re.M)


# TODO: add some unit tests for this.
# TODO: add a test case that checks for spurious spaces.
# TODO: add test cases for delimiters.
def parse(template, delimiters=None):
    """
    Parse a unicode template string and return a ParsedTemplate instance.

    Arguments:

      template: a unicode template string.

      delimiters: a 2-tuple of delimiters.  Defaults to the package default.

    Raises:

      Exception: if template is not a unicode string.

      ParsingError: if the section tags in the template do not balance
        or a set-delimiter tag is malformed.

    Examples:

    >>> parsed = parse(u"Hey {{#who}}{{name}}!{{/who}}")
    >>> print str(parsed).replace('u', '')  # This is a hack to get the test to pass both in Python 2 and 3.
    ['Hey ', _SectionNode(key='who', index_begin=12, index_end=21, parsed=[_EscapeNode(key='name'), '!'])]

    """
    if not isinstance(template, unicode):
        raise Exception("Template is not unicode: %s" % type(template))
    return _Parser(delimiters).parse(template)


def _compile_template_re(delimiters):
    """
    Return a regular expression object (re.RegexObject) instance.

    The expression matches a single tag written with the given opening
    and closing delimiters, together with any spaces or tabs directly
    preceding it (captured in the "whitespace" group).

    Arguments:

      delimiters: a sequence whose first two items are the opening and
        closing delimiter strings.

    """
    # The possible tag type characters following the opening tag,
    # excluding "=" and "{".
    tag_types = "!>&/#^"

    # TODO: are we following this in the spec?
    #
    #   The tag's content MUST be a non-whitespace character sequence
    #   NOT containing the current closing delimiter.
    #
    tag = r"""
        (?P<whitespace>[\ \t]*)
        %(otag)s \s*
        (?:
          (?P<change>=) \s* (?P<delims>.+?)   \s* = |
          (?P<raw>{)    \s* (?P<raw_name>.+?) \s* } |
          (?P<tag>[%(tag_types)s]?)  \s* (?P<tag_key>[\s\S]+?)
        )
        \s* %(ctag)s
    """ % {'tag_types': tag_types, 'otag': re.escape(delimiters[0]), 'ctag': re.escape(delimiters[1])}

    return re.compile(tag, re.VERBOSE)


class ParsingError(Exception):

    """Raised when a template cannot be parsed."""

    pass


## Node types

def _format(obj, exclude=None):
    """
    Return a repr-style string for a parse-tree node.

    The result has the form "ClassName(key=..., attr=..., ...)".  The
    "key" attribute, when the node has one, is listed first; the other
    instance attributes follow in sorted order.

    Arguments:

      obj: the node to format.

      exclude: an optional iterable of attribute names to leave out.
        The argument is not modified.

    """
    # Work on a private copy so the caller's list is never mutated.
    hidden = set(exclude or ())
    hidden.add('key')

    attrs = obj.__dict__
    names = sorted(set(attrs) - hidden)
    # Not every node type has a key (e.g. comment and delimiter-change
    # nodes), so only list it when it is actually present.
    if 'key' in attrs:
        names.insert(0, 'key')

    args = ["%s=%s" % (name, repr(attrs[name])) for name in names]
    return "%s(%s)" % (obj.__class__.__name__, ", ".join(args))


class _CommentNode(object):

    """Parse-tree node for a comment tag.  Renders to the empty string."""

    def __repr__(self):
        return _format(self)

    def render(self, engine, context):
        return u''


class _ChangeNode(object):

    """
    Parse-tree node for a set-delimiter tag.  Renders to the empty string.

    """

    def __init__(self, delimiters):
        self.delimiters = delimiters

    def __repr__(self):
        return _format(self)

    def render(self, engine, context):
        return u''


class _EscapeNode(object):

    """Parse-tree node for an escaped interpolation tag."""

    def __init__(self, key):
        self.key = key

    def __repr__(self):
        return _format(self)

    def render(self, engine, context):
        """Return engine.escape() applied to the string fetched for the key."""
        s = engine.fetch_string(context, self.key)
        return engine.escape(s)


class _LiteralNode(object):

    """Parse-tree node for an unescaped interpolation tag."""

    def __init__(self, key):
        self.key = key

    def __repr__(self):
        return _format(self)

    def render(self, engine, context):
        """Return engine.literal() applied to the string fetched for the key."""
        s = engine.fetch_string(context, self.key)
        return engine.literal(s)


class _PartialNode(object):

    """
    Parse-tree node for a partial tag.

    Attributes:

      key: the name of the partial.

      indent: the spaces and tabs that preceded the tag in the template.

    """

    def __init__(self, key, indent):
        self.key = key
        self.indent = indent

    def __repr__(self):
        return _format(self)

    def render(self, engine, context):
        """Resolve the partial, indent its non-empty lines, and render it."""
        template = engine.resolve_partial(self.key)
        # Indent before rendering.
        template = NON_BLANK_RE.sub(self.indent + u'\\1', template)

        return engine.render(template, context)


class _InvertedNode(object):

    """Parse-tree node for an inverted section."""

    def __init__(self, key, parsed_section):
        self.key = key
        self.parsed_section = parsed_section

    def __repr__(self):
        return _format(self)

    def render(self, engine, context):
        """Render the section contents only if the key's data is falsy."""
        # TODO: is there a bug because we are not using the same
        #   logic as in fetch_string()?
        data = engine.resolve_context(context, self.key)
        # Note that lambdas are considered truthy for inverted sections
        # per the spec.
        if data:
            return u''
        return self.parsed_section.render(engine, context)


class _SectionNode(object):

    """
    Parse-tree node for a (non-inverted) section.

    Attributes:

      key: the section name.

      parsed: the parsed contents of the section.

      delimiters: the delimiters in effect when the section was closed.

      template: the full template string the section was parsed from.

      index_begin, index_end: the slice bounds of the raw section
        contents within template.

    """

    # TODO: the template_ and parsed_template_ arguments don't both seem
    # to be necessary.  Can we remove one of them?  For example, if
    # callable(data) is True, then the initial parsed_template isn't used.
    def __init__(self, key, parsed, delimiters, template, index_begin, index_end):
        self.delimiters = delimiters
        self.key = key
        self.parsed = parsed
        self.template = template
        self.index_begin = index_begin
        self.index_end = index_end

    def __repr__(self):
        return _format(self, exclude=['delimiters', 'template'])

    def render(self, engine, context):
        """
        Render the section once per value returned by
        engine.fetch_section_data() and return the concatenation.

        """
        values = engine.fetch_section_data(context, self.key)

        parts = []
        for val in values:
            if callable(val):
                # Lambdas special case section rendering and bypass pushing
                # the data value onto the context stack.  From the spec--
                #
                #   When used as the data value for a Section tag, the
                #   lambda MUST be treatable as an arity 1 function, and
                #   invoked as such (passing a String containing the
                #   unprocessed section contents).  The returned value
                #   MUST be rendered against the current delimiters, then
                #   interpolated in place of the section.
                #
                #  Also see--
                #
                #   https://github.com/defunkt/pystache/issues/113
                #
                # TODO: should we check the arity?
                val = val(self.template[self.index_begin:self.index_end])
                val = engine._render_value(val, context, delimiters=self.delimiters)
                parts.append(val)
                continue

            context.push(val)
            parts.append(self.parsed.render(engine, context))
            context.pop()

        return unicode(''.join(parts))


class _Parser(object):

    """
    Parses a template string into a ParsedTemplate.

    The parser tracks the current tag delimiters, which a set-delimiter
    tag inside the template can change while parsing is in progress.

    """

    _delimiters = None
    _template_re = None

    def __init__(self, delimiters=None):
        if delimiters is None:
            delimiters = defaults.DELIMITERS

        self._delimiters = delimiters

    def _compile_delimiters(self):
        """Recompile the tag regular expression for the current delimiters."""
        self._template_re = _compile_template_re(self._delimiters)

    def _change_delimiters(self, delimiters):
        """Switch to the given delimiters and recompile the tag expression."""
        self._delimiters = delimiters
        self._compile_delimiters()

    def parse(self, template):
        """
        Parse a template string and return the resulting parse tree.

        Parsing starts with the delimiters the parser was constructed
        with; set-delimiter tags in the template change them for the
        tags that follow.

        Arguments:

          template: a unicode string that is the template to parse.

        Returns:

          a ParsedTemplate instance.

        Raises:

          ParsingError: if a section end tag does not match the section
            currently open, if a section is still open when the end of
            the template is reached, or if a set-delimiter tag does not
            supply two delimiters.

        """
        self._compile_delimiters()

        start_index = 0
        section_key = None
        parsed_template = ParsedTemplate()

        # Stack of the enclosing (tag_type, contents start index,
        # section key, parse tree) states, one per currently open section.
        states = []

        while True:
            # Look the expression up on every pass: a set-delimiter tag
            # replaces it while we are scanning.
            match = self._template_re.search(template, start_index)

            if match is None:
                break

            match_index = match.start()
            end_index = match.end()

            matches = match.groupdict()

            # Normalize the matches dictionary.
            if matches['change'] is not None:
                matches.update(tag='=', tag_key=matches['delims'])
            elif matches['raw'] is not None:
                matches.update(tag='&', tag_key=matches['raw_name'])

            tag_type = matches['tag']
            tag_key = matches['tag_key']
            leading_whitespace = matches['whitespace']

            # Standalone (non-interpolation) tags consume the entire line,
            # both leading whitespace and trailing newline.
            did_tag_begin_line = match_index == 0 or template[match_index - 1] in END_OF_LINE_CHARACTERS
            did_tag_end_line = end_index == len(template) or template[end_index] in END_OF_LINE_CHARACTERS
            is_tag_interpolating = tag_type in ['', '&']

            if did_tag_begin_line and did_tag_end_line and not is_tag_interpolating:
                # Swallow the line terminator: "\r\n", "\r" or "\n".
                # startswith() is simply False at the end of the template.
                if template.startswith(u'\r', end_index):
                    end_index += 1
                if template.startswith(u'\n', end_index):
                    end_index += 1
            elif leading_whitespace:
                # The tag is not standalone, so the whitespace before it
                # is ordinary template text.
                match_index += len(leading_whitespace)
                leading_whitespace = ''

            # Avoid adding spurious empty strings to the parse tree.
            if start_index != match_index:
                parsed_template.add(template[start_index:match_index])

            start_index = end_index

            if tag_type in ('#', '^'):
                # Cache current state.
                states.append((tag_type, end_index, section_key, parsed_template))

                # Initialize new state
                section_key, parsed_template = tag_key, ParsedTemplate()
                continue

            if tag_type == '/':
                if tag_key != section_key:
                    raise ParsingError("Section end tag mismatch: %s != %s" % (tag_key, section_key))

                # Restore previous state with newly found section data.
                parsed_section = parsed_template

                (tag_type, section_start_index, section_key, parsed_template) = states.pop()
                node = self._make_section_node(template, tag_type, tag_key, parsed_section,
                                               section_start_index, match_index)

            else:
                node = self._make_interpolation_node(tag_type, tag_key, leading_whitespace)

            parsed_template.add(node)

        # A section that is still open here was never closed.  Returning
        # now would hand back only the inner section's tree and silently
        # drop everything parsed before the section tag.
        if states:
            raise ParsingError("Section not closed: %s" % section_key)

        # Avoid adding spurious empty strings to the parse tree.
        if start_index != len(template):
            parsed_template.add(template[start_index:])

        return parsed_template

    def _make_interpolation_node(self, tag_type, tag_key, leading_whitespace):
        """
        Create and return a non-section node for the parse tree.

        For a set-delimiter tag this also switches the parser over to
        the new delimiters.

        Raises:

          ParsingError: if a set-delimiter tag supplies fewer than two
            delimiters.

          Exception: if tag_type is not a known non-section tag type.

        """
        # TODO: switch to using a dictionary instead of a bunch of ifs and elifs.
        if tag_type == '!':
            return _CommentNode()

        if tag_type == '=':
            delimiters = tag_key.split()
            # Compiling the tag expression needs both an opening and a
            # closing delimiter; fail with a parsing error instead of
            # an IndexError from deep inside the compile step.
            if len(delimiters) < 2:
                raise ParsingError("Invalid delimiters in set-delimiter tag: %s" % repr(tag_key))
            self._change_delimiters(delimiters)
            return _ChangeNode(delimiters)

        if tag_type == '':
            return _EscapeNode(tag_key)

        if tag_type == '&':
            return _LiteralNode(tag_key)

        if tag_type == '>':
            return _PartialNode(tag_key, leading_whitespace)

        raise Exception("Invalid symbol for interpolation tag: %s" % repr(tag_type))

    def _make_section_node(self, template, tag_type, tag_key, parsed_section,
                           section_start_index, section_end_index):
        """
        Create and return a section node for the parse tree.

        Raises:

          Exception: if tag_type is neither '#' nor '^'.

        """
        if tag_type == '#':
            return _SectionNode(tag_key, parsed_section, self._delimiters,
                                template, section_start_index, section_end_index)

        if tag_type == '^':
            return _InvertedNode(tag_key, parsed_section)

        raise Exception("Invalid symbol for section tag: %s" % repr(tag_type))