1#!/usr/bin/env python
2#
3#
4# Licensed to the Apache Software Foundation (ASF) under one
5# or more contributor license agreements.  See the NOTICE file
6# distributed with this work for additional information
7# regarding copyright ownership.  The ASF licenses this file
8# to you under the Apache License, Version 2.0 (the
9# "License"); you may not use this file except in compliance
10# with the License.  You may obtain a copy of the License at
11#
12#   http://www.apache.org/licenses/LICENSE-2.0
13#
14# Unless required by applicable law or agreed to in writing,
15# software distributed under the License is distributed on an
16# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17# KIND, either express or implied.  See the License for the
18# specific language governing permissions and limitations
19# under the License.
20#
21#
22"""ezt.py -- easy templating
23
24ezt templates are simply text files in whatever format you so desire
25(such as XML, HTML, etc.) which contain directives sprinkled
26throughout.  With these directives it is possible to generate the
27dynamic content from the ezt templates.
28
29These directives are enclosed in square brackets.  If you are a
C-programmer, you might be familiar with the #ifdef directives of the C
31preprocessor 'cpp'.  ezt provides a similar concept.  Additionally EZT
32has a 'for' directive, which allows it to iterate (repeat) certain
33subsections of the template according to sequence of data items
34provided by the application.
35
36The final rendering is performed by the method generate() of the Template
37class.  Building template instances can either be done using external
38EZT files (convention: use the suffix .ezt for such files):
39
40    >>> template = Template("../templates/log.ezt")
41
42or by calling the parse() method of a template instance directly with
43a EZT template string:
44
45    >>> template = Template()
46    >>> template.parse('''<html><head>
47    ... <title>[title_string]</title></head>
48    ... <body><h1>[title_string]</h1>
49    ...    [for a_sequence] <p>[a_sequence]</p>
50    ...    [end] <hr>
51    ...    The [person] is [if-any state]in[else]out[end].
52    ... </body>
53    ... </html>
54    ... ''')
55
56The application should build a dictionary 'data' and pass it together
57with the output fileobject to the templates generate method:
58
59    >>> data = {'title_string' : "A Dummy Page",
60    ...         'a_sequence' : ['list item 1', 'list item 2', 'another element'],
61    ...         'person': "doctor",
62    ...         'state' : None }
63    >>> import sys
64    >>> template.generate(sys.stdout, data)
65    <html><head>
66    <title>A Dummy Page</title></head>
67    <body><h1>A Dummy Page</h1>
68     <p>list item 1</p>
69     <p>list item 2</p>
70     <p>another element</p>
71     <hr>
72    The doctor is out.
73    </body>
74    </html>
75
76Template syntax error reporting should be improved.  Currently it is
77very sparse (template line numbers would be nice):
78
79    >>> Template().parse("[if-any where] foo [else] bar [end unexpected args]")
80    Traceback (innermost last):
81      File "<stdin>", line 1, in ?
82      File "ezt.py", line 220, in parse
83        self.program = self._parse(text)
84      File "ezt.py", line 275, in _parse
85        raise ArgCountSyntaxError(str(args[1:]))
86    ArgCountSyntaxError: ['unexpected', 'args']
87    >>> Template().parse("[if unmatched_end]foo[end]")
88    Traceback (innermost last):
89      File "<stdin>", line 1, in ?
90      File "ezt.py", line 206, in parse
91        self.program = self._parse(text)
92      File "ezt.py", line 266, in _parse
93        raise UnmatchedEndError()
94    UnmatchedEndError
95
96
97Directives
98==========
99
 Several directives allow the use of dotted qualified names referring to objects
101 or attributes of objects contained in the data dictionary given to the
102 .generate() method.
103
104 Qualified names
105 ---------------
106
107   Qualified names have two basic forms: a variable reference, or a string
108   constant. References are a name from the data dictionary with optional
109   dotted attributes (where each intermediary is an object with attributes,
110   of course).
111
112   Examples:
113
114     [varname]
115
116     [ob.attr]
117
118     ["string"]
119
120 Simple directives
121 -----------------
122
123   [QUAL_NAME]
124
125   This directive is simply replaced by the value of the qualified name.
126   Numbers are converted to a string, and None becomes an empty string.
127
128   [QUAL_NAME QUAL_NAME ...]
129
130   The first value defines a substitution format, specifying constant
131   text and indices of the additional arguments. The arguments are then
   substituted and the result is inserted into the output stream.
133
134   Example:
135     ["abc %0 def %1 ghi %0" foo bar.baz]
136
137   Note that the first value can be any type of qualified name -- a string
138   constant or a variable reference. Use %% to substitute a percent sign.
139   Argument indices are 0-based.
140
141   [include "filename"]  or [include QUAL_NAME]
142
143   This directive is replaced by content of the named include file. Note
144   that a string constant is more efficient -- the target file is compiled
145   inline. In the variable form, the target file is compiled and executed
146   at runtime.
147
148   [insertfile "filename"] or [insertfile QUAL_NAME]
149
   This directive is replaced by content from the named file, but as a
151   literal string: directives in the target file are not expanded.  As
152   in the case of the "include" directive, using a string constant for
153   the filename is more efficient than the variable form.
154
155 Block directives
156 ----------------
157
158   [for QUAL_NAME] ... [end]
159
160   The text within the [for ...] directive and the corresponding [end]
161   is repeated for each element in the sequence referred to by the
162   qualified name in the for directive.  Within the for block this
   identifier now refers to the actual item indexed by this loop
164   iteration.
165
166   [if-any QUAL_NAME [QUAL_NAME2 ...]] ... [else] ... [end]
167
168   Test if any QUAL_NAME value is not None or an empty string or list.
169   The [else] clause is optional.  CAUTION: Numeric values are
170   converted to string, so if QUAL_NAME refers to a numeric value 0,
171   the then-clause is substituted!
172
173   [if-index INDEX_FROM_FOR odd] ... [else] ... [end]
174   [if-index INDEX_FROM_FOR even] ... [else] ... [end]
175   [if-index INDEX_FROM_FOR first] ... [else] ... [end]
176   [if-index INDEX_FROM_FOR last] ... [else] ... [end]
177   [if-index INDEX_FROM_FOR NUMBER] ... [else] ... [end]
178
179   These five directives work similar to [if-any], but are only useful
180   within a [for ...]-block (see above).  The odd/even directives are
181   for example useful to choose different background colors for
182   adjacent rows in a table.  Similar the first/last directives might
183   be used to remove certain parts (for example "Diff to previous"
184   doesn't make sense, if there is no previous).
185
186   [is QUAL_NAME STRING] ... [else] ... [end]
187   [is QUAL_NAME QUAL_NAME] ... [else] ... [end]
188
189   The [is ...] directive is similar to the other conditional
190   directives above.  But it allows to compare two value references or
191   a value reference with some constant string.
192
193   [define VARIABLE] ... [end]
194
195   The [define ...] directive allows you to create and modify template
196   variables from within the template itself.  Essentially, any data
   between the [define ...] and its matching [end] will be
198   expanded using the other template parsing and output generation
199   rules, and then stored as a string value assigned to the variable
200   VARIABLE.  The new (or changed) variable is then available for use
201   with other mechanisms such as [is ...] or [if-any ...], as long as
202   they appear later in the template.
203
204   [format "html|xml|js|url|raw"] ... [end]
205
206   The [format ...] directive creates a block in which any substitutions
207   are processed as though the template has been instantiated with the
   corresponding 'base_format' argument. Comma-separated format
209   specifiers perform nested encodings. In this case the encodings are
210   applied left-to-right.  For example the directive: [format "html,js"]
211   will HTML and then Javascript encode any inserted template variables.
212"""
213#
214# Copyright (C) 2001-2009 Greg Stein. All Rights Reserved.
215#
216# Redistribution and use in source and binary forms, with or without
217# modification, are permitted provided that the following conditions are
218# met:
219#
220# * Redistributions of source code must retain the above copyright
221#   notice, this list of conditions and the following disclaimer.
222#
223# * Redistributions in binary form must reproduce the above copyright
224#   notice, this list of conditions and the following disclaimer in the
225#   documentation and/or other materials provided with the distribution.
226#
227# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
228# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
229# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
230# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
231# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
232# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
233# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
234# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
235# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
236# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
237# POSSIBILITY OF SUCH DAMAGE.
238#
239#
240# This software is maintained by Greg and is available at:
241#    http://code.google.com/p/ezt/
242#
243
244import os, re, sys
245
246if sys.version_info[0] >= 3:
247  # Python >=3.0
248  long = int
249  unicode = str
250  from io import StringIO
251  from urllib.parse import quote_plus as urllib_parse_quote_plus
252else:
253  # Python <3.0
254  from urllib import quote_plus as urllib_parse_quote_plus
255  try:
256    from cStringIO import StringIO
257  except ImportError:
258    from StringIO import StringIO
259
#
# Formatting types
#
# These names are the keys of the FORMATTERS table below. They are what a
# 'base_format' argument or a [format "..."] directive may contain (several
# may be joined with commas).
#
FORMAT_RAW = 'raw'
FORMAT_HTML = 'html'
FORMAT_XML = 'xml'
FORMAT_JS = 'js'
FORMAT_URL = 'url'
268
269#
# This regular expression matches four alternatives:
#   expr: NEWLINE | DIRECTIVE | BRACKET | COMMENT
#   DIRECTIVE: '[' ITEM (whitespace ITEM)* ']'
#   ITEM: STRING | NAME
#   STRING: '"' (not-slash-or-dquote | '\' anychar)* '"'
#   NAME: (alphanum | '_' | '-' | '.')+
#   BRACKET: '[[]'
#   COMMENT: '[#' not-rbracket* ']'
#
# When used with the split() method, the return value will be composed of
# non-matching text and the three paren groups (NEWLINE, DIRECTIVE and
# BRACKET). Since the COMMENT matches are not placed into a group, their
# text is simply dropped (all three groups are None for such a match).
283#
# ITEM: either a double-quoted string (backslash escapes allowed) or a bare
# name made of word characters, '-' and '.'
_item = r'(?:"(?:[^\\"]|\\.)*"|[-\w.]+)'
_re_parse = re.compile(r'(\r?\n)|\[(%s(?: +%s)*)\]|(\[\[\])|\[#[^\]]*\]' %
                       (_item, _item))

# the same ITEM pattern on its own; used with findall() to break the text of
# one directive into its individual arguments
_re_args = re.compile(r'"(?:[^\\"]|\\.)*"|[-\w.]+')

# block commands and their argument counts
# ([if-any] takes a variable number of arguments and is therefore handled
# separately by the parser)
_block_cmd_specs = { 'if-index':2, 'for':1, 'is':2, 'define':1, 'format':1 }
_block_cmds = _block_cmd_specs.keys()

# two regular expressions for compressing whitespace. the first is used to
# compress any whitespace including a newline into a single newline. the
# second regex is used to compress runs of whitespace into a single space.
# NOTE(review): _re_newline does not appear to be referenced anywhere in
# this file; only _re_whitespace is used by the parser -- confirm before
# removing.
_re_newline = re.compile('[ \t\r\f\v]*\n\\s*')
_re_whitespace = re.compile(r'\s\s+')

# this regex is used to substitute arguments into a value. we split the value,
# replace the relevant pieces, and then put it all back together. splitting
# will produce a list of: TEXT ( splitter TEXT )*. splitter will be '%' or
# an integer.
_re_subst = re.compile('%(%|[0-9]+)')
305
306class Template:
307
308  def __init__(self, fname=None, compress_whitespace=1,
309               base_format=FORMAT_RAW):
310    self.compress_whitespace = compress_whitespace
311    if fname:
312      self.parse_file(fname, base_format)
313
314  def parse_file(self, fname, base_format=FORMAT_RAW):
315    "fname -> a string object with pathname of file containg an EZT template."
316
317    self.parse(_FileReader(fname), base_format)
318
319  def parse(self, text_or_reader, base_format=FORMAT_RAW):
320    """Parse the template specified by text_or_reader.
321
322    The argument should be a string containing the template, or it should
323    specify a subclass of ezt.Reader which can read templates. The base
324    format for printing values is given by base_format.
325    """
326    if not isinstance(text_or_reader, Reader):
327      # assume the argument is a plain text string
328      text_or_reader = _TextReader(text_or_reader)
329
330    self.program = self._parse(text_or_reader,
331                               base_printer=_parse_format(base_format))
332
333  def generate(self, fp, data):
334    if hasattr(data, '__getitem__') or hasattr(getattr(data, 'keys', None), '__call__'):
335      # a dictionary-like object was passed. convert it to an
336      # attribute-based object.
337      class _data_ob:
338        def __init__(self, d):
339          vars(self).update(d)
340      data = _data_ob(data)
341
342    ctx = _context()
343    ctx.data = data
344    ctx.for_index = { }
345    ctx.defines = { }
346    self._execute(self.program, fp, ctx)
347
348  def _parse(self, reader, for_names=None, file_args=(), base_printer=None):
349    """text -> string object containing the template.
350
351    This is a private helper function doing the real work for method parse.
352    It returns the parsed template as a 'program'.  This program is a sequence
353    made out of strings or (function, argument) 2-tuples.
354
355    Note: comment directives [# ...] are automatically dropped by _re_parse.
356    """
357
358    filename = reader.filename()
359    # parse the template program into: (TEXT NEWLINE DIRECTIVE BRACKET)* TEXT
360    parts = _re_parse.split(reader.text)
361
362    program = [ ]
363    stack = [ ]
364    if not for_names:
365      for_names = [ ]
366
367    if base_printer is None:
368      base_printer = ()
369    printers = [ base_printer ]
370
371    one_newline_copied = False
372    line_number = 1
373    for i in range(len(parts)):
374      piece = parts[i]
375      which = i % 4  # discriminate between: TEXT NEWLINE DIRECTIVE BRACKET
376      if which == 0:
377        # TEXT. append if non-empty.
378        if piece:
379          if self.compress_whitespace:
380            piece = _re_whitespace.sub(' ', piece)
381          program.append(piece)
382          one_newline_copied = False
383      elif which == 1:
384        # NEWLINE. append unless compress_whitespace requested
385        if piece:
386          line_number += 1
387          if self.compress_whitespace:
388            if not one_newline_copied:
389              program.append('\n')
390              one_newline_copied = True
391          else:
392            program.append(piece)
393      elif which == 3:
394        # BRACKET directive. append '[' if present.
395        if piece:
396          program.append('[')
397          one_newline_copied = False
398      elif piece:
399        # DIRECTIVE is present.
400        one_newline_copied = False
401        args = _re_args.findall(piece)
402        cmd = args[0]
403        if cmd == 'else':
404          if len(args) > 1:
405            raise ArgCountSyntaxError(str(args[1:]), filename, line_number)
406          ### check: don't allow for 'for' cmd
407          idx = stack[-1][1]
408          true_section = program[idx:]
409          del program[idx:]
410          stack[-1][3] = true_section
411        elif cmd == 'end':
412          if len(args) > 1:
413            raise ArgCountSyntaxError(str(args[1:]), filename, line_number)
414          # note: true-section may be None
415          try:
416            cmd, idx, args, true_section, start_line_number = stack.pop()
417          except IndexError:
418            raise UnmatchedEndError(None, filename, line_number)
419          else_section = program[idx:]
420          if cmd == 'format':
421            printers.pop()
422          else:
423            func = getattr(self, '_cmd_' + re.sub('-', '_', cmd))
424            program[idx:] = [ (func, (args, true_section, else_section),
425                               filename, line_number) ]
426            if cmd == 'for':
427              for_names.pop()
428        elif cmd in _block_cmds:
429          if len(args) > _block_cmd_specs[cmd] + 1:
430            raise ArgCountSyntaxError(str(args[1:]), filename, line_number)
431          ### this assumes arg1 is always a ref unless cmd is 'define'
432          if cmd != 'define':
433            args[1] = _prepare_ref(args[1], for_names, file_args)
434
435          # handle arg2 for the 'is' command
436          if cmd == 'is':
437            args[2] = _prepare_ref(args[2], for_names, file_args)
438          elif cmd == 'for':
439            for_names.append(args[1][0])  # append the refname
440          elif cmd == 'format':
441            if args[1][0]:
442              raise BadFormatConstantError(str(args[1:]), filename, line_number)
443            printers.append(_parse_format(args[1][1]))
444
445          # remember the cmd, current pos, args, and a section placeholder
446          stack.append([cmd, len(program), args[1:], None, line_number])
447        elif cmd == 'include' or cmd == 'insertfile':
448          is_insertfile = (cmd == 'insertfile')
449          # extra arguments are meaningless when using insertfile
450          if is_insertfile and len(args) != 2:
451            raise ArgCountSyntaxError(str(args), filename, line_number)
452          if args[1][0] == '"':
453            include_filename = args[1][1:-1]
454            if is_insertfile:
455              program.append(reader.read_other(include_filename).text)
456            else:
457              f_args = [ ]
458              for arg in args[2:]:
459                f_args.append(_prepare_ref(arg, for_names, file_args))
460              program.extend(self._parse(reader.read_other(include_filename),
461                                         for_names, f_args, printers[-1]))
462          else:
463            if len(args) != 2:
464              raise ArgCountSyntaxError(str(args), filename, line_number)
465            if is_insertfile:
466              cmd = self._cmd_insertfile
467            else:
468              cmd = self._cmd_include
469            program.append((cmd,
470                            (_prepare_ref(args[1], for_names, file_args),
471                             reader, printers[-1]), filename, line_number))
472        elif cmd == 'if-any':
473          f_args = [ ]
474          for arg in args[1:]:
475            f_args.append(_prepare_ref(arg, for_names, file_args))
476          stack.append(['if-any', len(program), f_args, None, line_number])
477        else:
478          # implied PRINT command
479          if len(args) > 1:
480            f_args = [ ]
481            for arg in args:
482              f_args.append(_prepare_ref(arg, for_names, file_args))
483            program.append((self._cmd_subst,
484                            (printers[-1], f_args[0], f_args[1:]),
485                            filename, line_number))
486          else:
487            valref = _prepare_ref(args[0], for_names, file_args)
488            program.append((self._cmd_print, (printers[-1], valref),
489                            filename, line_number))
490
491    if stack:
492      raise UnclosedBlocksError('Block opened at line %s' % stack[-1][4],
493                                filename=filename)
494    return program
495
496  def _execute(self, program, fp, ctx):
497    """This private helper function takes a 'program' sequence as created
498    by the method '_parse' and executes it step by step.  strings are written
499    to the file object 'fp' and functions are called.
500    """
501    for step in program:
502      if isinstance(step, str):
503        fp.write(step)
504      else:
505        method, method_args, filename, line_number = step
506        method(method_args, fp, ctx, filename, line_number)
507
508  def _cmd_print(self, transforms_valref, fp, ctx, filename, line_number):
509    (transforms, valref) = transforms_valref
510    value = _get_value(valref, ctx, filename, line_number)
511    # if the value has a 'read' attribute, then it is a stream: copy it
512    if hasattr(value, 'read'):
513      while 1:
514        chunk = value.read(16384)
515        if not chunk:
516          break
517        for t in transforms:
518          chunk = t(chunk)
519        fp.write(chunk)
520    else:
521      for t in transforms:
522        value = t(value)
523      fp.write(value)
524
525  def _cmd_subst(self, transforms_valref_args, fp, ctx, filename,
526                 line_number):
527    (transforms, valref, args) = transforms_valref_args
528    fmt = _get_value(valref, ctx, filename, line_number)
529    parts = _re_subst.split(fmt)
530    for i in range(len(parts)):
531      piece = parts[i]
532      if i%2 == 1 and piece != '%':
533        idx = int(piece)
534        if idx < len(args):
535          piece = _get_value(args[idx], ctx, filename, line_number)
536        else:
537          piece = '<undef>'
538      for t in transforms:
539        piece = t(piece)
540      fp.write(piece)
541
542  def _cmd_include(self, valref_reader_printer, fp, ctx, filename,
543                   line_number):
544    (valref, reader, printer) = valref_reader_printer
545    fname = _get_value(valref, ctx, filename, line_number)
546    ### note: we don't have the set of for_names to pass into this parse.
547    ### I don't think there is anything to do but document it
548    self._execute(self._parse(reader.read_other(fname), base_printer=printer),
549                  fp, ctx)
550
551  def _cmd_insertfile(self, valref_reader_printer, fp, ctx, filename,
552                      line_number):
553    (valref, reader, printer) = valref_reader_printer
554    fname = _get_value(valref, ctx, filename, line_number)
555    fp.write(reader.read_other(fname).text)
556
557  def _cmd_if_any(self, args, fp, ctx, filename, line_number):
558    "If any value is a non-empty string or non-empty list, then T else F."
559    (valrefs, t_section, f_section) = args
560    value = 0
561    for valref in valrefs:
562      if _get_value(valref, ctx, filename, line_number):
563        value = 1
564        break
565    self._do_if(value, t_section, f_section, fp, ctx)
566
567  def _cmd_if_index(self, args, fp, ctx, filename, line_number):
568    ((valref, value), t_section, f_section) = args
569    list, idx = ctx.for_index[valref[0]]
570    if value == 'even':
571      value = idx % 2 == 0
572    elif value == 'odd':
573      value = idx % 2 == 1
574    elif value == 'first':
575      value = idx == 0
576    elif value == 'last':
577      value = idx == len(list)-1
578    else:
579      value = idx == int(value)
580    self._do_if(value, t_section, f_section, fp, ctx)
581
582  def _cmd_is(self, args, fp, ctx, filename, line_number):
583    ((left_ref, right_ref), t_section, f_section) = args
584    right_value = _get_value(right_ref, ctx, filename, line_number)
585    left_value = _get_value(left_ref, ctx, filename, line_number)
586    value = left_value.lower() == right_value.lower()
587    self._do_if(value, t_section, f_section, fp, ctx)
588
589  def _do_if(self, value, t_section, f_section, fp, ctx):
590    if t_section is None:
591      t_section = f_section
592      f_section = None
593    if value:
594      section = t_section
595    else:
596      section = f_section
597    if section is not None:
598      self._execute(section, fp, ctx)
599
600  def _cmd_for(self, args, fp, ctx, filename, line_number):
601    ((valref,), unused, section) = args
602    list = _get_value(valref, ctx, filename, line_number)
603    refname = valref[0]
604    if isinstance(list, str):
605      raise NeedSequenceError(refname, filename, line_number)
606    ctx.for_index[refname] = idx = [ list, 0 ]
607    for item in list:
608      self._execute(section, fp, ctx)
609      idx[1] = idx[1] + 1
610    del ctx.for_index[refname]
611
612  def _cmd_define(self, args, fp, ctx, filename, line_number):
613    ((name,), unused, section) = args
614    valfp = StringIO()
615    if section is not None:
616      self._execute(section, valfp, ctx)
617    ctx.defines[name] = valfp.getvalue()
618
def boolean(value):
  """Map a Python truth value onto what [if-any bool_var] expects.

  Returns 'yes' for a true value and None for a false one.
  """
  return 'yes' if value else None
624
625
def _prepare_ref(refname, for_names, file_args):
  """Turn the text of one directive argument into a `value reference'.

  refname -> a string containing a dotted identifier (example:
             "foo.bar.bang") or a double-quoted string constant.
  for_names -> a list of active for sequences.
  file_args -> the prepared references bound to arg0, arg1, ... when the
               template is being compiled as an inline include.

  Returns a `value reference', a 3-tuple made out of (refname, start, rest),
  for fast access later. For a string constant the tuple is
  (None, <the string without its quotes>, None).

  Only names of the form 'arg' + plain decimal digits refer to include
  arguments. Anything else (for example 'arg-1' or 'arg1_0', which int()
  would otherwise accept as -1 and 10) is treated as an ordinary name.
  """
  # is the reference a string constant?
  if refname[0] == '"':
    return None, refname[1:-1], None

  parts = refname.split('.')
  start = parts[0]
  rest = parts[1:]

  # if this is an include-argument, then just return the prepared ref
  if start[:3] == 'arg':
    suffix = start[3:]
    try:
      idx = int(suffix) if suffix.isdigit() else -1
    except ValueError:
      # isdigit() accepts some characters that int() rejects
      idx = -1
    if 0 <= idx < len(file_args):
      orig_refname, start, more_rest = file_args[idx]
      if more_rest is None:
        # the include-argument was a string constant
        return None, start, None

      # prepend the argument's "rest" for our further processing
      rest[:0] = more_rest

      # rewrite the refname to ensure that any potential 'for' processing
      # has the correct name
      ### this can make it hard for debugging include files since we lose
      ### the 'argNNN' names
      if not rest:
        return start, start, [ ]
      refname = start + '.' + '.'.join(rest)

  if for_names:
    # From last to first part, check if this reference is part of a for loop
    for i in range(len(parts), 0, -1):
      name = '.'.join(parts[:i])
      if name in for_names:
        return refname, name, parts[i:]

  return refname, start, rest
673
def _get_value(refname_start_rest, ctx, filename, line_number):
  """Resolve a prepared `value reference' against the execution context.

  refname_start_rest -> a (refname, start, rest) tuple as built by
                        _prepare_ref.
  ctx -> an execution context instance.

  The starting name is searched in the active for loops first, then in the
  values created by [define], then in the attributes of the data object.
  Numbers are returned as strings and None as an empty string; any other
  value is returned unchanged.

  Raises UnknownReference if the name or one of its attributes is missing.
  """
  refname, start, rest = refname_start_rest
  if rest is None:
    # string constant: 'start' holds the text itself
    return start

  # find the object that the dotted name starts from
  if start in ctx.for_index:
    seq, pos = ctx.for_index[start]
    ob = seq[pos]
  elif start in ctx.defines:
    ob = ctx.defines[start]
  elif hasattr(ctx.data, start):
    ob = getattr(ctx.data, start)
  else:
    raise UnknownReference(refname, filename, line_number)

  # follow the remaining attribute names one by one
  for attr in rest:
    try:
      ob = getattr(ob, attr)
    except AttributeError:
      raise UnknownReference(refname, filename, line_number)

  # hand back strings for the plain Python types that templates print
  if ob is None:
    return ''
  if isinstance(ob, (int, long, float)):
    return str(ob)

  # string or a sequence
  return ob
713
def _replace(s, replace_map):
  """Apply every (original, replacement) pair of replace_map to s, in order.

  Each replacement works on the result of the previous one, so the order
  of the pairs matters.
  """
  result = s
  for pair in replace_map:
    old, new = pair
    result = result.replace(old, new)
  return result
718
# (original, replacement) pairs for Javascript escaping. _replace applies
# them one after another, so the backslash pair has to come first:
# otherwise the backslashes inserted by the later pairs would be escaped
# again.
REPLACE_JS_MAP = (
  ('\\', r'\\'), ('\t', r'\t'), ('\n', r'\n'), ('\r', r'\r'),
  ('"', r'\x22'), ('\'', r'\x27'), ('&', r'\x26'),
  ('<', r'\x3c'), ('>', r'\x3e'), ('=', r'\x3d'),
)

# Various unicode whitespace
if sys.version_info[0] >= 3:
  # Python >=3.0
  REPLACE_JS_UNICODE_MAP = (
    ('\u0085', r'\u0085'), ('\u2028', r'\u2028'), ('\u2029', r'\u2029')
  )
else:
  # Python <3.0
  # eval() of a constant literal (no external input involved): it keeps the
  # u'' prefix out of this module's own syntax -- presumably so the file
  # still compiles on Python 3 versions without u'' literals; confirm.
  REPLACE_JS_UNICODE_MAP = eval("((u'\u0085', r'\u0085'), (u'\u2028', r'\u2028'), (u'\u2029', r'\u2029'))")

# Why not cgi.escape? It doesn't do single quotes which are occasionally
# used to contain HTML attributes and event handler definitions (unfortunately)
# The '&' pair comes first so that the ampersands of the entities inserted
# by the later pairs are not escaped a second time.
REPLACE_HTML_MAP = (
  ('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'),
  ('"', '&quot;'), ('\'', '&#39;'),
)
741
def _js_escape(s):
  """Escape s for use inside Javascript, using the REPLACE_JS_* maps."""
  escaped = _replace(s, REPLACE_JS_MAP)
  ### perhaps attempt to coerce the string to unicode and then replace?
  if not isinstance(escaped, unicode):
    return escaped
  # only unicode text can contain the special whitespace characters
  return _replace(escaped, REPLACE_JS_UNICODE_MAP)
748
def _html_escape(s):
  """Escape s for HTML by applying the pairs of REPLACE_HTML_MAP."""
  escaped = _replace(s, REPLACE_HTML_MAP)
  return escaped
751
def _url_escape(s):
  """Quote s for use in a URL; unicode text is UTF-8 encoded first."""
  ### quote_plus barfs on non-ASCII characters. According to
  ### http://www.w3.org/International/O-URL-code.html URIs should be
  ### UTF-8 encoded first.
  raw = s.encode('utf8') if isinstance(s, unicode) else s
  return urllib_parse_quote_plus(raw)
759
# Maps each format name to the function that escapes a value for it.
# None stands for "no escaping": _parse_format leaves such entries out of
# the list of functions it returns.
FORMATTERS = {
  FORMAT_RAW: None,
  FORMAT_HTML: _html_escape,
  FORMAT_XML: _html_escape,   ### use the same quoting as HTML for now
  FORMAT_JS: _js_escape,
  FORMAT_URL: _url_escape,
}
767
def _parse_format(format_string=FORMAT_RAW):
  """Translate a comma-separated format specification into functions.

  Returns the list of escaping functions for the named formats, in the
  order given; formats that need no escaping contribute nothing.

  Raises UnknownFormatConstantError if a name is not in FORMATTERS.
  """
  funcs = []
  for spec in format_string.split(','):
    try:
      func = FORMATTERS[spec]
    except KeyError:
      raise UnknownFormatConstantError(format_string)
    if func is not None:
      funcs.append(func)
  return funcs
778
class _context:
  """Plain attribute holder for the state of one template execution."""
781
782
class Reader:
  """Base class of all template readers; lets EZT recognise Reader objects."""
  def filename(self):
    """Return a placeholder name for readers that do not override this."""
    return '(%s does not provide filename() method)' % (repr(self),)
787
class _FileReader(Reader):
  """Reads templates from the filesystem.

  Attributes set by the constructor:
    text  -- the content of the file `fname`
    fname -- the pathname the reader was created with
  """
  def __init__(self, fname):
    # close the file explicitly (even if read() fails) rather than leaving
    # the open handle to the garbage collector
    fp = open(fname, 'rb')
    try:
      self.text = fp.read()
    finally:
      fp.close()
    if sys.version_info[0] >= 3:
      # Python >=3.0: turn the bytes into str (decode() defaults to UTF-8)
      self.text = self.text.decode()
    self._dir = os.path.dirname(fname)
    self.fname = fname
  def read_other(self, relative):
    """Return a reader for `relative`, resolved against this file's directory."""
    return _FileReader(os.path.join(self._dir, relative))
  def filename(self):
    """Return the pathname this reader was created with."""
    return self.fname
801
class _TextReader(Reader):
  """Presents a template given directly as text through the Reader interface."""
  def __init__(self, text):
    self.text = text
  def read_other(self, relative):
    """Always fails: plain text has no location to resolve `relative` against."""
    raise BaseUnavailableError()
  def filename(self):
    """Return the fixed placeholder name used for text templates."""
    return '(text)'
810
811
class EZTException(Exception):
  """Parent class of all EZT exceptions.

  Carries an optional message plus the template file name and line number
  the problem was found at; str() joins whichever of them are present.
  """
  def __init__(self, message=None, filename=None, line_number=None):
    self.message = message
    self.filename = filename
    self.line_number = line_number
  def __str__(self):
    # (value, prefix) pairs in output order; the message has no prefix
    labelled = (
      (self.message, None),
      (self.filename, 'in file '),
      (self.line_number, 'at line '),
    )
    chunks = []
    for value, prefix in labelled:
      if value is None:
        continue
      if prefix is None:
        chunks.append(value)
      else:
        chunks.append(prefix + str(value))
    return ' '.join(chunks)
827
class ArgCountSyntaxError(EZTException):
  """A bracket directive got the wrong number of arguments.

  Raised while parsing; the message is the list of offending arguments.
  """

class UnknownReference(EZTException):
  """The template references an object not contained in the data dictionary.

  Also raised when an attribute named in a dotted reference does not exist.
  """

class NeedSequenceError(EZTException):
  """The object dereferenced by the template is no sequence (tuple or list).

  Raised by the [for] directive when it is given a string.
  """

class UnclosedBlocksError(EZTException):
  """A block is still open at the end of the template.

  This error may be simply a missing [end].
  """

class UnmatchedEndError(EZTException):
  """An [end] was found while no block was open.

  This error may be caused by a misspelled if directive.
  """

class BaseUnavailableError(EZTException):
  """Base location is unavailable, which disables includes.

  Raised when a template that was given as plain text tries to read
  another file.
  """

class BadFormatConstantError(EZTException):
  """Format specifiers must be string constants."""

class UnknownFormatConstantError(EZTException):
  """The format specifier is an unknown value.

  The message is the complete format specification that was rejected.
  """
851
852
853# --- standard test environment ---
def test_parse():
  """Check how _re_parse.split() carves up template text.

  Every match contributes three group slots to the result, in the order
  NEWLINE, DIRECTIVE, BRACKET. The slots of the alternatives that did not
  take part in the match are None, and the DIRECTIVE slot holds the text
  of the directive without its surrounding brackets.
  """
  assert _re_parse.split('[a]') == ['', None, 'a', None, '']
  assert _re_parse.split('[a] [b]') == \
         ['', None, 'a', None, ' ', None, 'b', None, '']
  assert _re_parse.split('[a c] [b]') == \
         ['', None, 'a c', None, ' ', None, 'b', None, '']
  assert _re_parse.split('x [a] y [b] z') == \
         ['x ', None, 'a', None, ' y ', None, 'b', None, ' z']
  assert _re_parse.split('[a "b" c "d"]') == \
         ['', None, 'a "b" c "d"', None, '']
  assert _re_parse.split(r'["a \"b[foo]" c.d f]') == \
         ['', None, '"a \\"b[foo]" c.d f', None, '']
  # newline, literal bracket and comment: a comment fills no group at all
  assert _re_parse.split('a\n[[]b[# note]c') == \
         ['a', '\n', None, None, '', None, None, '[[]',
          'b', None, None, None, 'c']
866
def _test(argv):
  """Run the doctests of the ezt module and return doctest's result.

  argv -> list of command line arguments; "-v" switches on verbose output.
  """
  import doctest
  import ezt
  return doctest.testmod(ezt, verbose=("-v" in argv))
871
if __name__ == "__main__":
  # invoke unit test for this module:
  # the exit status is the first item of the doctest result, i.e. the
  # number of failed examples (0 when everything passed)
  import sys
  sys.exit(_test(sys.argv)[0])
876