1# -*- coding: iso-8859-1 -*-
2"""
3    MoinMoin - MoinMoin Wiki Markup Parser
4
5    @copyright: 2000-2002 Juergen Hermann <jh@web.de>,
6                2006-2008 MoinMoin:ThomasWaldmann,
7                2007 by MoinMoin:ReimarBauer
8    @license: GNU GPL, see COPYING for details.
9"""
10
11import re
12
13from MoinMoin import log
14logging = log.getLogger(__name__)
15
16from MoinMoin import config, wikiutil, macro
17from MoinMoin.Page import Page
18
# {{{#!wiki comment ... }}} has different output depending on the user's
# profile settings, hence the dependency on 'user'.
Dependencies = ['user']


def _(x):
    """Return x unchanged.

    Used below to wrap string literals at class definition time
    (presumably to mark them for later translation - confirm).
    """
    return x
23
24class Parser:
25    """
26        Parse wiki format markup (and call the formatter to generate output).
27
28        All formatting commands can be parsed one line at a time, though
29        some state is carried over between lines.
30
31        Methods named like _*_repl() are responsible to handle the named regex patterns.
32    """
33
    # NOTE(review): presumably the file name extensions associated with this
    # markup; the visible part of the class never reads it - confirm with callers.
    extensions = ['.moin']
    # allow caching
    caching = 1
    # expose the module-level dependency list as a class attribute as well
    Dependencies = Dependencies
    # Short cheat sheet of the markup, itself written in wiki markup. It is
    # passed through the module-level identity function _(), so the literal is
    # stored unchanged here.
    quickhelp = _(u"""\
 Emphasis:: <<Verbatim('')>>''italics''<<Verbatim('')>>; <<Verbatim(''')>>'''bold'''<<Verbatim(''')>>; <<Verbatim(''''')>>'''''bold italics'''''<<Verbatim(''''')>>; <<Verbatim('')>>''mixed ''<<Verbatim(''')>>'''''bold'''<<Verbatim(''')>> and italics''<<Verbatim('')>>; <<Verbatim(----)>> horizontal rule.
 Headings:: = Title 1 =; == Title 2 ==; === Title 3 ===; ==== Title 4 ====; ===== Title 5 =====.
 Lists:: space and one of: * bullets; 1., a., A., i., I. numbered items; 1.#n start numbering at n; space alone indents.
 Links:: <<Verbatim(JoinCapitalizedWords)>>; <<Verbatim([[target|linktext]])>>.
 Tables:: || cell text |||| cell text spanning 2 columns ||;    no trailing white space allowed after tables or titles.

(!) For more help, see HelpOnEditing or HelpOnMoinWikiSyntax.
""")
47
    # some common strings
    # (prefix markers for relative page names and their lengths, taken over
    # from wikiutil so they can be referenced as class attributes)
    CHILD_PREFIX = wikiutil.CHILD_PREFIX
    CHILD_PREFIX_LEN = wikiutil.CHILD_PREFIX_LEN
    PARENT_PREFIX = wikiutil.PARENT_PREFIX
    PARENT_PREFIX_LEN = wikiutil.PARENT_PREFIX_LEN

    # regex-escaped punctuation characters; interpolated as %(punct)s into
    # character classes of the rules below
    punct_pattern = re.escape(u'''"\'}]|:,.)?!''')
    # alternation ("a|b|c") of all schemes listed in config.url_schemas
    url_scheme = u'|'.join(config.url_schemas)

    # some common rules
    # url_rule: a free-standing url (scheme:non-whitespace). The match is
    # non-greedy and must be followed by end of line, whitespace, or a run of
    # punctuation that is itself followed by whitespace/end of line, so that
    # trailing punctuation is not swallowed into the url. Written for
    # re.VERBOSE; it is interpolated into scan_rules as the "url" group.
    url_rule = ur'''
        (?:^|(?<=\W))  # require either beginning of line or some non-alphanum char (whitespace, punctuation) to the left
        (?P<url_target>  # capture whole url there
         (?P<url_scheme>%(url_scheme)s)  # some scheme
         \:
         \S+?  # anything non-whitespace
        )
        (?:$|(?=\s|[%(punct)s]+(\s|$)))  # require either end of line or some whitespace or some punctuation+blank/eol afterwards
    ''' % {
        'url_scheme': url_scheme,
        'punct': punct_pattern,
    }
70
71    # this is for a free (non-bracketed) interwiki link - to avoid false positives,
72    # we are rather restrictive here (same as in moin 1.5: require that the
73    # interwiki_wiki name starts with an uppercase letter A-Z. Later, the code
74    # also checks whether the wiki name is in the interwiki map (if not, it renders
75    # normal text, no link):
76    interwiki_rule = ur'''
77        (?:^|(?<=\W))  # require either beginning of line or some non-alphanum char (whitespace, punctuation) to the left
78        (?P<interwiki_wiki>[A-Z][a-zA-Z]+)  # interwiki wiki name
79        \:
80        (?P<interwiki_page>  # interwiki page name
81         (?=[^ ]*[%(u)s%(l)s0..9][^ ]*\ )  # make sure there is something non-blank with at least one alphanum letter following
82         [^\s%(punct)s]+  # we take all until we hit some blank or punctuation char ...
83        )
84    ''' % {
85        'u': config.chars_upper,
86        'l': config.chars_lower,
87        'punct': punct_pattern,
88    }
89
    # BE CAREFUL: if you do changes to word_rule, consider doing them also to word_rule_js (see below)
    #
    # word_rule: a CamelCase wiki word, optionally preceded by "!" (word_bang),
    # optionally prefixed by parent/child prefixes, optionally consisting of
    # several "/"-separated CamelCase parts and optionally followed by
    # "#anchor". The word must not be directly preceded or followed by an
    # upper/lower case letter or a slash. Written for re.VERBOSE; interpolated
    # into scan_rules as the "word" group.
    word_rule = ur'''
        (?:
         (?<![%(u)s%(l)s/])  # require anything not upper/lower/slash before
         |
         ^  # ... or beginning of line
        )
        (?P<word_bang>\!)?  # configurable: avoid getting CamelCase rendered as link
        (?P<word_name>
         (?:
          (%(parent)s)*  # there might be either ../ parent prefix(es)
          |
          ((?<!%(child)s)%(child)s)?  # or maybe a single / child prefix (but not if we already had it before)
         )
         (
          ((?<!%(child)s)%(child)s)?  # there might be / child prefix (but not if we already had it before)
          (?:[%(u)s][%(l)s]+){2,}  # at least 2 upper>lower transitions make CamelCase
         )+  # we can have MainPage/SubPage/SubSubPage ...
         (?:
          \#  # anchor separator          TODO check if this does not make trouble at places where word_rule is used
          (?P<word_anchor>\S+)  # some anchor name
         )?
        )
        (?:
         (?![%(u)s%(l)s/])  # require anything not upper/lower/slash following
         |
         $  # ... or end of line
        )
    ''' % {
        'u': config.chars_upper,
        'l': config.chars_lower,
        'child': re.escape(CHILD_PREFIX),
        'parent': re.escape(PARENT_PREFIX),
    }
    # simplified word_rule for FCKeditor's "unlink" plugin (puts a ! in front of a WikiName if WikiName matches word_rule_js),
    # because JavaScript can not use group names and verbose regular expressions!
    # Compared with word_rule it has no named groups, no embedded comments or
    # layout whitespace, and no optional leading "!".
    word_rule_js = (
        ur'''(?:(?<![%(u)s%(l)s/])|^)'''
        ur'''(?:'''
         ur'''(?:(%(parent)s)*|((?<!%(child)s)%(child)s)?)'''
         ur'''(((?<!%(child)s)%(child)s)?(?:[%(u)s][%(l)s]+){2,})+'''
         ur'''(?:\#(?:\S+))?'''
        ur''')'''
        ur'''(?:(?![%(u)s%(l)s/])|$)'''
    ) % {
        'u': config.chars_upper,
        'l': config.chars_lower,
        'child': re.escape(CHILD_PREFIX),
        'parent': re.escape(PARENT_PREFIX),
    }
140
    # link targets:
    # Three alternatives, tried in this order: an external address
    # ("<scheme>:<rest>" for one of the configured url schemes), an
    # "attachment:" / "drawing:" reference, or - because page_rule accepts any
    # text - a page name as the fallback.
    extern_rule = r'(?P<extern_addr>(?P<extern_scheme>%s)\:.*)' % url_scheme
    attach_rule = r'(?P<attach_scheme>attachment|drawing)\:(?P<attach_addr>.*)'
    page_rule = r'(?P<page_name>.*)'

    link_target_rules = r'|'.join([
        extern_rule,
        attach_rule,
        page_rule,
    ])
    # compiled once, when the class body is executed
    link_target_re = re.compile(link_target_rules, re.VERBOSE|re.UNICODE)
152
    # link_rule: bracketed link markup [[target|description|parameters]].
    # Description and parameters are optional; the description is either a
    # {{...}} transclusion (usually an image) or simple text without "|".
    # The string contains no %-placeholders itself and is written for
    # re.VERBOSE; it is interpolated into scan_rules.
    link_rule = r"""
        (?P<link>
            \[\[  # link target
            \s*  # strip space
            (?P<link_target>[^|]+?)
            \s*  # strip space
            (
                \|  # link description
                \s*  # strip space
                (?P<link_desc>
                    (?:  # 1. we have either a transclusion here (usually a image)
                        \{\{
                        \s*[^|]+?\s*  # usually image target (strip space)
                        (\|\s*[^|]*?\s*  # usually image alt text (optional, strip space)
                            (\|\s*[^|]*?\s*  # transclusion parameters (usually key="value" format, optional, strip space)
                            )?
                        )?
                        \}\}
                    )
                    |
                    (?:  # 2. or we have simple text here.
                        [^|]+?
                    )
                )?
                \s*  # strip space
                (
                    \|  # link parameters
                    \s*  # strip space
                    (?P<link_params>[^|]+?)?
                    \s*  # strip space
                )?
            )?
            \]\]
        )
    """
188
    # transclude_rule: transclusion markup {{target|description|parameters}},
    # description and parameters being optional. Written for re.VERBOSE; used
    # both in link_desc_rules below and (interpolated) in scan_rules.
    transclude_rule = r"""
        (?P<transclude>
            \{\{
            \s*(?P<transclude_target>[^|]+?)\s*  # usually image target (strip space)
            (\|\s*(?P<transclude_desc>[^|]+?)?\s*  # usually image alt text (optional, strip space)
                (\|\s*(?P<transclude_params>[^|]+?)?\s*  # transclusion parameters (usually key="value" format, optional, strip space)
                )?
            )?
            \}\}
        )
    """
    # text_rule: a non-empty run of characters up to (not including) the next "|"
    text_rule = r"""
        (?P<simple_text>
            [^|]+  # some text (not empty, does not contain separator)
        )
    """
    # link descriptions:
    # (either a transclusion or simple text; the transclusion is tried first)
    link_desc_rules = r'|'.join([
            transclude_rule,
            text_rule,
    ])
    link_desc_re = re.compile(link_desc_rules, re.VERBOSE|re.UNICODE)

    # transclude descriptions:
    # (simple text only)
    transclude_desc_rules = r'|'.join([
            text_rule,
    ])
    transclude_desc_re = re.compile(transclude_desc_rules, re.VERBOSE|re.UNICODE)
217
    # lists:
    # ol_rule: an indented ordered-list marker (digits or one of a, A, i, I,
    # followed by "."), an optional "#<number>" start value and one whitespace
    # character. Also interpolated into scan_rules as the "ol" group.
    ol_rule = ur"""
        ^\s+  # indentation
        (?:[0-9]+|[aAiI])\. # arabic, alpha, roman counting
        (?:\#\d+)?  # optional start number
        \s  # require one blank afterwards
    """
    ol_re = re.compile(ol_rule, re.VERBOSE|re.UNICODE)

    # dl_rule: an indented definition-list term, i.e. the shortest text that is
    # followed by "::" and one whitespace character. Also interpolated into
    # scan_rules as the "dl" group.
    dl_rule = ur"""
        ^\s+  # indentation
        .*?::  # definition term::
        \s  # require on blank afterwards
    """
    dl_re = re.compile(dl_rule, re.VERBOSE|re.UNICODE)

    # others
    # leading whitespace of a line (always matches, possibly the empty string)
    indent_re = re.compile(ur"^\s*", re.UNICODE)
    # a line break, with or without a preceding carriage return
    eol_re = re.compile(r'\r?\n', re.UNICODE)

    # this is used inside parser/pre sections (we just want to know when it's over):
    parser_unique = u''
    # NOTE(review): the %s placeholder is not filled in here; presumably the
    # code using this template inserts the current parser_unique marker - confirm.
    parser_scan_rule = ur"""
(?P<parser_end>
    %s\}\}\}  # in parser/pre, we only look for the end of the parser/pre
)
"""
245
246
    # the big, fat, less ugly one ;)
    # please be very careful: blanks and # must be escaped with \ !
    #
    # One alternation of named groups covering all inline and line-level
    # markup. As the class docstring says, methods named like _*_repl() handle
    # the named patterns. The order of the alternatives matters (see the
    # "must come AFTER" notes inside the pattern). The pattern is compiled with
    # re.VERBOSE, which is why literal blanks and "#" need escaping.
    scan_rules = ur"""
(?P<emph_ibb>
    '''''(?=[^']+''')  # italic on, bold on, ..., bold off
)|(?P<emph_ibi>
    '''''(?=[^']+'')  # italic on, bold on, ..., italic off
)|(?P<emph_ib_or_bi>
    '{5}(?=[^'])  # italic and bold or bold and italic
)|(?P<emph>
    '{2,3}  # italic or bold
)|(?P<u>
    __ # underline
)|(?P<small>
    (
     (?P<small_on>\~-\ ?)  # small on (we eat a trailing blank if it is there)
    |
     (?P<small_off>-\~)  # small off
    )
)|(?P<big>
    (
     (?P<big_on>\~\+\ ?)  # big on (eat trailing blank)
    |
     (?P<big_off>\+\~)  # big off
    )
)|(?P<strike>
    (
     (?P<strike_on>--\()  # strike-through on
    |
     (?P<strike_off>\)--)  # strike-through off
    )
)|(?P<remark>
    (
     (^|(?<=\s))  # we require either beginning of line or some whitespace before a remark begin
     (?P<remark_on>/\*\s)  # inline remark on (require and eat whitespace after it)
    )
    |
    (
     (?P<remark_off>\s\*/)  # off (require and eat whitespace before it)
     (?=\s)  # we require some whitespace after a remark end
    )
)|(?P<sup>
    \^  # superscript on
    (?P<sup_text>.*?)  # capture the text
    \^  # off
)|(?P<sub>
    ,,  # subscript on
    (?P<sub_text>.*?)  # capture the text
    ,,  # off
)|(?P<tt>
    \{\{\{  # teletype on
    (?P<tt_text>.*?)  # capture the text
    \}\}\}  # off
)|(?P<tt_bt>
    `  # teletype (using a backtick) on
    (?P<tt_bt_text>.*?)  # capture the text
    `  # off
)|(?P<interwiki>
    %(interwiki_rule)s  # OtherWiki:PageName
)|(?P<word>  # must come AFTER interwiki rule!
    %(word_rule)s  # CamelCase wiki words
)|
%(link_rule)s
|
%(transclude_rule)s
|(?P<url>
    %(url_rule)s
)|(?P<email>
    [-\w._+]+  # name
    \@  # at
    [\w-]+(\.[\w-]+)+  # server/domain
)|(?P<smiley>
    (^|(?<=\s))  # we require either beginning of line or some space before a smiley
    (%(smiley)s)  # one of the smileys
    (?=\s)  # we require some space after the smiley
)|(?P<macro>
    <<
    (?P<macro_name>\w+)  # name of the macro
    (?:\((?P<macro_args>.*?)\))?  # optionally macro arguments
    >>
)|(?P<heading>
    ^(?P<hmarker>=+)\s+  # some === at beginning of line, eat trailing blanks
    (?P<heading_text>.*?)  # capture heading text
    \s+(?P=hmarker)\s$  # some === at end of line (matching amount as we have seen), eat blanks
)|(?P<parser>
    \{\{\{  # parser on
    (?P<parser_unique>(\{*|\w*))  # either some more {{{{ or some chars to solve the nesting problem
    (?P<parser_line>
     (
      \#!  # hash bang
      (?P<parser_name>\w*)  # we have a parser name (can be empty) directly following the {{{
      (
       \s+  # some space ...
       (?P<parser_args>.+?)  # followed by parser args
      )?  # parser args are optional
      \s*  # followed by whitespace (eat it) until EOL
     )
    |
     (?P<parser_nothing>\s*)  # no parser name, only whitespace up to EOL (eat it)
    )$
    # "parser off" detection is done with parser_scan_rule!
)|(?P<comment>
    ^\#\#.*$  # src code comment, rest of line
)|(?P<ol>
    %(ol_rule)s  # ordered list
)|(?P<dl>
    %(dl_rule)s  # definition list
)|(?P<li>
    ^\s+\*\s*  # unordered list
)|(?P<li_none>
    ^\s+\.\s*  # unordered list, no bullets
)|(?P<indent>
    ^\s+  # indented by some spaces
)|(?P<tableZ>
    \|\|\ $  # the right end of a table row
)|(?P<table>
    (?:\|\|)+(?:<(?!<)[^>]*?>)?(?!\|?\s$) # a table
)|(?P<rule>
    -{4,}  # hor. rule, min. 4 -
)|(?P<entity>
    &(
      ([a-zA-Z]+)  # symbolic entity, like &uuml;
      |
      (\#(\d{1,5}|x[0-9a-fA-F]+))  # numeric entities, like &#42; or &#x42;
     );
)|(?P<sgml_entity>  # must come AFTER entity rule!
    [<>&]  # needs special treatment for html/xml
)"""  % {
        # The sub-rules were already %-formatted (or contain no placeholders)
        # when they were defined, so the template above only references the
        # *_rule keys and 'smiley'; 'url_scheme', 'punct', 'u' and 'l' are not
        # referenced by it (unused keys are ignored by %-formatting).
        'url_scheme': url_scheme,
        'url_rule': url_rule,
        'punct': punct_pattern,
        'ol_rule': ol_rule,
        'dl_rule': dl_rule,
        'interwiki_rule': interwiki_rule,
        'word_rule': word_rule,
        'link_rule': link_rule,
        'transclude_rule': transclude_rule,
        'u': config.chars_upper,
        'l': config.chars_lower,
        # alternation of all configured smileys, each regex-escaped
        'smiley': u'|'.join([re.escape(s) for s in config.smileys])}
    scan_re = re.compile(scan_rules, re.UNICODE|re.VERBOSE)
388
389    # Don't start p before these
390    no_new_p_before = ("heading rule table tableZ tr td "
391                       "ul ol dl dt dd li li_none indent "
392                       "macro parser")
393    no_new_p_before = no_new_p_before.split()
394    no_new_p_before = dict(zip(no_new_p_before, [1] * len(no_new_p_before)))
395
396    def __init__(self, raw, request, **kw):
397        self.raw = raw
398        self.request = request
399        self.form = request.form # Macro object uses this
400        self._ = request.getText
401        self.cfg = request.cfg
402        self.line_anchors = kw.get('line_anchors', True)
403        self.start_line = kw.get('start_line', 0)
404        self.macro = None
405
406        # currently, there is only a single, optional argument to this parser and
407        # (when given), it is used as class(es) for a div wrapping the formatter output
408        # either use a single class like "comment" or multiple like "comment/red/dotted"
409        self.wrapping_div_class = kw.get('format_args', '').strip().replace('/', ' ')
410
411        self.is_em = 0 # must be int
412        self.is_b = 0 # must be int
413        self.is_u = False
414        self.is_strike = False
415        self.is_big = False
416        self.is_small = False
417        self.is_remark = False
418
419        self.lineno = 0
420        self.in_list = 0 # between <ul/ol/dl> and </ul/ol/dl>
421        self.in_li = 0 # between <li> and </li>
422        self.in_dd = 0 # between <dd> and </dd>
423
424        # states of the parser concerning being inside/outside of some "pre" section:
425        # None == we are not in any kind of pre section (was: 0)
426        # 'search_parser' == we didn't get a parser yet, still searching for it (was: 1)
427        # 'found_parser' == we found a valid parser (was: 2)
428        self.in_pre = None
429
430        self.in_table = 0
431        self.inhibit_p = 0 # if set, do not auto-create a <p>aragraph
432
433        # holds the nesting level (in chars) of open lists
434        self.list_indents = []
435        self.list_types = []
436
437    def _close_item(self, result):
438        #result.append("<!-- close item begin -->\n")
439        if self.in_table:
440            result.append(self.formatter.table(0))
441            self.in_table = 0
442        if self.in_li:
443            self.in_li = 0
444            if self.formatter.in_p:
445                result.append(self.formatter.paragraph(0))
446            result.append(self.formatter.listitem(0))
447        if self.in_dd:
448            self.in_dd = 0
449            if self.formatter.in_p:
450                result.append(self.formatter.paragraph(0))
451            result.append(self.formatter.definition_desc(0))
452        #result.append("<!-- close item end -->\n")
453
454    def _u_repl(self, word, groups):
455        """Handle underline."""
456        self.is_u = not self.is_u
457        return self.formatter.underline(self.is_u)
458
459    def _remark_repl(self, word, groups):
460        """Handle remarks."""
461        on = groups.get('remark_on')
462        if on and self.is_remark:
463            return self.formatter.text(word)
464        off = groups.get('remark_off')
465        if off and not self.is_remark:
466            return self.formatter.text(word)
467        self.is_remark = not self.is_remark
468        return self.formatter.span(self.is_remark, css_class='comment')
469    _remark_on_repl = _remark_repl
470    _remark_off_repl = _remark_repl
471
472    def _strike_repl(self, word, groups):
473        """Handle strikethrough."""
474        on = groups.get('strike_on')
475        if on and self.is_strike:
476            return self.formatter.text(word)
477        off = groups.get('strike_off')
478        if off and not self.is_strike:
479            return self.formatter.text(word)
480        self.is_strike = not self.is_strike
481        return self.formatter.strike(self.is_strike)
482    _strike_on_repl = _strike_repl
483    _strike_off_repl = _strike_repl
484
485    def _small_repl(self, word, groups):
486        """Handle small."""
487        on = groups.get('small_on')
488        if on and self.is_small:
489            return self.formatter.text(word)
490        off = groups.get('small_off')
491        if off and not self.is_small:
492            return self.formatter.text(word)
493        self.is_small = not self.is_small
494        return self.formatter.small(self.is_small)
495    _small_on_repl = _small_repl
496    _small_off_repl = _small_repl
497
498    def _big_repl(self, word, groups):
499        """Handle big."""
500        on = groups.get('big_on')
501        if on and self.is_big:
502            return self.formatter.text(word)
503        off = groups.get('big_off')
504        if off and not self.is_big:
505            return self.formatter.text(word)
506        self.is_big = not self.is_big
507        return self.formatter.big(self.is_big)
508    _big_on_repl = _big_repl
509    _big_off_repl = _big_repl
510
511    def _emph_repl(self, word, groups):
512        """Handle emphasis, i.e. '' and '''."""
513        if len(word) == 3:
514            self.is_b = not self.is_b
515            if self.is_em and self.is_b:
516                self.is_b = 2
517            return self.formatter.strong(self.is_b)
518        else:
519            self.is_em = not self.is_em
520            if self.is_em and self.is_b:
521                self.is_em = 2
522            return self.formatter.emphasis(self.is_em)
523
524    def _emph_ibb_repl(self, word, groups):
525        """Handle mixed emphasis, i.e. ''''' followed by '''."""
526        self.is_b = not self.is_b
527        self.is_em = not self.is_em
528        if self.is_em and self.is_b:
529            self.is_b = 2
530        return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b)
531
532    def _emph_ibi_repl(self, word, groups):
533        """Handle mixed emphasis, i.e. ''''' followed by ''."""
534        self.is_b = not self.is_b
535        self.is_em = not self.is_em
536        if self.is_em and self.is_b:
537            self.is_em = 2
538        return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em)
539
540    def _emph_ib_or_bi_repl(self, word, groups):
541        """Handle mixed emphasis, exactly five '''''."""
542        b_before_em = self.is_b > self.is_em > 0
543        self.is_b = not self.is_b
544        self.is_em = not self.is_em
545        if b_before_em:
546            return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em)
547        else:
548            return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b)
549
550    def _sup_repl(self, word, groups):
551        """Handle superscript."""
552        text = groups.get('sup_text', '')
553        return (self.formatter.sup(1) +
554                self.formatter.text(text) +
555                self.formatter.sup(0))
556    _sup_text_repl = _sup_repl
557
558    def _sub_repl(self, word, groups):
559        """Handle subscript."""
560        text = groups.get('sub_text', '')
561        return (self.formatter.sub(1) +
562               self.formatter.text(text) +
563               self.formatter.sub(0))
564    _sub_text_repl = _sub_repl
565
566    def _tt_repl(self, word, groups):
567        """Handle inline code."""
568        tt_text = groups.get('tt_text', '')
569        return (self.formatter.code(1) +
570                self.formatter.text(tt_text) +
571                self.formatter.code(0))
572    _tt_text_repl = _tt_repl
573
574    def _tt_bt_repl(self, word, groups):
575        """Handle backticked inline code."""
576        tt_bt_text = groups.get('tt_bt_text', '')
577        return (self.formatter.code(1, css="backtick") +
578                self.formatter.text(tt_bt_text) +
579                self.formatter.code(0))
580    _tt_bt_text_repl = _tt_bt_repl
581
582    def _rule_repl(self, word, groups):
583        """Handle sequences of dashes."""
584        result = self._undent() + self._closeP()
585        if len(word) <= 4:
586            result += self.formatter.rule()
587        else:
588            # Create variable rule size 1 - 6. Actual size defined in css.
589            size = min(len(word), 10) - 4
590            result += self.formatter.rule(size)
591        return result
592
593    def _interwiki_repl(self, word, groups):
594        """Handle InterWiki links."""
595        wiki = groups.get('interwiki_wiki')
596        page = groups.get('interwiki_page')
597
598        wikitag_bad = wikiutil.resolve_interwiki(self.request, wiki, page)[3]
599        if wikitag_bad:
600            text = groups.get('interwiki')
601            return self.formatter.text(text)
602        else:
603            page, anchor = wikiutil.split_anchor(page)
604            return (self.formatter.interwikilink(1, wiki, page, anchor=anchor) +
605                    self.formatter.text(page) +
606                    self.formatter.interwikilink(0, wiki, page))
607    _interwiki_wiki_repl = _interwiki_repl
608    _interwiki_page_repl = _interwiki_repl
609
610    def _word_repl(self, word, groups):
611        """Handle WikiNames."""
612        bang = ''
613        bang_present = groups.get('word_bang')
614        if bang_present:
615            if self.cfg.bang_meta:
616                # handle !NotWikiNames
617                return self.formatter.nowikiword(word)
618            else:
619                bang = self.formatter.text('!')
620        name = groups.get('word_name')
621        current_page = self.formatter.page.page_name
622        abs_name = wikiutil.AbsPageName(current_page, name)
623        # if a simple, self-referencing link, emit it as plain text
624        if abs_name == current_page:
625            return self.formatter.text(word)
626        else:
627            abs_name, anchor = wikiutil.split_anchor(abs_name)
628            return (bang +
629                    self.formatter.pagelink(1, abs_name, anchor=anchor) +
630                    self.formatter.text(word) +
631                    self.formatter.pagelink(0, abs_name))
632    _word_bang_repl = _word_repl
633    _word_name_repl = _word_repl
634    _word_anchor_repl = _word_repl
635
636    def _url_repl(self, word, groups):
637        """Handle literal URLs."""
638        scheme = groups.get('url_scheme', 'http')
639        target = groups.get('url_target', '')
640        return (self.formatter.url(1, target, css=scheme) +
641                self.formatter.text(target) +
642                self.formatter.url(0))
643    _url_target_repl = _url_repl
644    _url_scheme_repl = _url_repl
645
646    def _transclude_description(self, desc, default_text=''):
647        """ parse a string <desc> valid as transclude description (text, ...)
648            and return the description.
649
650            We do NOT use wikiutil.escape here because it is html specific (the
651            html formatter, if used, does this for all html attributes).
652
653            We do NOT call formatter.text here because it sometimes is just used
654            for some alt and/or title attribute, but not emitted as text.
655
656            @param desc: the transclude description to parse
657            @param default_text: use this text if parsing desc returns nothing.
658        """
659        m = self.transclude_desc_re.match(desc)
660        if m:
661            if m.group('simple_text'):
662                desc = m.group('simple_text')
663        else:
664            desc = default_text
665        return desc
666
667    def _get_params(self, params, tag_attrs=None, acceptable_attrs=None, query_args=None):
668        """ parse the parameters of link/transclusion markup,
669            defaults can be a dict with some default key/values
670            that will be in the result as given, unless overriden
671            by the params.
672        """
673        if tag_attrs is None:
674            tag_attrs = {}
675        if query_args is None:
676            query_args = {}
677        if params:
678            fixed, kw, trailing = wikiutil.parse_quoted_separated(params)
679            # we ignore fixed and trailing args and only use kw args:
680            if acceptable_attrs is None:
681                acceptable_attrs = []
682            for key, val in kw.items():
683                # wikiutil.escape for key/val must be done by (html) formatter!
684                if key in acceptable_attrs:
685                    # tag attributes must be string type
686                    tag_attrs[str(key)] = val
687                elif key.startswith('&'):
688                    key = key[1:]
689                    query_args[key] = val
690        return tag_attrs, query_args
691
692    def _transclude_repl(self, word, groups):
693        """Handles transcluding content, usually embedding images."""
694        target = groups.get('transclude_target', '')
695        target = wikiutil.url_unquote(target)
696        desc = groups.get('transclude_desc', '') or ''
697        params = groups.get('transclude_params', u'') or u''
698        acceptable_attrs_img = ['class', 'title', 'longdesc', 'width', 'height', 'align', ] # no style because of JS
699        acceptable_attrs_object = ['class', 'title', 'width', 'height', # no style because of JS
700                                  'type', 'standby', ] # we maybe need a hack for <PARAM> here
701        m = self.link_target_re.match(target)
702        if m:
703            if m.group('extern_addr'):
704                # currently only supports ext. image inclusion
705                target = m.group('extern_addr')
706                desc = self._transclude_description(desc, target)
707                tag_attrs, query_args = self._get_params(params,
708                                                         tag_attrs={'class': 'external_image',
709                                                                    'alt': desc,
710                                                                    'title': desc, },
711                                                         acceptable_attrs=acceptable_attrs_img)
712                return self.formatter.image(src=target, **tag_attrs)
713                # FF2 has a bug with target mimetype detection, it looks at the url path
714                # and expects to find some "filename extension" there (like .png) and this
715                # (not the response http headers) will set the default content-type of
716                # the object. This will often work for staticly served files, but
717                # fails for MoinMoin attachments (they don't have the filename.ext in the
718                # path, but in the query string). FF3 seems to have this bug fixed, opera 9.2
719                # also works.
720                #return (self.formatter.transclusion(1, data=target) +
721                #        desc +
722                #        self.formatter.transclusion(0))
723
724            elif m.group('attach_scheme'):
725                scheme = m.group('attach_scheme')
726                url = wikiutil.url_unquote(m.group('attach_addr'))
727                if scheme == 'attachment':
728                    mt = wikiutil.MimeType(filename=url)
729                    if mt.major == 'text':
730                        desc = self._transclude_description(desc, url)
731                        return self.formatter.attachment_inlined(url, desc)
732                    # destinguishs if browser need a plugin in place
733                    elif mt.major == 'image' and mt.minor in config.browser_supported_images:
734                        desc = self._transclude_description(desc, url)
735                        tag_attrs, query_args = self._get_params(params,
736                                                                 tag_attrs={'alt': desc,
737                                                                            'title': desc, },
738                                                                 acceptable_attrs=acceptable_attrs_img)
739                        return self.formatter.attachment_image(url, **tag_attrs)
740                    else:
741                        from MoinMoin.action import AttachFile
742                        current_pagename = self.formatter.page.page_name
743                        pagename, filename = AttachFile.absoluteName(url, current_pagename)
744                        if AttachFile.exists(self.request, pagename, filename):
745                            href = AttachFile.getAttachUrl(pagename, filename, self.request)
746                            tag_attrs, query_args = self._get_params(params,
747                                                                     tag_attrs={'title': desc, },
748                                                                     acceptable_attrs=acceptable_attrs_object)
749                            return (self.formatter.transclusion(1, data=href, type=mt.spoil(), **tag_attrs) +
750                                    self.formatter.text(self._transclude_description(desc, url)) +
751                                    self.formatter.transclusion(0))
752                        else:
753                            return (self.formatter.attachment_link(1, url) +
754                                    self.formatter.text(self._transclude_description(desc, url)) +
755                                    self.formatter.attachment_link(0))
756
757                        #NOT USED CURRENTLY:
758
759                        # use EmbedObject for other mimetypes
760                        if mt is not None:
761                            from MoinMoin import macro
762                            macro.request = self.request
763                            macro.formatter = self.request.html_formatter
764                            p = Parser("##\n", request)
765                            m = macro.Macro(p)
766                            pagename = self.formatter.page.page_name
767                            return m.execute('EmbedObject', u'target=%s' % url)
768                elif scheme == 'drawing':
769                    url = wikiutil.drawing2fname(url)
770                    desc = self._transclude_description(desc, url)
771                    if desc:
772                        tag_attrs= {'alt': desc, 'title': desc, }
773                    else:
774                        tag_attrs = {}
775                    tag_attrs, query_args = self._get_params(params,
776                                                             tag_attrs=tag_attrs,
777                                                             acceptable_attrs=acceptable_attrs_img)
778                    return self.formatter.attachment_drawing(url, desc, **tag_attrs)
779
780            elif m.group('page_name'):
781                # experimental client side transclusion
782                page_name_all = m.group('page_name')
783                if ':' in page_name_all:
784                    wiki_name, page_name = page_name_all.split(':', 1)
785                    wikitag, wikiurl, wikitail, err = wikiutil.resolve_interwiki(self.request, wiki_name, page_name)
786                else:
787                    err = True
788                if err: # not a interwiki link / not in interwiki map
789                    tag_attrs, query_args = self._get_params(params,
790                                                             tag_attrs={'type': 'text/html',
791                                                                        'width': '100%', },
792                                                             acceptable_attrs=acceptable_attrs_object)
793                    if 'action' not in query_args:
794                        query_args['action'] = 'content'
795                    url = Page(self.request, page_name_all).url(self.request, querystr=query_args)
796                    return (self.formatter.transclusion(1, data=url, **tag_attrs) +
797                            self.formatter.text(self._transclude_description(desc, page_name_all)) +
798                            self.formatter.transclusion(0))
799                    #return u"Error: <<Include(%s,%s)>> emulation missing..." % (page_name, args)
800                else: # looks like a valid interwiki link
801                    url = wikiutil.join_wiki(wikiurl, wikitail)
802                    tag_attrs, query_args = self._get_params(params,
803                                                             tag_attrs={'type': 'text/html',
804                                                                        'width': '100%', },
805                                                             acceptable_attrs=acceptable_attrs_object)
806                    if 'action' not in query_args:
807                        query_args['action'] = 'content' # XXX moin specific
808                    url += '?%s' % wikiutil.makeQueryString(query_args)
809                    return (self.formatter.transclusion(1, data=url, **tag_attrs) +
810                            self.formatter.text(self._transclude_description(desc, page_name)) +
811                            self.formatter.transclusion(0))
812                    #return u"Error: <<RemoteInclude(%s:%s,%s)>> still missing." % (wiki_name, page_name, args)
813
814            else:
815                desc = self._transclude_description(desc, target)
816                return self.formatter.text('{{%s|%s|%s}}' % (target, desc, params))
817        return word +'???'
818    _transclude_target_repl = _transclude_repl
819    _transclude_desc_repl = _transclude_repl
820    _transclude_params_repl = _transclude_repl
821
822    def _link_description(self, desc, target='', default_text=''):
823        """ parse a string <desc> valid as link description (text, transclusion, ...)
824            and return formatted content.
825
826            @param desc: the link description to parse
827            @param default_text: use this text (formatted as text) if parsing
828                                 desc returns nothing.
829            @param target: target of the link (as readable markup) - used for
830                           transcluded image's description
831        """
832        m = self.link_desc_re.match(desc)
833        if m:
834            if m.group('simple_text'):
835                desc = m.group('simple_text')
836                desc = self.formatter.text(desc)
837            elif m.group('transclude'):
838                groupdict = m.groupdict()
839                if groupdict.get('transclude_desc') is None:
840                    # if transcluded obj (image) has no description, use target for it
841                    groupdict['transclude_desc'] = target
842                desc = m.group('transclude')
843                desc = self._transclude_repl(desc, groupdict)
844        else:
845            desc = default_text
846            if desc:
847                desc = self.formatter.text(desc)
848        return desc
849
850    def _link_repl(self, word, groups):
851        """Handle [[target|text]] links."""
852        target = groups.get('link_target', '')
853        desc = groups.get('link_desc', '') or ''
854        params = groups.get('link_params', u'') or u''
855        acceptable_attrs = ['class', 'title', 'target', 'accesskey', 'rel', ] # no style because of JS
856        mt = self.link_target_re.match(target)
857        if mt:
858            if mt.group('page_name'):
859                page_name_and_anchor = mt.group('page_name')
860                if ':' in page_name_and_anchor:
861                    wiki_name, page_name = page_name_and_anchor.split(':', 1)
862                    wikitag, wikiurl, wikitail, err = wikiutil.resolve_interwiki(self.request, wiki_name, page_name)
863                else:
864                    err = True
865                if err: # not a interwiki link / not in interwiki map
866                    page_name, anchor = wikiutil.split_anchor(page_name_and_anchor)
867                    current_page = self.formatter.page.page_name
868                    if not page_name:
869                        page_name = current_page
870                    # handle relative links
871                    abs_page_name = wikiutil.AbsPageName(current_page, page_name)
872                    tag_attrs, query_args = self._get_params(params,
873                                                             tag_attrs={},
874                                                             acceptable_attrs=acceptable_attrs)
875                    return (self.formatter.pagelink(1, abs_page_name, anchor=anchor, querystr=query_args, **tag_attrs) +
876                            self._link_description(desc, target, page_name_and_anchor) +
877                            self.formatter.pagelink(0, abs_page_name))
878                else: # interwiki link
879                    page_name, anchor = wikiutil.split_anchor(page_name)
880                    tag_attrs, query_args = self._get_params(params,
881                                                             tag_attrs={},
882                                                             acceptable_attrs=acceptable_attrs)
883                    return (self.formatter.interwikilink(1, wiki_name, page_name, anchor=anchor, querystr=query_args, **tag_attrs) +
884                            self._link_description(desc, target, page_name) +
885                            self.formatter.interwikilink(0, wiki_name, page_name))
886
887            elif mt.group('extern_addr'):
888                scheme = mt.group('extern_scheme')
889                target = mt.group('extern_addr')
890                tag_attrs, query_args = self._get_params(params,
891                                                         tag_attrs={'class': scheme, },
892                                                         acceptable_attrs=acceptable_attrs)
893                return (self.formatter.url(1, target, **tag_attrs) +
894                        self._link_description(desc, target, target) +
895                        self.formatter.url(0))
896
897            elif mt.group('attach_scheme'):
898                scheme = mt.group('attach_scheme')
899                url = wikiutil.url_unquote(mt.group('attach_addr'))
900                tag_attrs, query_args = self._get_params(params,
901                                                         tag_attrs={'title': desc, },
902                                                         acceptable_attrs=acceptable_attrs)
903                if scheme == 'attachment': # ZZZ
904                    return (self.formatter.attachment_link(1, url, querystr=query_args, **tag_attrs) +
905                            self._link_description(desc, target, url) +
906                            self.formatter.attachment_link(0))
907                elif scheme == 'drawing':
908                    url = wikiutil.drawing2fname(url)
909                    return self.formatter.attachment_drawing(url, desc, alt=desc, **tag_attrs)
910            else:
911                if desc:
912                    desc = '|' + desc
913                return self.formatter.text('[[%s%s]]' % (target, desc))
914    _link_target_repl = _link_repl
915    _link_desc_repl = _link_repl
916    _link_params_repl = _link_repl
917
918    def _email_repl(self, word, groups):
919        """Handle email addresses (without a leading mailto:)."""
920        return (self.formatter.url(1, "mailto:%s" % word, css='mailto') +
921                self.formatter.text(word) +
922                self.formatter.url(0))
923
924    def _sgml_entity_repl(self, word, groups):
925        """Handle SGML entities."""
926        return self.formatter.text(word)
927
928    def _entity_repl(self, word, groups):
929        """Handle numeric (decimal and hexadecimal) and symbolic SGML entities."""
930        return self.formatter.rawHTML(word)
931
932    def _indent_repl(self, match, groups):
933        """Handle pure indentation (no - * 1. markup)."""
934        result = []
935        if not (self.in_li or self.in_dd):
936            self._close_item(result)
937            self.in_li = 1
938            css_class = None
939            if self.line_was_empty and not self.first_list_item:
940                css_class = 'gap'
941            result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none"))
942        return ''.join(result)
943
944    def _li_none_repl(self, match, groups):
945        """Handle type=none (" .") lists."""
946        result = []
947        self._close_item(result)
948        self.in_li = 1
949        css_class = None
950        if self.line_was_empty and not self.first_list_item:
951            css_class = 'gap'
952        result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none"))
953        return ''.join(result)
954
955    def _li_repl(self, match, groups):
956        """Handle bullet (" *") lists."""
957        result = []
958        self._close_item(result)
959        self.in_li = 1
960        css_class = None
961        if self.line_was_empty and not self.first_list_item:
962            css_class = 'gap'
963        result.append(self.formatter.listitem(1, css_class=css_class))
964        return ''.join(result)
965
966    def _ol_repl(self, match, groups):
967        """Handle numbered lists."""
968        return self._li_repl(match, groups)
969
970    def _dl_repl(self, match, groups):
971        """Handle definition lists."""
972        result = []
973        self._close_item(result)
974        self.in_dd = 1
975        result.extend([
976            self.formatter.definition_term(1),
977            self.formatter.text(match[1:-3].lstrip(' ')),
978            self.formatter.definition_term(0),
979            self.formatter.definition_desc(1),
980        ])
981        return ''.join(result)
982
983    def _indent_level(self):
984        """Return current char-wise indent level."""
985        return len(self.list_indents) and self.list_indents[-1]
986
987    def _indent_to(self, new_level, list_type, numtype, numstart):
988        """Close and open lists."""
989        openlist = []   # don't make one out of these two statements!
990        closelist = []
991
992        if self._indent_level() != new_level and self.in_table:
993            closelist.append(self.formatter.table(0))
994            self.in_table = 0
995
996        while self._indent_level() > new_level:
997            self._close_item(closelist)
998            if self.list_types[-1] == 'ol':
999                tag = self.formatter.number_list(0)
1000            elif self.list_types[-1] == 'dl':
1001                tag = self.formatter.definition_list(0)
1002            else:
1003                tag = self.formatter.bullet_list(0)
1004            closelist.append(tag)
1005
1006            del self.list_indents[-1]
1007            del self.list_types[-1]
1008
1009            if self.list_types: # we are still in a list
1010                if self.list_types[-1] == 'dl':
1011                    self.in_dd = 1
1012                else:
1013                    self.in_li = 1
1014
1015        # Open new list, if necessary
1016        if self._indent_level() < new_level:
1017            self.list_indents.append(new_level)
1018            self.list_types.append(list_type)
1019
1020            if self.formatter.in_p:
1021                closelist.append(self.formatter.paragraph(0))
1022
1023            if list_type == 'ol':
1024                tag = self.formatter.number_list(1, numtype, numstart)
1025            elif list_type == 'dl':
1026                tag = self.formatter.definition_list(1)
1027            else:
1028                tag = self.formatter.bullet_list(1)
1029            openlist.append(tag)
1030
1031            self.first_list_item = 1
1032            self.in_li = 0
1033            self.in_dd = 0
1034
1035        # If list level changes, close an open table
1036        if self.in_table and (openlist or closelist):
1037            closelist[0:0] = [self.formatter.table(0)]
1038            self.in_table = 0
1039
1040        self.in_list = self.list_types != []
1041        return ''.join(closelist) + ''.join(openlist)
1042
1043    def _undent(self):
1044        """Close all open lists."""
1045        result = []
1046        #result.append("<!-- _undent start -->\n")
1047        self._close_item(result)
1048        for type in self.list_types[::-1]:
1049            if type == 'ol':
1050                result.append(self.formatter.number_list(0))
1051            elif type == 'dl':
1052                result.append(self.formatter.definition_list(0))
1053            else:
1054                result.append(self.formatter.bullet_list(0))
1055        #result.append("<!-- _undent end -->\n")
1056        self.list_indents = []
1057        self.list_types = []
1058        return ''.join(result)
1059
1060    def _getTableAttrs(self, attrdef):
1061        attr_rule = r'^(\|\|)*<(?!<)(?P<attrs>[^>]*?)>'
1062        m = re.match(attr_rule, attrdef, re.U)
1063        if not m:
1064            return {}, ''
1065        attrdef = m.group('attrs')
1066
1067        # extension for special table markup
1068        def table_extension(key, parser, attrs, wiki_parser=self):
1069            """ returns: tuple (found_flag, msg)
1070                found_flag: whether we found something and were able to process it here
1071                  true for special stuff like 100% or - or #AABBCC
1072                  false for style xxx="yyy" attributes
1073                msg: "" or an error msg
1074            """
1075            _ = wiki_parser._
1076            found = False
1077            msg = ''
1078            if key[0] in "0123456789":
1079                token = parser.get_token()
1080                if token != '%':
1081                    wanted = '%'
1082                    msg = _('Expected "%(wanted)s" after "%(key)s", got "%(token)s"') % {
1083                        'wanted': wanted, 'key': key, 'token': token}
1084                else:
1085                    try:
1086                        dummy = int(key)
1087                    except ValueError:
1088                        msg = _('Expected an integer "%(key)s" before "%(token)s"') % {
1089                            'key': key, 'token': token}
1090                    else:
1091                        found = True
1092                        attrs['width'] = '"%s%%"' % key
1093            elif key == '-':
1094                arg = parser.get_token()
1095                try:
1096                    dummy = int(arg)
1097                except ValueError:
1098                    msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
1099                        'arg': arg, 'key': key}
1100                else:
1101                    found = True
1102                    attrs['colspan'] = '"%s"' % arg
1103            elif key == '|':
1104                arg = parser.get_token()
1105                try:
1106                    dummy = int(arg)
1107                except ValueError:
1108                    msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
1109                        'arg': arg, 'key': key}
1110                else:
1111                    found = True
1112                    attrs['rowspan'] = '"%s"' % arg
1113            elif key == '(':
1114                found = True
1115                attrs['align'] = '"left"'
1116            elif key == ':':
1117                found = True
1118                attrs['align'] = '"center"'
1119            elif key == ')':
1120                found = True
1121                attrs['align'] = '"right"'
1122            elif key == '^':
1123                found = True
1124                attrs['valign'] = '"top"'
1125            elif key == 'v':
1126                found = True
1127                attrs['valign'] = '"bottom"'
1128            elif key == '#':
1129                arg = parser.get_token()
1130                try:
1131                    if len(arg) != 6:
1132                        raise ValueError
1133                    dummy = int(arg, 16)
1134                except ValueError:
1135                    msg = _('Expected a color value "%(arg)s" after "%(key)s"') % {
1136                        'arg': arg, 'key': key}
1137                else:
1138                    found = True
1139                    attrs['bgcolor'] = '"#%s"' % arg
1140            return found, self.formatter.rawHTML(msg)
1141
1142        # scan attributes
1143        attr, msg = wikiutil.parseAttributes(self.request, attrdef, '>', table_extension)
1144        if msg:
1145            msg = '<strong class="highlight">%s</strong>' % msg
1146        #logging.debug("parseAttributes returned %r" % attr)
1147        return attr, msg
1148
1149    def _tableZ_repl(self, word, groups):
1150        """Handle table row end."""
1151        if self.in_table:
1152            result = ''
1153            # REMOVED: check for self.in_li, p should always close
1154            if self.formatter.in_p:
1155                result = self.formatter.paragraph(0)
1156            result += self.formatter.table_cell(0) + self.formatter.table_row(0)
1157            return result
1158        else:
1159            return self.formatter.text(word)
1160
1161    def _table_repl(self, word, groups):
1162        """Handle table cell separator."""
1163        if self.in_table:
1164            result = []
1165            # check for attributes
1166            attrs, attrerr = self._getTableAttrs(word)
1167
1168            # start the table row?
1169            if self.table_rowstart:
1170                self.table_rowstart = 0
1171                result.append(self.formatter.table_row(1, attrs))
1172            else:
1173                # Close table cell, first closing open p
1174                # REMOVED check for self.in_li, paragraph should close always!
1175                if self.formatter.in_p:
1176                    result.append(self.formatter.paragraph(0))
1177                result.append(self.formatter.table_cell(0))
1178
1179            # check for adjacent cell markers
1180            if word.count("|") > 2:
1181                if 'align' not in attrs and \
1182                   not ('style' in attrs and 'text-align' in attrs['style'].lower()):
1183                    # add center alignment if we don't have some alignment already
1184                    attrs['align'] = '"center"'
1185                if 'colspan' not in attrs:
1186                    attrs['colspan'] = '"%d"' % (word.count("|")/2)
1187
1188            # return the complete cell markup
1189            result.append(self.formatter.table_cell(1, attrs) + attrerr)
1190            result.append(self._line_anchordef())
1191            return ''.join(result)
1192        else:
1193            return self.formatter.text(word)
1194
1195    def _heading_repl(self, word, groups):
1196        """Handle section headings."""
1197        heading_text = groups.get('heading_text', '')
1198        depth = min(len(groups.get('hmarker')), 5)
1199        return ''.join([
1200            self._closeP(),
1201            self.formatter.heading(1, depth, id=heading_text),
1202            self.formatter.text(heading_text),
1203            self.formatter.heading(0, depth),
1204        ])
1205    _heading_text_repl = _heading_repl
1206
1207    def _parser_repl(self, word, groups):
1208        """Handle parsed code displays."""
1209        self.parser = None
1210        self.parser_name = None
1211        self.parser_lines = []
1212        parser_line = word = groups.get('parser_line', u'')
1213        parser_name = groups.get('parser_name', None)
1214        parser_args = groups.get('parser_args', None)
1215        parser_nothing = groups.get('parser_nothing', None)
1216        parser_unique = groups.get('parser_unique', u'') or u''
1217        #logging.debug("_parser_repl: parser_name %r parser_args %r parser_unique %r" % (parser_name, parser_args, parser_unique))
1218        if set(parser_unique) == set('{'): # just some more {{{{{{
1219            parser_unique = u'}' * len(parser_unique) # for symmetry cosmetic reasons
1220        self.parser_unique = parser_unique
1221        if parser_name is not None:
1222            # First try to find a parser for this
1223            if parser_name == u'':
1224                # empty bang paths lead to a normal code display
1225                # can be used to escape real, non-empty bang paths
1226                #logging.debug("_parser_repl: empty bangpath")
1227                parser_name = 'text'
1228                word = ''
1229        elif parser_nothing is None:
1230            # there was something non-whitespace following the {{{
1231            parser_name = 'text'
1232
1233        self.setParser(parser_name)
1234        if not self.parser and parser_name:
1235            # loading the desired parser didn't work, retry a safe option:
1236            wanted_parser = parser_name
1237            parser_name = 'text'
1238            self.setParser(parser_name)
1239            word = '%s %s (-)' % (wanted_parser, parser_args)  # indication that it did not work
1240
1241        if self.parser:
1242            self.parser_name = parser_name
1243            self.in_pre = 'found_parser'
1244            if word:
1245                self.parser_lines.append(word)
1246        else:
1247            self.in_pre = 'search_parser'
1248
1249        #logging.debug("_parser_repl: in_pre %r line %d" % (self.in_pre, self.lineno))
1250        return ''
1251    _parser_unique_repl = _parser_repl
1252    _parser_line_repl = _parser_repl
1253    _parser_name_repl = _parser_repl
1254    _parser_args_repl = _parser_repl
1255    _parser_nothing_repl = _parser_repl
1256
1257    def _parser_content(self, line):
1258        """ handle state and collecting lines for parser in pre/parser sections """
1259        #logging.debug("parser_content: %r" % line)
1260        if self.in_pre == 'search_parser' and line.strip():
1261            # try to find a parser specification
1262            parser_name = ''
1263            if line.strip().startswith("#!"):
1264                parser_name = line.strip()[2:]
1265            if parser_name:
1266                parser_name = parser_name.split()[0]
1267            else:
1268                parser_name = 'text'
1269            self.setParser(parser_name)
1270
1271            if not self.parser:
1272                parser_name = 'text'
1273                self.setParser(parser_name)
1274
1275            if self.parser:
1276                self.in_pre = 'found_parser'
1277                self.parser_lines.append(line)
1278                self.parser_name = parser_name
1279
1280        elif self.in_pre == 'found_parser':
1281            # collect the content lines
1282            self.parser_lines.append(line)
1283
1284        return ''  # we emit the content after reaching the end of the parser/pre section
1285
1286    def _parser_end_repl(self, word, groups):
1287        """ when we reach the end of a parser/pre section,
1288            we call the parser with the lines we collected
1289        """
1290        #if self.in_pre:
1291        self.in_pre = None
1292        self.inhibit_p = 0
1293        #logging.debug("_parser_end_repl: in_pre %r line %d" % (self.in_pre, self.lineno))
1294        self.request.write(self._closeP())
1295        if self.parser_name is None:
1296            # we obviously did not find a parser specification
1297            self.parser_name = 'text'
1298        result = self.formatter.parser(self.parser_name, self.parser_lines)
1299        del self.parser_lines
1300        self.in_pre = None
1301        self.parser = None
1302        return result
1303
1304    def _smiley_repl(self, word, groups):
1305        """Handle smileys."""
1306        return self.formatter.smiley(word)
1307
1308    def _comment_repl(self, word, groups):
1309        # if we are in a paragraph, we must close it so that normal text following
1310        # in the line below the comment will reopen a new paragraph.
1311        if self.formatter.in_p:
1312            self.formatter.paragraph(0)
1313        self.line_is_empty = 1 # markup following comment lines treats them as if they were empty
1314        return self.formatter.comment(word)
1315
1316    def _closeP(self):
1317        if self.formatter.in_p:
1318            return self.formatter.paragraph(0)
1319        return ''
1320
1321    def _macro_repl(self, word, groups):
1322        """Handle macros."""
1323        macro_name = groups.get('macro_name')
1324        macro_args = groups.get('macro_args')
1325        self.inhibit_p = 0 # 1 fixed macros like UserPreferences (in the past, gone now), 0 fixes paragraph formatting for macros
1326
1327        # create macro instance
1328        if self.macro is None:
1329            self.macro = macro.Macro(self)
1330        return self.formatter.macro(self.macro, macro_name, macro_args, markup=groups.get('macro'))
1331    _macro_name_repl = _macro_repl
1332    _macro_args_repl = _macro_repl
1333
1334    def scan(self, line, inhibit_p=False):
1335        """ Scans one line
1336        Append text before match, invoke replace() with match, and add text after match.
1337        """
1338        result = []
1339        lastpos = 0 # absolute position within line
1340        line_length = len(line)
1341
1342        ###result.append(u'<span class="info">[scan: <tt>"%s"</tt>]</span>' % line)
1343        while lastpos <= line_length: # it is <=, not <, because we need to process the empty line also
1344            parser_scan_re = re.compile(self.parser_scan_rule % re.escape(self.parser_unique), re.VERBOSE|re.UNICODE)
1345            scan_re = self.in_pre and parser_scan_re or self.scan_re
1346            match = scan_re.search(line, lastpos)
1347            if match:
1348                start = match.start()
1349                if lastpos < start:
1350                    if self.in_pre:
1351                        self._parser_content(line[lastpos:start])
1352                    else:
1353                        ###result.append(u'<span class="info">[add text before match: <tt>"%s"</tt>]</span>' % line[lastpos:match.start()])
1354                        if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p):
1355                            result.append(self.formatter.paragraph(1, css_class="line862"))
1356                        # add the simple text in between lastpos and beginning of current match
1357                        result.append(self.formatter.text(line[lastpos:start]))
1358
1359                # Replace match with markup
1360                if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p or
1361                        self.in_table or self.in_list):
1362                    result.append(self.formatter.paragraph(1, css_class="line867"))
1363                result.append(self.replace(match, inhibit_p))
1364                end = match.end()
1365                lastpos = end
1366                if start == end:
1367                    # we matched an empty string
1368                    lastpos += 1 # proceed, we don't want to match this again
1369            else:
1370                if self.in_pre:
1371                    # ilastpos is more then 0 and result of line slice is empty make useless line
1372                    if not (lastpos > 0 and line[lastpos:] == ''):
1373                        self._parser_content(line[lastpos:])
1374                elif line[lastpos:]:
1375                    ###result.append('<span class="info">[no match, add rest: <tt>"%s"<tt>]</span>' % line[lastpos:])
1376                    if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p or
1377                            self.in_li or self.in_dd):
1378                        result.append(self.formatter.paragraph(1, css_class="line874"))
1379                    # add the simple text (no markup) after last match
1380                    result.append(self.formatter.text(line[lastpos:]))
1381                break # nothing left to do!
1382        return u''.join(result)
1383
1384    def _replace(self, match):
1385        """ Same as replace() but with no magic """
1386        for name, text in match.groupdict().iteritems():
1387            if text is not None:
1388                # Get replace method and replace text
1389                replace_func = getattr(self, '_%s_repl' % name)
1390                result = replace_func(text, match.groupdict())
1391                return result
1392
1393    def replace(self, match, inhibit_p=False):
1394        """ Replace match using type name """
1395        result = []
1396        for type, hit in match.groupdict().items():
1397            if hit is not None and not type in ["hmarker", ]:
1398
1399                ##result.append(u'<span class="info">[replace: %s: "%s"]</span>' % (type, hit))
1400                # Open p for certain types
1401                if not (inhibit_p or self.inhibit_p or self.formatter.in_p
1402                        or self.in_pre or (type in self.no_new_p_before)):
1403                    result.append(self.formatter.paragraph(1, css_class="line891"))
1404
1405                # Get replace method and replace hit
1406                replace_func = getattr(self, '_%s_repl' % type)
1407                result.append(replace_func(hit, match.groupdict()))
1408                return ''.join(result)
1409        else:
1410            # We should never get here
1411            import pprint
1412            raise Exception("Can't handle match %r\n%s\n%s" % (
1413                match,
1414                pprint.pformat(match.groupdict()),
1415                pprint.pformat(match.groups()),
1416            ))
1417
1418        return ""
1419
1420    def _line_anchordef(self):
1421        if self.line_anchors and not self.line_anchor_printed:
1422            self.line_anchor_printed = 1
1423            return self.formatter.line_anchordef(self.lineno)
1424        else:
1425            return ''
1426
1427    def format(self, formatter, inhibit_p=False, **kw):
1428        """ For each line, scan through looking for magic
1429            strings, outputting verbatim any intervening text.
1430        """
1431        self.formatter = formatter
1432        self.hilite_re = self.formatter.page.hilite_re
1433
1434        # get text and replace TABs
1435        rawtext = self.raw.expandtabs()
1436
1437        # go through the lines
1438
1439        for lineno in range(1, self.start_line + 1):
1440            self.request.write(self.formatter.line_anchordef(lineno))
1441
1442        self.lineno = self.start_line
1443        self.lines = self.eol_re.split(rawtext)
1444        self.line_is_empty = 0
1445
1446        self.in_processing_instructions = 1
1447
1448        if self.wrapping_div_class:
1449            self.request.write(self.formatter.div(1, css_class=self.wrapping_div_class))
1450
1451        # Main loop
1452        for line in self.lines:
1453            self.lineno += 1
1454
1455            self.line_anchor_printed = 0
1456            if not self.in_table:
1457                self.request.write(self._line_anchordef())
1458            self.table_rowstart = 1
1459            self.line_was_empty = self.line_is_empty
1460            self.line_is_empty = 0
1461            self.first_list_item = 0
1462            self.inhibit_p = 0
1463
1464            # ignore processing instructions
1465            if self.in_processing_instructions:
1466                found = False
1467                for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated",
1468                           "#pragma", "#form", "#acl", "#language"):
1469                    if line.lower().startswith(pi):
1470                        self.request.write(self.formatter.comment(line))
1471                        found = True
1472                        break
1473                if not found:
1474                    self.in_processing_instructions = 0
1475                else:
1476                    continue # do not parse this line
1477
1478            if not self.in_pre:
1479                # we don't have \n as whitespace any more
1480                # This is the space between lines we join to one paragraph
1481                line += ' '
1482
1483                # Paragraph break on empty lines
1484                if not line.strip():
1485                    if self.in_table:
1486                        self.request.write(self.formatter.table(0))
1487                        self.request.write(self._line_anchordef())
1488                        self.in_table = 0
1489                    # CHANGE: removed check for not self.list_types
1490                    # p should close on every empty line
1491                    if self.formatter.in_p:
1492                        self.request.write(self.formatter.paragraph(0))
1493                    self.line_is_empty = 1
1494                    continue
1495
1496                # Check indent level
1497                indent = self.indent_re.match(line)
1498                indlen = len(indent.group(0))
1499                indtype = "ul"
1500                numtype = None
1501                numstart = None
1502                if indlen:
1503                    match = self.ol_re.match(line)
1504                    if match:
1505                        numtype, numstart = match.group(0).strip().split('.')
1506                        numtype = numtype[0]
1507
1508                        if numstart and numstart[0] == "#":
1509                            numstart = int(numstart[1:])
1510                        else:
1511                            numstart = None
1512
1513                        indtype = "ol"
1514                    else:
1515                        match = self.dl_re.match(line)
1516                        if match:
1517                            indtype = "dl"
1518
1519                # output proper indentation tags
1520                self.request.write(self._indent_to(indlen, indtype, numtype, numstart))
1521
1522                # Table mode
1523                # TODO: move into function?
1524                if (not self.in_table and line[indlen:indlen + 2] == "||"
1525                    and line.endswith("|| ") and len(line) >= 5 + indlen):
1526                    # Start table
1527                    if self.list_types and not self.in_li:
1528                        self.request.write(self.formatter.listitem(1, style="list-style-type:none"))
1529                        ## CHANGE: no automatic p on li
1530                        ##self.request.write(self.formatter.paragraph(1))
1531                        self.in_li = 1
1532
1533                    # CHANGE: removed check for self.in_li
1534                    # paragraph should end before table, always!
1535                    if self.formatter.in_p:
1536                        self.request.write(self.formatter.paragraph(0))
1537                    attrs, attrerr = self._getTableAttrs(line[indlen+2:])
1538                    self.request.write(self.formatter.table(1, attrs) + attrerr)
1539                    self.in_table = True # self.lineno
1540                elif (self.in_table and not
1541                      # intra-table comments should not break a table
1542                      (line.startswith("##") or
1543                       line[indlen:indlen + 2] == "||" and
1544                       line.endswith("|| ") and
1545                       len(line) >= 5 + indlen)):
1546
1547                    # Close table
1548                    self.request.write(self.formatter.table(0))
1549                    self.request.write(self._line_anchordef())
1550                    self.in_table = 0
1551
1552            # Scan line, format and write
1553            formatted_line = self.scan(line, inhibit_p=inhibit_p)
1554            self.request.write(formatted_line)
1555
1556
1557        # Close code displays, paragraphs, tables and open lists
1558        self.request.write(self._undent())
1559        if self.in_pre: self.request.write(self.formatter.preformatted(0))
1560        if self.formatter.in_p: self.request.write(self.formatter.paragraph(0))
1561        if self.in_table: self.request.write(self.formatter.table(0))
1562
1563        if self.wrapping_div_class:
1564            self.request.write(self.formatter.div(0))
1565
1566
1567    # Private helpers ------------------------------------------------------------
1568
1569    def setParser(self, name):
1570        """ Set parser to parser named 'name' """
1571        # XXX this is done by the formatter as well
1572        try:
1573            self.parser = wikiutil.searchAndImportPlugin(self.request.cfg, "parser", name)
1574        except wikiutil.PluginMissingError:
1575            self.parser = None
1576
# The module-level `_` (an identity lambda) is only needed while the class
# body above is evaluated, so it is removed from the module namespace here.
del _
1578