1# -*- coding: iso-8859-1 -*- 2""" 3 MoinMoin - MoinMoin Wiki Markup Parser 4 5 @copyright: 2000-2002 Juergen Hermann <jh@web.de>, 6 2006-2008 MoinMoin:ThomasWaldmann, 7 2007 by MoinMoin:ReimarBauer 8 @license: GNU GPL, see COPYING for details. 9""" 10 11import re 12 13from MoinMoin import log 14logging = log.getLogger(__name__) 15 16from MoinMoin import config, wikiutil, macro 17from MoinMoin.Page import Page 18 19Dependencies = ['user'] # {{{#!wiki comment ... }}} has different output depending on the user's profile settings 20 21 22_ = lambda x: x 23 24class Parser: 25 """ 26 Parse wiki format markup (and call the formatter to generate output). 27 28 All formatting commands can be parsed one line at a time, though 29 some state is carried over between lines. 30 31 Methods named like _*_repl() are responsible to handle the named regex patterns. 32 """ 33 34 extensions = ['.moin'] 35 # allow caching 36 caching = 1 37 Dependencies = Dependencies 38 quickhelp = _(u"""\ 39 Emphasis:: <<Verbatim('')>>''italics''<<Verbatim('')>>; <<Verbatim(''')>>'''bold'''<<Verbatim(''')>>; <<Verbatim(''''')>>'''''bold italics'''''<<Verbatim(''''')>>; <<Verbatim('')>>''mixed ''<<Verbatim(''')>>'''''bold'''<<Verbatim(''')>> and italics''<<Verbatim('')>>; <<Verbatim(----)>> horizontal rule. 40 Headings:: = Title 1 =; == Title 2 ==; === Title 3 ===; ==== Title 4 ====; ===== Title 5 =====. 41 Lists:: space and one of: * bullets; 1., a., A., i., I. numbered items; 1.#n start numbering at n; space alone indents. 42 Links:: <<Verbatim(JoinCapitalizedWords)>>; <<Verbatim([[target|linktext]])>>. 43 Tables:: || cell text |||| cell text spanning 2 columns ||; no trailing white space allowed after tables or titles. 44 45(!) For more help, see HelpOnEditing or HelpOnMoinWikiSyntax. 
46""") 47 48 # some common strings 49 CHILD_PREFIX = wikiutil.CHILD_PREFIX 50 CHILD_PREFIX_LEN = wikiutil.CHILD_PREFIX_LEN 51 PARENT_PREFIX = wikiutil.PARENT_PREFIX 52 PARENT_PREFIX_LEN = wikiutil.PARENT_PREFIX_LEN 53 54 punct_pattern = re.escape(u'''"\'}]|:,.)?!''') 55 url_scheme = u'|'.join(config.url_schemas) 56 57 # some common rules 58 url_rule = ur''' 59 (?:^|(?<=\W)) # require either beginning of line or some non-alphanum char (whitespace, punctuation) to the left 60 (?P<url_target> # capture whole url there 61 (?P<url_scheme>%(url_scheme)s) # some scheme 62 \: 63 \S+? # anything non-whitespace 64 ) 65 (?:$|(?=\s|[%(punct)s]+(\s|$))) # require either end of line or some whitespace or some punctuation+blank/eol afterwards 66 ''' % { 67 'url_scheme': url_scheme, 68 'punct': punct_pattern, 69 } 70 71 # this is for a free (non-bracketed) interwiki link - to avoid false positives, 72 # we are rather restrictive here (same as in moin 1.5: require that the 73 # interwiki_wiki name starts with an uppercase letter A-Z. Later, the code 74 # also checks whether the wiki name is in the interwiki map (if not, it renders 75 # normal text, no link): 76 interwiki_rule = ur''' 77 (?:^|(?<=\W)) # require either beginning of line or some non-alphanum char (whitespace, punctuation) to the left 78 (?P<interwiki_wiki>[A-Z][a-zA-Z]+) # interwiki wiki name 79 \: 80 (?P<interwiki_page> # interwiki page name 81 (?=[^ ]*[%(u)s%(l)s0..9][^ ]*\ ) # make sure there is something non-blank with at least one alphanum letter following 82 [^\s%(punct)s]+ # we take all until we hit some blank or punctuation char ... 83 ) 84 ''' % { 85 'u': config.chars_upper, 86 'l': config.chars_lower, 87 'punct': punct_pattern, 88 } 89 90 # BE CAREFUL: if you do changes to word_rule, consider doing them also to word_rule_js (see below) 91 word_rule = ur''' 92 (?: 93 (?<![%(u)s%(l)s/]) # require anything not upper/lower/slash before 94 | 95 ^ # ... or beginning of line 96 ) 97 (?P<word_bang>\!)? 
# configurable: avoid getting CamelCase rendered as link 98 (?P<word_name> 99 (?: 100 (%(parent)s)* # there might be either ../ parent prefix(es) 101 | 102 ((?<!%(child)s)%(child)s)? # or maybe a single / child prefix (but not if we already had it before) 103 ) 104 ( 105 ((?<!%(child)s)%(child)s)? # there might be / child prefix (but not if we already had it before) 106 (?:[%(u)s][%(l)s]+){2,} # at least 2 upper>lower transitions make CamelCase 107 )+ # we can have MainPage/SubPage/SubSubPage ... 108 (?: 109 \# # anchor separator TODO check if this does not make trouble at places where word_rule is used 110 (?P<word_anchor>\S+) # some anchor name 111 )? 112 ) 113 (?: 114 (?![%(u)s%(l)s/]) # require anything not upper/lower/slash following 115 | 116 $ # ... or end of line 117 ) 118 ''' % { 119 'u': config.chars_upper, 120 'l': config.chars_lower, 121 'child': re.escape(CHILD_PREFIX), 122 'parent': re.escape(PARENT_PREFIX), 123 } 124 # simplified word_rule for FCKeditor's "unlink" plugin (puts a ! in front of a WikiName if WikiName matches word_rule_js), 125 # because JavaScript can not use group names and verbose regular expressions! 
126 word_rule_js = ( 127 ur'''(?:(?<![%(u)s%(l)s/])|^)''' 128 ur'''(?:''' 129 ur'''(?:(%(parent)s)*|((?<!%(child)s)%(child)s)?)''' 130 ur'''(((?<!%(child)s)%(child)s)?(?:[%(u)s][%(l)s]+){2,})+''' 131 ur'''(?:\#(?:\S+))?''' 132 ur''')''' 133 ur'''(?:(?![%(u)s%(l)s/])|$)''' 134 ) % { 135 'u': config.chars_upper, 136 'l': config.chars_lower, 137 'child': re.escape(CHILD_PREFIX), 138 'parent': re.escape(PARENT_PREFIX), 139 } 140 141 # link targets: 142 extern_rule = r'(?P<extern_addr>(?P<extern_scheme>%s)\:.*)' % url_scheme 143 attach_rule = r'(?P<attach_scheme>attachment|drawing)\:(?P<attach_addr>.*)' 144 page_rule = r'(?P<page_name>.*)' 145 146 link_target_rules = r'|'.join([ 147 extern_rule, 148 attach_rule, 149 page_rule, 150 ]) 151 link_target_re = re.compile(link_target_rules, re.VERBOSE|re.UNICODE) 152 153 link_rule = r""" 154 (?P<link> 155 \[\[ # link target 156 \s* # strip space 157 (?P<link_target>[^|]+?) 158 \s* # strip space 159 ( 160 \| # link description 161 \s* # strip space 162 (?P<link_desc> 163 (?: # 1. we have either a transclusion here (usually a image) 164 \{\{ 165 \s*[^|]+?\s* # usually image target (strip space) 166 (\|\s*[^|]*?\s* # usually image alt text (optional, strip space) 167 (\|\s*[^|]*?\s* # transclusion parameters (usually key="value" format, optional, strip space) 168 )? 169 )? 170 \}\} 171 ) 172 | 173 (?: # 2. or we have simple text here. 174 [^|]+? 175 ) 176 )? 177 \s* # strip space 178 ( 179 \| # link parameters 180 \s* # strip space 181 (?P<link_params>[^|]+?)? 182 \s* # strip space 183 )? 184 )? 185 \]\] 186 ) 187 """ 188 189 transclude_rule = r""" 190 (?P<transclude> 191 \{\{ 192 \s*(?P<transclude_target>[^|]+?)\s* # usually image target (strip space) 193 (\|\s*(?P<transclude_desc>[^|]+?)?\s* # usually image alt text (optional, strip space) 194 (\|\s*(?P<transclude_params>[^|]+?)?\s* # transclusion parameters (usually key="value" format, optional, strip space) 195 )? 196 )? 
197 \}\} 198 ) 199 """ 200 text_rule = r""" 201 (?P<simple_text> 202 [^|]+ # some text (not empty, does not contain separator) 203 ) 204 """ 205 # link descriptions: 206 link_desc_rules = r'|'.join([ 207 transclude_rule, 208 text_rule, 209 ]) 210 link_desc_re = re.compile(link_desc_rules, re.VERBOSE|re.UNICODE) 211 212 # transclude descriptions: 213 transclude_desc_rules = r'|'.join([ 214 text_rule, 215 ]) 216 transclude_desc_re = re.compile(transclude_desc_rules, re.VERBOSE|re.UNICODE) 217 218 # lists: 219 ol_rule = ur""" 220 ^\s+ # indentation 221 (?:[0-9]+|[aAiI])\. # arabic, alpha, roman counting 222 (?:\#\d+)? # optional start number 223 \s # require one blank afterwards 224 """ 225 ol_re = re.compile(ol_rule, re.VERBOSE|re.UNICODE) 226 227 dl_rule = ur""" 228 ^\s+ # indentation 229 .*?:: # definition term:: 230 \s # require on blank afterwards 231 """ 232 dl_re = re.compile(dl_rule, re.VERBOSE|re.UNICODE) 233 234 # others 235 indent_re = re.compile(ur"^\s*", re.UNICODE) 236 eol_re = re.compile(r'\r?\n', re.UNICODE) 237 238 # this is used inside parser/pre sections (we just want to know when it's over): 239 parser_unique = u'' 240 parser_scan_rule = ur""" 241(?P<parser_end> 242 %s\}\}\} # in parser/pre, we only look for the end of the parser/pre 243) 244""" 245 246 247 # the big, fat, less ugly one ;) 248 # please be very careful: blanks and # must be escaped with \ ! 249 scan_rules = ur""" 250(?P<emph_ibb> 251 '''''(?=[^']+''') # italic on, bold on, ..., bold off 252)|(?P<emph_ibi> 253 '''''(?=[^']+'') # italic on, bold on, ..., italic off 254)|(?P<emph_ib_or_bi> 255 '{5}(?=[^']) # italic and bold or bold and italic 256)|(?P<emph> 257 '{2,3} # italic or bold 258)|(?P<u> 259 __ # underline 260)|(?P<small> 261 ( 262 (?P<small_on>\~-\ ?) # small on (we eat a trailing blank if it is there) 263 | 264 (?P<small_off>-\~) # small off 265 ) 266)|(?P<big> 267 ( 268 (?P<big_on>\~\+\ ?) 
# big on (eat trailing blank) 269 | 270 (?P<big_off>\+\~) # big off 271 ) 272)|(?P<strike> 273 ( 274 (?P<strike_on>--\() # strike-through on 275 | 276 (?P<strike_off>\)--) # strike-through off 277 ) 278)|(?P<remark> 279 ( 280 (^|(?<=\s)) # we require either beginning of line or some whitespace before a remark begin 281 (?P<remark_on>/\*\s) # inline remark on (require and eat whitespace after it) 282 ) 283 | 284 ( 285 (?P<remark_off>\s\*/) # off (require and eat whitespace before it) 286 (?=\s) # we require some whitespace after a remark end 287 ) 288)|(?P<sup> 289 \^ # superscript on 290 (?P<sup_text>.*?) # capture the text 291 \^ # off 292)|(?P<sub> 293 ,, # subscript on 294 (?P<sub_text>.*?) # capture the text 295 ,, # off 296)|(?P<tt> 297 \{\{\{ # teletype on 298 (?P<tt_text>.*?) # capture the text 299 \}\}\} # off 300)|(?P<tt_bt> 301 ` # teletype (using a backtick) on 302 (?P<tt_bt_text>.*?) # capture the text 303 ` # off 304)|(?P<interwiki> 305 %(interwiki_rule)s # OtherWiki:PageName 306)|(?P<word> # must come AFTER interwiki rule! 307 %(word_rule)s # CamelCase wiki words 308)| 309%(link_rule)s 310| 311%(transclude_rule)s 312|(?P<url> 313 %(url_rule)s 314)|(?P<email> 315 [-\w._+]+ # name 316 \@ # at 317 [\w-]+(\.[\w-]+)+ # server/domain 318)|(?P<smiley> 319 (^|(?<=\s)) # we require either beginning of line or some space before a smiley 320 (%(smiley)s) # one of the smileys 321 (?=\s) # we require some space after the smiley 322)|(?P<macro> 323 << 324 (?P<macro_name>\w+) # name of the macro 325 (?:\((?P<macro_args>.*?)\))? # optionally macro arguments 326 >> 327)|(?P<heading> 328 ^(?P<hmarker>=+)\s+ # some === at beginning of line, eat trailing blanks 329 (?P<heading_text>.*?) 
# capture heading text 330 \s+(?P=hmarker)\s$ # some === at end of line (matching amount as we have seen), eat blanks 331)|(?P<parser> 332 \{\{\{ # parser on 333 (?P<parser_unique>(\{*|\w*)) # either some more {{{{ or some chars to solve the nesting problem 334 (?P<parser_line> 335 ( 336 \#! # hash bang 337 (?P<parser_name>\w*) # we have a parser name (can be empty) directly following the {{{ 338 ( 339 \s+ # some space ... 340 (?P<parser_args>.+?) # followed by parser args 341 )? # parser args are optional 342 \s* # followed by whitespace (eat it) until EOL 343 ) 344 | 345 (?P<parser_nothing>\s*) # no parser name, only whitespace up to EOL (eat it) 346 )$ 347 # "parser off" detection is done with parser_scan_rule! 348)|(?P<comment> 349 ^\#\#.*$ # src code comment, rest of line 350)|(?P<ol> 351 %(ol_rule)s # ordered list 352)|(?P<dl> 353 %(dl_rule)s # definition list 354)|(?P<li> 355 ^\s+\*\s* # unordered list 356)|(?P<li_none> 357 ^\s+\.\s* # unordered list, no bullets 358)|(?P<indent> 359 ^\s+ # indented by some spaces 360)|(?P<tableZ> 361 \|\|\ $ # the right end of a table row 362)|(?P<table> 363 (?:\|\|)+(?:<(?!<)[^>]*?>)?(?!\|?\s$) # a table 364)|(?P<rule> 365 -{4,} # hor. rule, min. 4 - 366)|(?P<entity> 367 &( 368 ([a-zA-Z]+) # symbolic entity, like ü 369 | 370 (\#(\d{1,5}|x[0-9a-fA-F]+)) # numeric entities, like * or B 371 ); 372)|(?P<sgml_entity> # must come AFTER entity rule! 
373 [<>&] # needs special treatment for html/xml 374)""" % { 375 'url_scheme': url_scheme, 376 'url_rule': url_rule, 377 'punct': punct_pattern, 378 'ol_rule': ol_rule, 379 'dl_rule': dl_rule, 380 'interwiki_rule': interwiki_rule, 381 'word_rule': word_rule, 382 'link_rule': link_rule, 383 'transclude_rule': transclude_rule, 384 'u': config.chars_upper, 385 'l': config.chars_lower, 386 'smiley': u'|'.join([re.escape(s) for s in config.smileys])} 387 scan_re = re.compile(scan_rules, re.UNICODE|re.VERBOSE) 388 389 # Don't start p before these 390 no_new_p_before = ("heading rule table tableZ tr td " 391 "ul ol dl dt dd li li_none indent " 392 "macro parser") 393 no_new_p_before = no_new_p_before.split() 394 no_new_p_before = dict(zip(no_new_p_before, [1] * len(no_new_p_before))) 395 396 def __init__(self, raw, request, **kw): 397 self.raw = raw 398 self.request = request 399 self.form = request.form # Macro object uses this 400 self._ = request.getText 401 self.cfg = request.cfg 402 self.line_anchors = kw.get('line_anchors', True) 403 self.start_line = kw.get('start_line', 0) 404 self.macro = None 405 406 # currently, there is only a single, optional argument to this parser and 407 # (when given), it is used as class(es) for a div wrapping the formatter output 408 # either use a single class like "comment" or multiple like "comment/red/dotted" 409 self.wrapping_div_class = kw.get('format_args', '').strip().replace('/', ' ') 410 411 self.is_em = 0 # must be int 412 self.is_b = 0 # must be int 413 self.is_u = False 414 self.is_strike = False 415 self.is_big = False 416 self.is_small = False 417 self.is_remark = False 418 419 self.lineno = 0 420 self.in_list = 0 # between <ul/ol/dl> and </ul/ol/dl> 421 self.in_li = 0 # between <li> and </li> 422 self.in_dd = 0 # between <dd> and </dd> 423 424 # states of the parser concerning being inside/outside of some "pre" section: 425 # None == we are not in any kind of pre section (was: 0) 426 # 'search_parser' == we didn't get a 
def __init__(self, raw, request, **kw):
    """ Remember the markup and the request and reset all parser state.

    @param raw: the wiki markup to parse
    @param request: the request object; its form, getText and cfg
                    attributes are kept on the parser
    @keyword line_anchors: kept as self.line_anchors (default: True)
    @keyword start_line: kept as self.start_line (default: 0)
    @keyword format_args: the single, optional argument of this parser:
                          class(es) for a div wrapping the formatter output,
                          either a single class like "comment" or multiple
                          like "comment/red/dotted"
    """
    self.raw = raw
    self.request = request
    self.form = request.form # Macro object uses this
    self._ = request.getText
    self.cfg = request.cfg
    self.macro = None

    self.line_anchors = kw.get('line_anchors', True)
    self.start_line = kw.get('start_line', 0)

    # "comment/red/dotted" is stored as "comment red dotted"
    div_classes = kw.get('format_args', '')
    self.wrapping_div_class = div_classes.strip().replace('/', ' ')

    # inline markup state
    self.is_b = 0 # must be int
    self.is_em = 0 # must be int
    self.is_u = False
    self.is_strike = False
    self.is_small = False
    self.is_big = False
    self.is_remark = False

    # block level state
    self.lineno = 0
    self.in_table = 0
    self.in_list = 0 # between <ul/ol/dl> and </ul/ol/dl>
    self.in_li = 0 # between <li> and </li>
    self.in_dd = 0 # between <dd> and </dd>
    self.inhibit_p = 0 # if set, do not auto-create a <p>aragraph

    # states of the parser concerning being inside/outside of some "pre" section:
    # None == we are not in any kind of pre section (was: 0)
    # 'search_parser' == we didn't get a parser yet, still searching for it (was: 1)
    # 'found_parser' == we found a valid parser (was: 2)
    self.in_pre = None

    # holds the nesting level (in chars) of open lists
    self.list_indents = []
    self.list_types = []

def _close_item(self, result):
    """ Append the closing markup of whatever item is still open.

    Closes an open table first, then an open list item and an open
    definition description (each preceded by a paragraph end if the
    formatter is inside a paragraph) and resets the matching state flags.

    @param result: list of output fragments, extended in place
    """
    if self.in_table:
        result.append(self.formatter.table(0))
        self.in_table = 0
    open_items = (
        ('in_li', 'listitem'),
        ('in_dd', 'definition_desc'),
    )
    for state_attr, closer_name in open_items:
        if not getattr(self, state_attr):
            continue
        setattr(self, state_attr, 0)
        if self.formatter.in_p:
            result.append(self.formatter.paragraph(0))
        result.append(getattr(self.formatter, closer_name)(0))

def _u_repl(self, word, groups):
    """Handle underline (toggles on every occurrence)."""
    now_on = not self.is_u
    self.is_u = now_on
    return self.formatter.underline(now_on)

def _remark_repl(self, word, groups):
    """Handle remarks.

    A remark start while a remark is open (or a remark end while none is
    open) is emitted as plain text, everything else toggles the remark span.
    """
    is_open = self.is_remark
    if (groups.get('remark_on') and is_open) or (groups.get('remark_off') and not is_open):
        return self.formatter.text(word)
    self.is_remark = not is_open
    return self.formatter.span(self.is_remark, css_class='comment')
_remark_on_repl = _remark_repl
_remark_off_repl = _remark_repl

def _strike_repl(self, word, groups):
    """Handle strikethrough.

    Redundant on/off markers are emitted as plain text, everything else
    toggles the strike-through state.
    """
    is_open = self.is_strike
    if (groups.get('strike_on') and is_open) or (groups.get('strike_off') and not is_open):
        return self.formatter.text(word)
    self.is_strike = not is_open
    return self.formatter.strike(self.is_strike)
_strike_on_repl = _strike_repl
_strike_off_repl = _strike_repl

def _small_repl(self, word, groups):
    """Handle small.

    Redundant on/off markers are emitted as plain text, everything else
    toggles the small state.
    """
    is_open = self.is_small
    if (groups.get('small_on') and is_open) or (groups.get('small_off') and not is_open):
        return self.formatter.text(word)
    self.is_small = not is_open
    return self.formatter.small(self.is_small)
_small_on_repl = _small_repl
_small_off_repl = _small_repl
def _big_repl(self, word, groups):
    """Handle big.

    A "big on" marker while big is already on (or a "big off" marker
    while it is off) is emitted as plain text, everything else toggles
    the big state.
    """
    wants_on = groups.get('big_on')
    wants_off = groups.get('big_off')
    if (wants_on and self.is_big) or (wants_off and not self.is_big):
        return self.formatter.text(word)
    self.is_big = not self.is_big
    return self.formatter.big(self.is_big)
_big_on_repl = _big_repl
_big_off_repl = _big_repl

def _emph_repl(self, word, groups):
    """Handle emphasis, i.e. '' and '''.

    The toggled state is set to 2 (instead of True) when the other
    emphasis kind is already active, i.e. it records which one was
    opened second.
    """
    if len(word) != 3:
        # two quotes: italics
        self.is_em = not self.is_em
        if self.is_b and self.is_em:
            self.is_em = 2
        return self.formatter.emphasis(self.is_em)
    # three quotes: bold
    self.is_b = not self.is_b
    if self.is_b and self.is_em:
        self.is_b = 2
    return self.formatter.strong(self.is_b)

def _emph_ibb_repl(self, word, groups):
    """Handle mixed emphasis, i.e. ''''' followed by '''."""
    self.is_em = not self.is_em
    self.is_b = not self.is_b
    if self.is_b and self.is_em:
        # bold is the inner one
        self.is_b = 2
    italic_markup = self.formatter.emphasis(self.is_em)
    bold_markup = self.formatter.strong(self.is_b)
    return italic_markup + bold_markup

def _emph_ibi_repl(self, word, groups):
    """Handle mixed emphasis, i.e. ''''' followed by ''."""
    self.is_em = not self.is_em
    self.is_b = not self.is_b
    if self.is_b and self.is_em:
        # italics is the inner one
        self.is_em = 2
    bold_markup = self.formatter.strong(self.is_b)
    italic_markup = self.formatter.emphasis(self.is_em)
    return bold_markup + italic_markup
def _emph_ib_or_bi_repl(self, word, groups):
    """Handle mixed emphasis, exactly five '''''.

    Both bold and italics get toggled; bold is emitted first only when
    both are active and the bold state is greater than the italics state.
    """
    bold_goes_first = self.is_b > self.is_em > 0
    self.is_b = not self.is_b
    self.is_em = not self.is_em
    if not bold_goes_first:
        return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b)
    return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em)

def _sup_repl(self, word, groups):
    """Handle superscript."""
    opening = self.formatter.sup(1)
    content = self.formatter.text(groups.get('sup_text', ''))
    return opening + content + self.formatter.sup(0)
_sup_text_repl = _sup_repl

def _sub_repl(self, word, groups):
    """Handle subscript."""
    opening = self.formatter.sub(1)
    content = self.formatter.text(groups.get('sub_text', ''))
    return opening + content + self.formatter.sub(0)
_sub_text_repl = _sub_repl

def _tt_repl(self, word, groups):
    """Handle inline code."""
    opening = self.formatter.code(1)
    content = self.formatter.text(groups.get('tt_text', ''))
    return opening + content + self.formatter.code(0)
_tt_text_repl = _tt_repl

def _tt_bt_repl(self, word, groups):
    """Handle backticked inline code."""
    opening = self.formatter.code(1, css="backtick")
    content = self.formatter.text(groups.get('tt_bt_text', ''))
    return opening + content + self.formatter.code(0)
_tt_bt_text_repl = _tt_bt_repl

def _rule_repl(self, word, groups):
    """Handle sequences of dashes.

    All open lists and an open paragraph are closed first. Up to 4 dashes
    give the default rule, more dashes a rule of size 1 - 6 (the actual
    size is defined in css).
    """
    closing = self._undent() + self._closeP()
    dashes = len(word)
    if dashes > 4:
        return closing + self.formatter.rule(min(dashes, 10) - 4)
    return closing + self.formatter.rule()
def _interwiki_repl(self, word, groups):
    """Handle InterWiki links.

    If resolving the wiki name flags it as bad, the whole match is emitted
    as plain text. Otherwise an interwiki link is emitted that shows the
    page name (without anchor) as link text.
    """
    wiki = groups.get('interwiki_wiki')
    page = groups.get('interwiki_page')

    # the 4th item of the resolve result tells whether the wiki name is bad
    if wikiutil.resolve_interwiki(self.request, wiki, page)[3]:
        return self.formatter.text(groups.get('interwiki'))

    page, anchor = wikiutil.split_anchor(page)
    link_start = self.formatter.interwikilink(1, wiki, page, anchor=anchor)
    link_text = self.formatter.text(page)
    return link_start + link_text + self.formatter.interwikilink(0, wiki, page)
_interwiki_wiki_repl = _interwiki_repl
_interwiki_page_repl = _interwiki_repl

def _word_repl(self, word, groups):
    """Handle WikiNames.

    With a leading "!" and cfg.bang_meta set, the word is emitted as a
    non-link. A word resolving to the current page is emitted as plain
    text, everything else as a page link.
    """
    bang = ''
    if groups.get('word_bang'):
        if self.cfg.bang_meta:
            # handle !NotWikiNames
            return self.formatter.nowikiword(word)
        bang = self.formatter.text('!')
    name = groups.get('word_name')
    here = self.formatter.page.page_name
    linked_page = wikiutil.AbsPageName(here, name)
    if linked_page == here:
        # a simple, self-referencing link: emit it as plain text
        return self.formatter.text(word)
    linked_page, anchor = wikiutil.split_anchor(linked_page)
    link_start = self.formatter.pagelink(1, linked_page, anchor=anchor)
    link_text = self.formatter.text(word)
    return bang + link_start + link_text + self.formatter.pagelink(0, linked_page)
_word_bang_repl = _word_repl
_word_name_repl = _word_repl
_word_anchor_repl = _word_repl

def _url_repl(self, word, groups):
    """Handle literal URLs (the url scheme is used as css class)."""
    target = groups.get('url_target', '')
    scheme = groups.get('url_scheme', 'http')
    link_start = self.formatter.url(1, target, css=scheme)
    link_text = self.formatter.text(target)
    return link_start + link_text + self.formatter.url(0)
_url_target_repl = _url_repl
_url_scheme_repl = _url_repl
def _transclude_description(self, desc, default_text=''):
    """ parse a string <desc> valid as transclude description (text, ...)
        and return the description.

        wikiutil.escape is NOT used here because it is html specific (the
        html formatter, if used, does this for all html attributes).

        formatter.text is NOT called here because the result sometimes is
        just used for some alt and/or title attribute, but not emitted as
        text.

        @param desc: the transclude description to parse
        @param default_text: use this text if desc can not be parsed
        @return: the description (or default_text)
    """
    found = self.transclude_desc_re.match(desc)
    if not found:
        return default_text
    simple_text = found.group('simple_text')
    if simple_text:
        return simple_text
    return desc

def _get_params(self, params, tag_attrs=None, acceptable_attrs=None, query_args=None):
    """ parse the parameters of link/transclusion markup

        tag_attrs and query_args can be dicts with some default key/values
        that will be in the result as given, unless overridden by params
        (the given dicts are updated in place).

        @param params: the parameter string of the markup (may be empty)
        @param tag_attrs: default tag attributes (default: new empty dict)
        @param acceptable_attrs: keys that are accepted as tag attributes
        @param query_args: default query arguments (default: new empty dict)
        @return: tuple (tag_attrs, query_args); keys starting with '&' end
                 up (without the '&') in query_args, other keys that are
                 not acceptable are dropped
    """
    if tag_attrs is None:
        tag_attrs = {}
    if query_args is None:
        query_args = {}
    if not params:
        return tag_attrs, query_args

    # we ignore fixed and trailing args and only use kw args:
    fixed, named, trailing = wikiutil.parse_quoted_separated(params)
    if acceptable_attrs is None:
        acceptable_attrs = []
    for name, value in named.items():
        # wikiutil.escape for name/value must be done by (html) formatter!
        if name in acceptable_attrs:
            # tag attributes must be string type
            tag_attrs[str(name)] = value
            continue
        if name.startswith('&'):
            query_args[name[1:]] = value
    return tag_attrs, query_args
def _transclude_repl(self, word, groups):
    """Handles transcluding content, usually embedding images.

    Depending on the kind of target this emits:
     - an image for an external address,
     - inlined text, an image, an embedded object or a plain attachment
       link for an attachment,
     - a drawing for a drawing,
     - an embedded object for a (local or interwiki) page,
     - the markup as plain text if the target is none of these.

    @param word: the matched markup, only used for the '???' fallback
    @param groups: dict of the named regex groups; transclude_target,
                   transclude_desc and transclude_params are used
    @return: the formatter output
    """
    target = groups.get('transclude_target', '')
    target = wikiutil.url_unquote(target)
    desc = groups.get('transclude_desc', '') or ''
    params = groups.get('transclude_params', u'') or u''
    acceptable_attrs_img = ['class', 'title', 'longdesc', 'width', 'height', 'align', ] # no style because of JS
    acceptable_attrs_object = ['class', 'title', 'width', 'height', # no style because of JS
                               'type', 'standby', ] # we maybe need a hack for <PARAM> here
    m = self.link_target_re.match(target)
    if m:
        if m.group('extern_addr'):
            # currently only supports ext. image inclusion
            target = m.group('extern_addr')
            desc = self._transclude_description(desc, target)
            tag_attrs, query_args = self._get_params(params,
                tag_attrs={'class': 'external_image',
                           'alt': desc,
                           'title': desc, },
                acceptable_attrs=acceptable_attrs_img)
            return self.formatter.image(src=target, **tag_attrs)
            # FF2 has a bug with target mimetype detection, it looks at the url path
            # and expects to find some "filename extension" there (like .png) and this
            # (not the response http headers) will set the default content-type of
            # the object. This will often work for statically served files, but
            # fails for MoinMoin attachments (they don't have the filename.ext in the
            # path, but in the query string). FF3 seems to have this bug fixed, opera 9.2
            # also works.
            #return (self.formatter.transclusion(1, data=target) +
            #        desc +
            #        self.formatter.transclusion(0))

        elif m.group('attach_scheme'):
            scheme = m.group('attach_scheme')
            url = wikiutil.url_unquote(m.group('attach_addr'))
            if scheme == 'attachment':
                mt = wikiutil.MimeType(filename=url)
                if mt.major == 'text':
                    desc = self._transclude_description(desc, url)
                    return self.formatter.attachment_inlined(url, desc)
                # distinguishes whether the browser needs a plugin in place
                elif mt.major == 'image' and mt.minor in config.browser_supported_images:
                    desc = self._transclude_description(desc, url)
                    tag_attrs, query_args = self._get_params(params,
                        tag_attrs={'alt': desc,
                                   'title': desc, },
                        acceptable_attrs=acceptable_attrs_img)
                    return self.formatter.attachment_image(url, **tag_attrs)
                else:
                    from MoinMoin.action import AttachFile
                    current_pagename = self.formatter.page.page_name
                    pagename, filename = AttachFile.absoluteName(url, current_pagename)
                    if AttachFile.exists(self.request, pagename, filename):
                        href = AttachFile.getAttachUrl(pagename, filename, self.request)
                        tag_attrs, query_args = self._get_params(params,
                            tag_attrs={'title': desc, },
                            acceptable_attrs=acceptable_attrs_object)
                        return (self.formatter.transclusion(1, data=href, type=mt.spoil(), **tag_attrs) +
                                self.formatter.text(self._transclude_description(desc, url)) +
                                self.formatter.transclusion(0))
                    else:
                        return (self.formatter.attachment_link(1, url) +
                                self.formatter.text(self._transclude_description(desc, url)) +
                                self.formatter.attachment_link(0))
                # NOTE: a "NOT USED CURRENTLY" section handing other mimetypes
                # to the EmbedObject macro used to follow here. It could never
                # run (every branch above returns) and it referenced an
                # undefined name (request), so it was removed.
            elif scheme == 'drawing':
                url = wikiutil.drawing2fname(url)
                desc = self._transclude_description(desc, url)
                if desc:
                    tag_attrs = {'alt': desc, 'title': desc, }
                else:
                    tag_attrs = {}
                tag_attrs, query_args = self._get_params(params,
                    tag_attrs=tag_attrs,
                    acceptable_attrs=acceptable_attrs_img)
                return self.formatter.attachment_drawing(url, desc, **tag_attrs)

        elif m.group('page_name'):
            # experimental client side transclusion
            page_name_all = m.group('page_name')
            if ':' in page_name_all:
                wiki_name, page_name = page_name_all.split(':', 1)
                wikitag, wikiurl, wikitail, err = wikiutil.resolve_interwiki(self.request, wiki_name, page_name)
            else:
                err = True
            if err: # not a interwiki link / not in interwiki map
                tag_attrs, query_args = self._get_params(params,
                    tag_attrs={'type': 'text/html',
                               'width': '100%', },
                    acceptable_attrs=acceptable_attrs_object)
                if 'action' not in query_args:
                    query_args['action'] = 'content'
                url = Page(self.request, page_name_all).url(self.request, querystr=query_args)
                return (self.formatter.transclusion(1, data=url, **tag_attrs) +
                        self.formatter.text(self._transclude_description(desc, page_name_all)) +
                        self.formatter.transclusion(0))
                #return u"Error: <<Include(%s,%s)>> emulation missing..." % (page_name, args)
            else: # looks like a valid interwiki link
                url = wikiutil.join_wiki(wikiurl, wikitail)
                tag_attrs, query_args = self._get_params(params,
                    tag_attrs={'type': 'text/html',
                               'width': '100%', },
                    acceptable_attrs=acceptable_attrs_object)
                if 'action' not in query_args:
                    query_args['action'] = 'content' # XXX moin specific
                url += '?%s' % wikiutil.makeQueryString(query_args)
                return (self.formatter.transclusion(1, data=url, **tag_attrs) +
                        self.formatter.text(self._transclude_description(desc, page_name)) +
                        self.formatter.transclusion(0))
                #return u"Error: <<RemoteInclude(%s:%s,%s)>> still missing." % (wiki_name, page_name, args)

        else:
            # target is none of the above: show the markup as plain text
            desc = self._transclude_description(desc, target)
            return self.formatter.text('{{%s|%s|%s}}' % (target, desc, params))
    return word +'???'
_transclude_target_repl = _transclude_repl
_transclude_desc_repl = _transclude_repl
_transclude_params_repl = _transclude_repl
def _link_description(self, desc, target='', default_text=''):
    """ parse a string <desc> valid as link description (text, transclusion, ...)
        and return formatted content.

        @param desc: the link description to parse
        @param target: target of the link (as readable markup) - used as
                       description of a transcluded object (image) that
                       has no description of its own
        @param default_text: use this text (formatted as text) if desc can
                             not be parsed
        @return: the formatted description
    """
    found = self.link_desc_re.match(desc)
    if not found:
        if default_text:
            return self.formatter.text(default_text)
        return default_text
    if found.group('simple_text'):
        return self.formatter.text(found.group('simple_text'))
    if found.group('transclude'):
        transclude_groups = found.groupdict()
        if transclude_groups.get('transclude_desc') is None:
            # if transcluded obj (image) has no description, use target for it
            transclude_groups['transclude_desc'] = target
        return self._transclude_repl(found.group('transclude'), transclude_groups)
    return desc

def _link_repl(self, word, groups):
    """Handle [[target|text]] links.

    Emits an external link, an attachment link / drawing or a (local or
    interwiki) page link, depending on the target. A target that is none
    of these is emitted as plain text markup.
    """
    target = groups.get('link_target', '')
    desc = groups.get('link_desc', '') or ''
    params = groups.get('link_params', u'') or u''
    acceptable_attrs = ['class', 'title', 'target', 'accesskey', 'rel', ] # no style because of JS
    mt = self.link_target_re.match(target)
    if not mt:
        return None

    if mt.group('page_name'):
        page_name_and_anchor = mt.group('page_name')
        err = True
        if ':' in page_name_and_anchor:
            wiki_name, page_name = page_name_and_anchor.split(':', 1)
            wikitag, wikiurl, wikitail, err = wikiutil.resolve_interwiki(self.request, wiki_name, page_name)
        if not err:
            # interwiki link
            page_name, anchor = wikiutil.split_anchor(page_name)
            tag_attrs, query_args = self._get_params(params,
                                                     tag_attrs={},
                                                     acceptable_attrs=acceptable_attrs)
            return (self.formatter.interwikilink(1, wiki_name, page_name, anchor=anchor, querystr=query_args, **tag_attrs) +
                    self._link_description(desc, target, page_name) +
                    self.formatter.interwikilink(0, wiki_name, page_name))
        # not a interwiki link / not in interwiki map
        page_name, anchor = wikiutil.split_anchor(page_name_and_anchor)
        current_page = self.formatter.page.page_name
        if not page_name:
            page_name = current_page
        # handle relative links
        abs_page_name = wikiutil.AbsPageName(current_page, page_name)
        tag_attrs, query_args = self._get_params(params,
                                                 tag_attrs={},
                                                 acceptable_attrs=acceptable_attrs)
        return (self.formatter.pagelink(1, abs_page_name, anchor=anchor, querystr=query_args, **tag_attrs) +
                self._link_description(desc, target, page_name_and_anchor) +
                self.formatter.pagelink(0, abs_page_name))

    if mt.group('extern_addr'):
        scheme = mt.group('extern_scheme')
        target = mt.group('extern_addr')
        tag_attrs, query_args = self._get_params(params,
                                                 tag_attrs={'class': scheme, },
                                                 acceptable_attrs=acceptable_attrs)
        return (self.formatter.url(1, target, **tag_attrs) +
                self._link_description(desc, target, target) +
                self.formatter.url(0))

    if mt.group('attach_scheme'):
        scheme = mt.group('attach_scheme')
        url = wikiutil.url_unquote(mt.group('attach_addr'))
        tag_attrs, query_args = self._get_params(params,
                                                 tag_attrs={'title': desc, },
                                                 acceptable_attrs=acceptable_attrs)
        if scheme == 'attachment': # ZZZ
            return (self.formatter.attachment_link(1, url, querystr=query_args, **tag_attrs) +
                    self._link_description(desc, target, url) +
                    self.formatter.attachment_link(0))
        if scheme == 'drawing':
            url = wikiutil.drawing2fname(url)
            return self.formatter.attachment_drawing(url, desc, alt=desc, **tag_attrs)
        return None

    # target is none of the above: show the markup as plain text
    if desc:
        desc = '|' + desc
    return self.formatter.text('[[%s%s]]' % (target, desc))
_link_target_repl = _link_repl
_link_desc_repl = _link_repl
_link_params_repl = _link_repl

def _email_repl(self, word, groups):
    """Handle email addresses (without a leading mailto:)."""
    link_start = self.formatter.url(1, "mailto:%s" % word, css='mailto')
    link_text = self.formatter.text(word)
    return link_start + link_text + self.formatter.url(0)

def _sgml_entity_repl(self, word, groups):
    """Handle SGML entities (emitted through formatter.text)."""
    fmt = self.formatter
    return fmt.text(word)
_entity_repl(self, word, groups): 929 """Handle numeric (decimal and hexadecimal) and symbolic SGML entities.""" 930 return self.formatter.rawHTML(word) 931 932 def _indent_repl(self, match, groups): 933 """Handle pure indentation (no - * 1. markup).""" 934 result = [] 935 if not (self.in_li or self.in_dd): 936 self._close_item(result) 937 self.in_li = 1 938 css_class = None 939 if self.line_was_empty and not self.first_list_item: 940 css_class = 'gap' 941 result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none")) 942 return ''.join(result) 943 944 def _li_none_repl(self, match, groups): 945 """Handle type=none (" .") lists.""" 946 result = [] 947 self._close_item(result) 948 self.in_li = 1 949 css_class = None 950 if self.line_was_empty and not self.first_list_item: 951 css_class = 'gap' 952 result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none")) 953 return ''.join(result) 954 955 def _li_repl(self, match, groups): 956 """Handle bullet (" *") lists.""" 957 result = [] 958 self._close_item(result) 959 self.in_li = 1 960 css_class = None 961 if self.line_was_empty and not self.first_list_item: 962 css_class = 'gap' 963 result.append(self.formatter.listitem(1, css_class=css_class)) 964 return ''.join(result) 965 966 def _ol_repl(self, match, groups): 967 """Handle numbered lists.""" 968 return self._li_repl(match, groups) 969 970 def _dl_repl(self, match, groups): 971 """Handle definition lists.""" 972 result = [] 973 self._close_item(result) 974 self.in_dd = 1 975 result.extend([ 976 self.formatter.definition_term(1), 977 self.formatter.text(match[1:-3].lstrip(' ')), 978 self.formatter.definition_term(0), 979 self.formatter.definition_desc(1), 980 ]) 981 return ''.join(result) 982 983 def _indent_level(self): 984 """Return current char-wise indent level.""" 985 return len(self.list_indents) and self.list_indents[-1] 986 987 def _indent_to(self, new_level, list_type, numtype, numstart): 988 
"""Close and open lists.""" 989 openlist = [] # don't make one out of these two statements! 990 closelist = [] 991 992 if self._indent_level() != new_level and self.in_table: 993 closelist.append(self.formatter.table(0)) 994 self.in_table = 0 995 996 while self._indent_level() > new_level: 997 self._close_item(closelist) 998 if self.list_types[-1] == 'ol': 999 tag = self.formatter.number_list(0) 1000 elif self.list_types[-1] == 'dl': 1001 tag = self.formatter.definition_list(0) 1002 else: 1003 tag = self.formatter.bullet_list(0) 1004 closelist.append(tag) 1005 1006 del self.list_indents[-1] 1007 del self.list_types[-1] 1008 1009 if self.list_types: # we are still in a list 1010 if self.list_types[-1] == 'dl': 1011 self.in_dd = 1 1012 else: 1013 self.in_li = 1 1014 1015 # Open new list, if necessary 1016 if self._indent_level() < new_level: 1017 self.list_indents.append(new_level) 1018 self.list_types.append(list_type) 1019 1020 if self.formatter.in_p: 1021 closelist.append(self.formatter.paragraph(0)) 1022 1023 if list_type == 'ol': 1024 tag = self.formatter.number_list(1, numtype, numstart) 1025 elif list_type == 'dl': 1026 tag = self.formatter.definition_list(1) 1027 else: 1028 tag = self.formatter.bullet_list(1) 1029 openlist.append(tag) 1030 1031 self.first_list_item = 1 1032 self.in_li = 0 1033 self.in_dd = 0 1034 1035 # If list level changes, close an open table 1036 if self.in_table and (openlist or closelist): 1037 closelist[0:0] = [self.formatter.table(0)] 1038 self.in_table = 0 1039 1040 self.in_list = self.list_types != [] 1041 return ''.join(closelist) + ''.join(openlist) 1042 1043 def _undent(self): 1044 """Close all open lists.""" 1045 result = [] 1046 #result.append("<!-- _undent start -->\n") 1047 self._close_item(result) 1048 for type in self.list_types[::-1]: 1049 if type == 'ol': 1050 result.append(self.formatter.number_list(0)) 1051 elif type == 'dl': 1052 result.append(self.formatter.definition_list(0)) 1053 else: 1054 
result.append(self.formatter.bullet_list(0)) 1055 #result.append("<!-- _undent end -->\n") 1056 self.list_indents = [] 1057 self.list_types = [] 1058 return ''.join(result) 1059 1060 def _getTableAttrs(self, attrdef): 1061 attr_rule = r'^(\|\|)*<(?!<)(?P<attrs>[^>]*?)>' 1062 m = re.match(attr_rule, attrdef, re.U) 1063 if not m: 1064 return {}, '' 1065 attrdef = m.group('attrs') 1066 1067 # extension for special table markup 1068 def table_extension(key, parser, attrs, wiki_parser=self): 1069 """ returns: tuple (found_flag, msg) 1070 found_flag: whether we found something and were able to process it here 1071 true for special stuff like 100% or - or #AABBCC 1072 false for style xxx="yyy" attributes 1073 msg: "" or an error msg 1074 """ 1075 _ = wiki_parser._ 1076 found = False 1077 msg = '' 1078 if key[0] in "0123456789": 1079 token = parser.get_token() 1080 if token != '%': 1081 wanted = '%' 1082 msg = _('Expected "%(wanted)s" after "%(key)s", got "%(token)s"') % { 1083 'wanted': wanted, 'key': key, 'token': token} 1084 else: 1085 try: 1086 dummy = int(key) 1087 except ValueError: 1088 msg = _('Expected an integer "%(key)s" before "%(token)s"') % { 1089 'key': key, 'token': token} 1090 else: 1091 found = True 1092 attrs['width'] = '"%s%%"' % key 1093 elif key == '-': 1094 arg = parser.get_token() 1095 try: 1096 dummy = int(arg) 1097 except ValueError: 1098 msg = _('Expected an integer "%(arg)s" after "%(key)s"') % { 1099 'arg': arg, 'key': key} 1100 else: 1101 found = True 1102 attrs['colspan'] = '"%s"' % arg 1103 elif key == '|': 1104 arg = parser.get_token() 1105 try: 1106 dummy = int(arg) 1107 except ValueError: 1108 msg = _('Expected an integer "%(arg)s" after "%(key)s"') % { 1109 'arg': arg, 'key': key} 1110 else: 1111 found = True 1112 attrs['rowspan'] = '"%s"' % arg 1113 elif key == '(': 1114 found = True 1115 attrs['align'] = '"left"' 1116 elif key == ':': 1117 found = True 1118 attrs['align'] = '"center"' 1119 elif key == ')': 1120 found = True 1121 
attrs['align'] = '"right"' 1122 elif key == '^': 1123 found = True 1124 attrs['valign'] = '"top"' 1125 elif key == 'v': 1126 found = True 1127 attrs['valign'] = '"bottom"' 1128 elif key == '#': 1129 arg = parser.get_token() 1130 try: 1131 if len(arg) != 6: 1132 raise ValueError 1133 dummy = int(arg, 16) 1134 except ValueError: 1135 msg = _('Expected a color value "%(arg)s" after "%(key)s"') % { 1136 'arg': arg, 'key': key} 1137 else: 1138 found = True 1139 attrs['bgcolor'] = '"#%s"' % arg 1140 return found, self.formatter.rawHTML(msg) 1141 1142 # scan attributes 1143 attr, msg = wikiutil.parseAttributes(self.request, attrdef, '>', table_extension) 1144 if msg: 1145 msg = '<strong class="highlight">%s</strong>' % msg 1146 #logging.debug("parseAttributes returned %r" % attr) 1147 return attr, msg 1148 1149 def _tableZ_repl(self, word, groups): 1150 """Handle table row end.""" 1151 if self.in_table: 1152 result = '' 1153 # REMOVED: check for self.in_li, p should always close 1154 if self.formatter.in_p: 1155 result = self.formatter.paragraph(0) 1156 result += self.formatter.table_cell(0) + self.formatter.table_row(0) 1157 return result 1158 else: 1159 return self.formatter.text(word) 1160 1161 def _table_repl(self, word, groups): 1162 """Handle table cell separator.""" 1163 if self.in_table: 1164 result = [] 1165 # check for attributes 1166 attrs, attrerr = self._getTableAttrs(word) 1167 1168 # start the table row? 1169 if self.table_rowstart: 1170 self.table_rowstart = 0 1171 result.append(self.formatter.table_row(1, attrs)) 1172 else: 1173 # Close table cell, first closing open p 1174 # REMOVED check for self.in_li, paragraph should close always! 
1175 if self.formatter.in_p: 1176 result.append(self.formatter.paragraph(0)) 1177 result.append(self.formatter.table_cell(0)) 1178 1179 # check for adjacent cell markers 1180 if word.count("|") > 2: 1181 if 'align' not in attrs and \ 1182 not ('style' in attrs and 'text-align' in attrs['style'].lower()): 1183 # add center alignment if we don't have some alignment already 1184 attrs['align'] = '"center"' 1185 if 'colspan' not in attrs: 1186 attrs['colspan'] = '"%d"' % (word.count("|")/2) 1187 1188 # return the complete cell markup 1189 result.append(self.formatter.table_cell(1, attrs) + attrerr) 1190 result.append(self._line_anchordef()) 1191 return ''.join(result) 1192 else: 1193 return self.formatter.text(word) 1194 1195 def _heading_repl(self, word, groups): 1196 """Handle section headings.""" 1197 heading_text = groups.get('heading_text', '') 1198 depth = min(len(groups.get('hmarker')), 5) 1199 return ''.join([ 1200 self._closeP(), 1201 self.formatter.heading(1, depth, id=heading_text), 1202 self.formatter.text(heading_text), 1203 self.formatter.heading(0, depth), 1204 ]) 1205 _heading_text_repl = _heading_repl 1206 1207 def _parser_repl(self, word, groups): 1208 """Handle parsed code displays.""" 1209 self.parser = None 1210 self.parser_name = None 1211 self.parser_lines = [] 1212 parser_line = word = groups.get('parser_line', u'') 1213 parser_name = groups.get('parser_name', None) 1214 parser_args = groups.get('parser_args', None) 1215 parser_nothing = groups.get('parser_nothing', None) 1216 parser_unique = groups.get('parser_unique', u'') or u'' 1217 #logging.debug("_parser_repl: parser_name %r parser_args %r parser_unique %r" % (parser_name, parser_args, parser_unique)) 1218 if set(parser_unique) == set('{'): # just some more {{{{{{ 1219 parser_unique = u'}' * len(parser_unique) # for symmetry cosmetic reasons 1220 self.parser_unique = parser_unique 1221 if parser_name is not None: 1222 # First try to find a parser for this 1223 if parser_name == u'': 1224 # 
empty bang paths lead to a normal code display 1225 # can be used to escape real, non-empty bang paths 1226 #logging.debug("_parser_repl: empty bangpath") 1227 parser_name = 'text' 1228 word = '' 1229 elif parser_nothing is None: 1230 # there was something non-whitespace following the {{{ 1231 parser_name = 'text' 1232 1233 self.setParser(parser_name) 1234 if not self.parser and parser_name: 1235 # loading the desired parser didn't work, retry a safe option: 1236 wanted_parser = parser_name 1237 parser_name = 'text' 1238 self.setParser(parser_name) 1239 word = '%s %s (-)' % (wanted_parser, parser_args) # indication that it did not work 1240 1241 if self.parser: 1242 self.parser_name = parser_name 1243 self.in_pre = 'found_parser' 1244 if word: 1245 self.parser_lines.append(word) 1246 else: 1247 self.in_pre = 'search_parser' 1248 1249 #logging.debug("_parser_repl: in_pre %r line %d" % (self.in_pre, self.lineno)) 1250 return '' 1251 _parser_unique_repl = _parser_repl 1252 _parser_line_repl = _parser_repl 1253 _parser_name_repl = _parser_repl 1254 _parser_args_repl = _parser_repl 1255 _parser_nothing_repl = _parser_repl 1256 1257 def _parser_content(self, line): 1258 """ handle state and collecting lines for parser in pre/parser sections """ 1259 #logging.debug("parser_content: %r" % line) 1260 if self.in_pre == 'search_parser' and line.strip(): 1261 # try to find a parser specification 1262 parser_name = '' 1263 if line.strip().startswith("#!"): 1264 parser_name = line.strip()[2:] 1265 if parser_name: 1266 parser_name = parser_name.split()[0] 1267 else: 1268 parser_name = 'text' 1269 self.setParser(parser_name) 1270 1271 if not self.parser: 1272 parser_name = 'text' 1273 self.setParser(parser_name) 1274 1275 if self.parser: 1276 self.in_pre = 'found_parser' 1277 self.parser_lines.append(line) 1278 self.parser_name = parser_name 1279 1280 elif self.in_pre == 'found_parser': 1281 # collect the content lines 1282 self.parser_lines.append(line) 1283 1284 return '' # we 
emit the content after reaching the end of the parser/pre section 1285 1286 def _parser_end_repl(self, word, groups): 1287 """ when we reach the end of a parser/pre section, 1288 we call the parser with the lines we collected 1289 """ 1290 #if self.in_pre: 1291 self.in_pre = None 1292 self.inhibit_p = 0 1293 #logging.debug("_parser_end_repl: in_pre %r line %d" % (self.in_pre, self.lineno)) 1294 self.request.write(self._closeP()) 1295 if self.parser_name is None: 1296 # we obviously did not find a parser specification 1297 self.parser_name = 'text' 1298 result = self.formatter.parser(self.parser_name, self.parser_lines) 1299 del self.parser_lines 1300 self.in_pre = None 1301 self.parser = None 1302 return result 1303 1304 def _smiley_repl(self, word, groups): 1305 """Handle smileys.""" 1306 return self.formatter.smiley(word) 1307 1308 def _comment_repl(self, word, groups): 1309 # if we are in a paragraph, we must close it so that normal text following 1310 # in the line below the comment will reopen a new paragraph. 
1311 if self.formatter.in_p: 1312 self.formatter.paragraph(0) 1313 self.line_is_empty = 1 # markup following comment lines treats them as if they were empty 1314 return self.formatter.comment(word) 1315 1316 def _closeP(self): 1317 if self.formatter.in_p: 1318 return self.formatter.paragraph(0) 1319 return '' 1320 1321 def _macro_repl(self, word, groups): 1322 """Handle macros.""" 1323 macro_name = groups.get('macro_name') 1324 macro_args = groups.get('macro_args') 1325 self.inhibit_p = 0 # 1 fixed macros like UserPreferences (in the past, gone now), 0 fixes paragraph formatting for macros 1326 1327 # create macro instance 1328 if self.macro is None: 1329 self.macro = macro.Macro(self) 1330 return self.formatter.macro(self.macro, macro_name, macro_args, markup=groups.get('macro')) 1331 _macro_name_repl = _macro_repl 1332 _macro_args_repl = _macro_repl 1333 1334 def scan(self, line, inhibit_p=False): 1335 """ Scans one line 1336 Append text before match, invoke replace() with match, and add text after match. 
1337 """ 1338 result = [] 1339 lastpos = 0 # absolute position within line 1340 line_length = len(line) 1341 1342 ###result.append(u'<span class="info">[scan: <tt>"%s"</tt>]</span>' % line) 1343 while lastpos <= line_length: # it is <=, not <, because we need to process the empty line also 1344 parser_scan_re = re.compile(self.parser_scan_rule % re.escape(self.parser_unique), re.VERBOSE|re.UNICODE) 1345 scan_re = self.in_pre and parser_scan_re or self.scan_re 1346 match = scan_re.search(line, lastpos) 1347 if match: 1348 start = match.start() 1349 if lastpos < start: 1350 if self.in_pre: 1351 self._parser_content(line[lastpos:start]) 1352 else: 1353 ###result.append(u'<span class="info">[add text before match: <tt>"%s"</tt>]</span>' % line[lastpos:match.start()]) 1354 if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p): 1355 result.append(self.formatter.paragraph(1, css_class="line862")) 1356 # add the simple text in between lastpos and beginning of current match 1357 result.append(self.formatter.text(line[lastpos:start])) 1358 1359 # Replace match with markup 1360 if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p or 1361 self.in_table or self.in_list): 1362 result.append(self.formatter.paragraph(1, css_class="line867")) 1363 result.append(self.replace(match, inhibit_p)) 1364 end = match.end() 1365 lastpos = end 1366 if start == end: 1367 # we matched an empty string 1368 lastpos += 1 # proceed, we don't want to match this again 1369 else: 1370 if self.in_pre: 1371 # ilastpos is more then 0 and result of line slice is empty make useless line 1372 if not (lastpos > 0 and line[lastpos:] == ''): 1373 self._parser_content(line[lastpos:]) 1374 elif line[lastpos:]: 1375 ###result.append('<span class="info">[no match, add rest: <tt>"%s"<tt>]</span>' % line[lastpos:]) 1376 if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p or 1377 self.in_li or self.in_dd): 1378 result.append(self.formatter.paragraph(1, 
css_class="line874")) 1379 # add the simple text (no markup) after last match 1380 result.append(self.formatter.text(line[lastpos:])) 1381 break # nothing left to do! 1382 return u''.join(result) 1383 1384 def _replace(self, match): 1385 """ Same as replace() but with no magic """ 1386 for name, text in match.groupdict().iteritems(): 1387 if text is not None: 1388 # Get replace method and replace text 1389 replace_func = getattr(self, '_%s_repl' % name) 1390 result = replace_func(text, match.groupdict()) 1391 return result 1392 1393 def replace(self, match, inhibit_p=False): 1394 """ Replace match using type name """ 1395 result = [] 1396 for type, hit in match.groupdict().items(): 1397 if hit is not None and not type in ["hmarker", ]: 1398 1399 ##result.append(u'<span class="info">[replace: %s: "%s"]</span>' % (type, hit)) 1400 # Open p for certain types 1401 if not (inhibit_p or self.inhibit_p or self.formatter.in_p 1402 or self.in_pre or (type in self.no_new_p_before)): 1403 result.append(self.formatter.paragraph(1, css_class="line891")) 1404 1405 # Get replace method and replace hit 1406 replace_func = getattr(self, '_%s_repl' % type) 1407 result.append(replace_func(hit, match.groupdict())) 1408 return ''.join(result) 1409 else: 1410 # We should never get here 1411 import pprint 1412 raise Exception("Can't handle match %r\n%s\n%s" % ( 1413 match, 1414 pprint.pformat(match.groupdict()), 1415 pprint.pformat(match.groups()), 1416 )) 1417 1418 return "" 1419 1420 def _line_anchordef(self): 1421 if self.line_anchors and not self.line_anchor_printed: 1422 self.line_anchor_printed = 1 1423 return self.formatter.line_anchordef(self.lineno) 1424 else: 1425 return '' 1426 1427 def format(self, formatter, inhibit_p=False, **kw): 1428 """ For each line, scan through looking for magic 1429 strings, outputting verbatim any intervening text. 
1430 """ 1431 self.formatter = formatter 1432 self.hilite_re = self.formatter.page.hilite_re 1433 1434 # get text and replace TABs 1435 rawtext = self.raw.expandtabs() 1436 1437 # go through the lines 1438 1439 for lineno in range(1, self.start_line + 1): 1440 self.request.write(self.formatter.line_anchordef(lineno)) 1441 1442 self.lineno = self.start_line 1443 self.lines = self.eol_re.split(rawtext) 1444 self.line_is_empty = 0 1445 1446 self.in_processing_instructions = 1 1447 1448 if self.wrapping_div_class: 1449 self.request.write(self.formatter.div(1, css_class=self.wrapping_div_class)) 1450 1451 # Main loop 1452 for line in self.lines: 1453 self.lineno += 1 1454 1455 self.line_anchor_printed = 0 1456 if not self.in_table: 1457 self.request.write(self._line_anchordef()) 1458 self.table_rowstart = 1 1459 self.line_was_empty = self.line_is_empty 1460 self.line_is_empty = 0 1461 self.first_list_item = 0 1462 self.inhibit_p = 0 1463 1464 # ignore processing instructions 1465 if self.in_processing_instructions: 1466 found = False 1467 for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated", 1468 "#pragma", "#form", "#acl", "#language"): 1469 if line.lower().startswith(pi): 1470 self.request.write(self.formatter.comment(line)) 1471 found = True 1472 break 1473 if not found: 1474 self.in_processing_instructions = 0 1475 else: 1476 continue # do not parse this line 1477 1478 if not self.in_pre: 1479 # we don't have \n as whitespace any more 1480 # This is the space between lines we join to one paragraph 1481 line += ' ' 1482 1483 # Paragraph break on empty lines 1484 if not line.strip(): 1485 if self.in_table: 1486 self.request.write(self.formatter.table(0)) 1487 self.request.write(self._line_anchordef()) 1488 self.in_table = 0 1489 # CHANGE: removed check for not self.list_types 1490 # p should close on every empty line 1491 if self.formatter.in_p: 1492 self.request.write(self.formatter.paragraph(0)) 1493 self.line_is_empty = 1 1494 continue 1495 1496 # 
Check indent level 1497 indent = self.indent_re.match(line) 1498 indlen = len(indent.group(0)) 1499 indtype = "ul" 1500 numtype = None 1501 numstart = None 1502 if indlen: 1503 match = self.ol_re.match(line) 1504 if match: 1505 numtype, numstart = match.group(0).strip().split('.') 1506 numtype = numtype[0] 1507 1508 if numstart and numstart[0] == "#": 1509 numstart = int(numstart[1:]) 1510 else: 1511 numstart = None 1512 1513 indtype = "ol" 1514 else: 1515 match = self.dl_re.match(line) 1516 if match: 1517 indtype = "dl" 1518 1519 # output proper indentation tags 1520 self.request.write(self._indent_to(indlen, indtype, numtype, numstart)) 1521 1522 # Table mode 1523 # TODO: move into function? 1524 if (not self.in_table and line[indlen:indlen + 2] == "||" 1525 and line.endswith("|| ") and len(line) >= 5 + indlen): 1526 # Start table 1527 if self.list_types and not self.in_li: 1528 self.request.write(self.formatter.listitem(1, style="list-style-type:none")) 1529 ## CHANGE: no automatic p on li 1530 ##self.request.write(self.formatter.paragraph(1)) 1531 self.in_li = 1 1532 1533 # CHANGE: removed check for self.in_li 1534 # paragraph should end before table, always! 
1535 if self.formatter.in_p: 1536 self.request.write(self.formatter.paragraph(0)) 1537 attrs, attrerr = self._getTableAttrs(line[indlen+2:]) 1538 self.request.write(self.formatter.table(1, attrs) + attrerr) 1539 self.in_table = True # self.lineno 1540 elif (self.in_table and not 1541 # intra-table comments should not break a table 1542 (line.startswith("##") or 1543 line[indlen:indlen + 2] == "||" and 1544 line.endswith("|| ") and 1545 len(line) >= 5 + indlen)): 1546 1547 # Close table 1548 self.request.write(self.formatter.table(0)) 1549 self.request.write(self._line_anchordef()) 1550 self.in_table = 0 1551 1552 # Scan line, format and write 1553 formatted_line = self.scan(line, inhibit_p=inhibit_p) 1554 self.request.write(formatted_line) 1555 1556 1557 # Close code displays, paragraphs, tables and open lists 1558 self.request.write(self._undent()) 1559 if self.in_pre: self.request.write(self.formatter.preformatted(0)) 1560 if self.formatter.in_p: self.request.write(self.formatter.paragraph(0)) 1561 if self.in_table: self.request.write(self.formatter.table(0)) 1562 1563 if self.wrapping_div_class: 1564 self.request.write(self.formatter.div(0)) 1565 1566 1567 # Private helpers ------------------------------------------------------------ 1568 1569 def setParser(self, name): 1570 """ Set parser to parser named 'name' """ 1571 # XXX this is done by the formatter as well 1572 try: 1573 self.parser = wikiutil.searchAndImportPlugin(self.request.cfg, "parser", name) 1574 except wikiutil.PluginMissingError: 1575 self.parser = None 1576 1577del _ 1578