1#!/usr/local/bin/python3.8 2# txt2tags - generic text conversion tool 3# https://txt2tags.org/ 4# https://github.com/jendrikseipp/txt2tags 5# 6# Copyright 2001-2010 Aurelio Jargas 7# Copyright 2010-2019 Jendrik Seipp 8# 9# License: GPL2+ (http://www.gnu.org/licenses/gpl-2.0.txt) 10# 11######################################################################## 12# 13# The code that [1] parses the marked text is separated from the 14# code that [2] inserts the target tags. 15# 16# [1] made by: def convert() 17# [2] made by: class BlockMaster 18# 19# The structures of the marked text are identified and their contents are 20# extracted into a data holder (Python lists and dictionaries). 21# 22# When parsing the source file, the blocks (para, lists, quote, table) 23# are opened with BlockMaster, right when found. Then their contents, 24# which span several lines, are fed into a special holder on the 25# BlockMaster instance. Only when the block is closed, the target tags 26# are inserted for the full block as a whole, in one pass. This way, we 27# have better control over blocks. Much better than the previous line by 28# line approach. 29# 30# In other words, whenever inside a block, the parser *holds* the tag 31# insertion process, waiting until the full block is read. That was 32# needed primarily to close paragraphs for the XHTML target, but 33# proved to be a very good addition, improving many other processing steps. 34# 35# ------------------------------------------------------------------- 36# 37# These important classes are all documented: 38# CommandLine, SourceDocument, ConfigMaster, ConfigLines. 39# 40# There is a RAW Config format and all kinds of configuration are first 41# converted to this format. Then a generic method parses it. 
42# 43# These functions get information about the input file(s) and take 44# care of the init processing: 45# process_source_file() and convert_file() 46# 47######################################################################## 48 49# XXX Smart Image Align doesn't work if the image is a link 50# Can't fix that because the image is expanded together with the 51# link, at the linkbank filling moment. Only the image is passed 52# to parse_images(), not the full line, so it is always 'middle'. 53 54# XXX Paragraph separation not valid inside Quote 55# Quote will not have <p></p> inside, instead will close and open 56# again the <blockquote>. This is a real problem in CSS, when defining a 57# different background color. Still don't know how to fix it. 58 59# XXX TODO (maybe) 60# New mark which expands to an anchor's full title. 61# It is necessary to parse the full document in this order: 62# DONE 1st scan: HEAD: get all settings, including %!includeconf 63# DONE 2nd scan: BODY: expand includes & apply %!preproc 64# 3rd scan: BODY: read titles and compose TOC info 65# 4th scan: BODY: full parsing, expanding [#anchor] 1st 66# Steps 2 and 3 can be done together, without adding tags. 67# Two complete body scans will be *slow*, not sure it is worth it. 
68# One solution may be to add the titles as postproc rules 69 70from __future__ import print_function 71 72import collections 73import getopt 74import io 75import os 76import re 77import sys 78 79############################################################################## 80 81# Program information 82my_url = "https://txt2tags.org" 83my_name = "txt2tags" 84my_email = "jendrikseipp@gmail.com" 85__version__ = "3.7" 86 87# FLAGS : the conversion related flags , may be used in %!options 88# OPTIONS : the conversion related options, may be used in %!options 89# ACTIONS : the other behavior modifiers, valid on command line only 90# NO_TARGET: actions that don't require a target specification 91# NO_MULTI_INPUT: actions that don't accept more than one input file 92# CONFIG_KEYWORDS: the valid %!key:val keywords 93# 94# FLAGS and OPTIONS are configs that affect the converted document. 95# They usually also have a --no-<option> to turn them OFF. 96# 97# ACTIONS are needed because when handling multiple input files, strange 98# behavior may occur. There is no --no-<action>. 99# Options --version and --help inside %!options are odd. 
100 101FLAGS = { 102 "headers": 1, 103 "enum-title": 0, 104 "toc": 0, 105 "rc": 1, 106 "quiet": 0, 107 "slides": 0, 108} 109OPTIONS = { 110 "target": "", 111 "style": "", 112 "infile": "", 113 "outfile": "", 114 "config-file": "", 115 "lang": "", 116} 117ACTIONS = { 118 "help": 0, 119 "version": 0, 120 "verbose": 0, 121 "debug": 0, 122 "targets": 0, 123} 124NO_TARGET = ["help", "version", "targets"] 125CONFIG_KEYWORDS = ["target", "style", "options", "preproc", "postproc"] 126 127TARGET_NAMES = { 128 "html": "HTML page", 129 "sgml": "SGML document", 130 "dbk": "DocBook document", 131 "tex": "LaTeX document", 132 "lout": "Lout document", 133 "man": "UNIX Manual page", 134 "mgp": "MagicPoint presentation", 135 "wiki": "Wikipedia page", 136 "gwiki": "Google Wiki page", 137 "doku": "DokuWiki page", 138 "pmw": "PmWiki page", 139 "moin": "MoinMoin page", 140 "txt": "Plain Text", 141 "adoc": "AsciiDoc document", 142 "creole": "Creole 1.0 document", 143 "md": "Markdown document", 144} 145 146TARGETS = sorted(TARGET_NAMES) 147 148DEBUG = 0 # do not edit here, please use --debug 149VERBOSE = 0 # do not edit here, please use -v, -vv or -vvv 150QUIET = 0 # do not edit here, please use --quiet 151 152ENCODING = "utf-8" 153DFT_TEXT_WIDTH = 72 154 155RC_RAW = [] 156CMDLINE_RAW = [] 157CONF = {} 158BLOCK = None 159TITLE = None 160regex = {} 161TAGS = {} 162rules = {} 163 164TARGET = "" 165 166STDIN = STDOUT = "-" 167MODULEIN = MODULEOUT = "-module-" 168ESCCHAR = "\x00" 169SEPARATOR = "\x01" 170LISTNAMES = {"-": "list", "+": "numlist", ":": "deflist"} 171 172VERSIONSTR = "{} version {} <{}>".format(my_name, __version__, my_url) 173 174USAGE = "\n".join( 175 [ 176 "", 177 "Usage: %s [OPTIONS] infile.t2t" % my_name, 178 "", 179 " --targets list available targets and exit", 180 " -t, --target=TYPE set target document type. 
currently supported:", 181 " %s" % ", ".join(TARGETS), 182 " -i, --infile=FILE set FILE as the input file name ('-' for STDIN)", 183 " -o, --outfile=FILE set FILE as the output file name ('-' for STDOUT)", 184 " --toc add a table of contents to the output", 185 " -n, --enum-title enumerate all titles as 1, 1.1, 1.1.1, etc.", 186 " --style=FILE use FILE as the document style (e.g., a CSS file)", 187 " -H, --no-headers omit header and footer from output", 188 " -C, --config-file=F read configuration from file F", 189 " -q, --quiet suppress all output (except errors)", 190 " -v, --verbose print informative messages during conversion", 191 " -h, --help print this help text and exit", 192 " -V, --version print program version and exit", 193 "", 194 "Turn off options:", 195 " --no-enum-title, --headers, --no-quiet,", 196 " --no-rc, --no-style, --no-toc", 197 "", 198 "Example:", 199 " {} -t html --toc {}".format(my_name, "file.t2t"), 200 "", 201 "By default, converted output is saved to 'infile.<target>'.", 202 "Use --outfile to force an output file name.", 203 "If input file is '-', read from STDIN.", 204 "If output file is '-', dump output to STDOUT.", 205 "", 206 my_url, 207 "", 208 ] 209) 210 211 212############################################################################## 213 214 215# Here are all the targets' header templates, keyed by target name 216# You may edit them to fit your needs 217# - the %(HEADERn)s strings represent the Header lines 218# - the %(STYLE)s string is replaced by the --style value 219# - the %(ENCODING)s string is changed to "utf-8" 220# - if any of the above is empty, the full line is removed 221# - use %% to represent a literal % 222# 223HEADER_TEMPLATE = { 224 "txt": """\ 225%(HEADER1)s 226%(HEADER2)s 227%(HEADER3)s 228""", 229 "sgml": """\ 230<!doctype linuxdoc system> 231<article> 232<title>%(HEADER1)s 233<author>%(HEADER2)s 234<date>%(HEADER3)s 235""", 236 "html": """\ 237<!DOCTYPE html> 238<html> 239<head> 240<meta charset="%(ENCODING)s"> 
241<title>%(HEADER1)s</title> 242<meta name="generator" content="https://txt2tags.org"> 243<link rel="stylesheet" href="%(STYLE)s"> 244<style type="text/css"> 245blockquote{margin: 1em 2em; border-left: 2px solid #999; 246 font-style: oblique; padding-left: 1em;} 247blockquote:first-letter{margin: .2em .1em .1em 0; font-size: 160%%; font-weight: bold;} 248blockquote:first-line{font-weight: bold;} 249body{font-family: sans-serif;} 250hr{background-color:#000;border:0;color:#000;} 251hr.heavy{height:2px;} 252hr.light{height:1px;} 253img{border:0;display:block;} 254img.right{margin:0 0 0 auto;} 255img.center{border:0;margin:0 auto;} 256table{border-collapse: collapse;} 257table th,table td{padding: 3px 7px 2px 7px;} 258table th{background-color: lightgrey;} 259table.center{margin-left:auto; margin-right:auto;} 260.center{text-align:center;} 261.right{text-align:right;} 262.left{text-align:left;} 263.tableborder,.tableborder td,.tableborder th{border:1px solid #000;} 264.underline{text-decoration:underline;} 265</style> 266</head> 267<body> 268<header> 269<hgroup> 270<h1>%(HEADER1)s</h1> 271<h2>%(HEADER2)s</h2> 272<h3>%(HEADER3)s</h3> 273</hgroup> 274</header> 275<article> 276""", 277 "dbk": """\ 278<?xml version="1.0" 279 encoding="%(ENCODING)s" 280?> 281<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"\ 282 "docbook/dtd/xml/4.5/docbookx.dtd"> 283<article lang="en"> 284 <articleinfo> 285 <title>%(HEADER1)s</title> 286 <authorgroup> 287 <author><othername>%(HEADER2)s</othername></author> 288 </authorgroup> 289 <date>%(HEADER3)s</date> 290 </articleinfo> 291""", 292 "man": """\ 293.TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s" 294""", 295 "mgp": """\ 296#!/usr/X11R6/bin/mgp -t 90 297%%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1" 298%%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1" 299%%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1" 300%%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1" 
301%%deffont "mono" xfont "courier-medium-r", charset "iso8859-1" 302%%default 1 size 5 303%%default 2 size 8, fore "yellow", font "normal-b", center 304%%default 3 size 5, fore "white", font "normal", left, prefix " " 305%%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill 306%%tab 2 prefix " ", icon arc "orange" 40, leftfill 307%%tab 3 prefix " ", icon arc "brown" 40, leftfill 308%%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill 309%%tab 5 prefix " ", icon arc "magenta" 40, leftfill 310%%%%------------------------- end of headers ----------------------------- 311%%page 312 313 314 315 316 317%%size 10, center, fore "yellow" 318%(HEADER1)s 319 320%%font "normal-i", size 6, fore "white", center 321%(HEADER2)s 322 323%%font "mono", size 7, center 324%(HEADER3)s 325""", 326 "moin": """\ 327'''%(HEADER1)s''' 328 329''%(HEADER2)s'' 330 331%(HEADER3)s 332""", 333 "gwiki": """\ 334*%(HEADER1)s* 335 336%(HEADER2)s 337 338_%(HEADER3)s_ 339""", 340 "adoc": """\ 341= %(HEADER1)s 342%(HEADER2)s 343%(HEADER3)s 344""", 345 "doku": """\ 346===== %(HEADER1)s ===== 347 348**//%(HEADER2)s//** 349 350//%(HEADER3)s// 351""", 352 "pmw": """\ 353(:Title %(HEADER1)s:) 354 355(:Description %(HEADER2)s:) 356 357(:Summary %(HEADER3)s:) 358""", 359 "wiki": """\ 360'''%(HEADER1)s''' 361 362%(HEADER2)s 363 364''%(HEADER3)s'' 365""", 366 "tex": r"""\documentclass{article} 367\usepackage{booktabs} %% needed for tables 368\usepackage{graphicx} 369\usepackage{paralist} %% needed for compact lists 370\usepackage[normalem]{ulem} %% needed by strike 371\usepackage[urlcolor=blue,colorlinks=true]{hyperref} 372\usepackage[%(ENCODING)s]{inputenc} %% char encoding 373\usepackage{%(STYLE)s} %% user defined 374 375\title{%(HEADER1)s} 376\author{%(HEADER2)s} 377\begin{document} 378\date{%(HEADER3)s} 379\maketitle 380\clearpage 381""", 382 "lout": """\ 383@SysInclude { doc } 384@Document 385 @InitialFont { Times Base 12p } # Times, Courier, Helvetica, ... 
386 @PageOrientation { Portrait } # Portrait, Landscape 387 @ColumnNumber { 1 } # Number of columns (2, 3, ...) 388 @PageHeaders { Simple } # None, Simple, Titles, NoTitles 389 @InitialLanguage { English } # German, French, Portuguese, ... 390 @OptimizePages { Yes } # Yes/No smart page break feature 391// 392@Text @Begin 393@Display @Heading { %(HEADER1)s } 394@Display @I { %(HEADER2)s } 395@Display { %(HEADER3)s } 396#@NP # Break page after Headers 397""", 398 "creole": """\ 399%(HEADER1)s 400%(HEADER2)s 401%(HEADER3)s 402""", 403 "md": """\ 404%(HEADER1)s 405%(HEADER2)s 406%(HEADER3)s 407""" 408 # NOTE(review): the next notes use Lout directives; presumably leftovers meant for the "lout" template - confirm: @SysInclude { tbl } # Tables support 409 # setup: @MakeContents { Yes } # show TOC 410 # setup: @SectionGap # break page at each section 411} 412assert set(HEADER_TEMPLATE) == set(TARGETS) 413 414 415############################################################################## 416 417 418def getTags(config): 419 "Returns all the known tags for the specified target" 420 421 keys = """ 422 title1 numtitle1 423 title2 numtitle2 424 title3 numtitle3 425 title4 numtitle4 426 title5 numtitle5 427 title1Open title1Close 428 title2Open title2Close 429 title3Open title3Close 430 title4Open title4Close 431 title5Open title5Close 432 blockTitle1Open blockTitle1Close 433 blockTitle2Open blockTitle2Close 434 blockTitle3Open blockTitle3Close 435 436 paragraphOpen paragraphClose 437 blockVerbOpen blockVerbClose blockVerbLine 438 blockQuoteOpen blockQuoteClose blockQuoteLine 439 blockCommentOpen blockCommentClose 440 441 fontMonoOpen fontMonoClose 442 fontBoldOpen fontBoldClose 443 fontItalicOpen fontItalicClose 444 fontUnderlineOpen fontUnderlineClose 445 fontStrikeOpen fontStrikeClose 446 447 listOpen listClose 448 listOpenCompact listCloseCompact 449 listItemOpen listItemClose listItemLine 450 numlistOpen numlistClose 451 numlistOpenCompact numlistCloseCompact 452 numlistItemOpen numlistItemClose numlistItemLine 453 deflistOpen deflistClose 454 deflistOpenCompact 
deflistCloseCompact 455 deflistItem1Open deflistItem1Close 456 deflistItem2Open deflistItem2Close deflistItem2LinePrefix 457 458 bar1 bar2 459 url urlMark 460 email emailMark 461 img imgAlignLeft imgAlignRight imgAlignCenter 462 _imgAlignLeft _imgAlignRight _imgAlignCenter 463 464 tableOpen tableClose 465 _tableBorder _tableAlignLeft _tableAlignCenter 466 tableRowOpen tableRowClose tableRowSep 467 tableTitleRowOpen tableTitleRowClose 468 tableCellOpen tableCellClose tableCellSep 469 tableTitleCellOpen tableTitleCellClose tableTitleCellSep 470 _tableColAlignLeft _tableColAlignRight _tableColAlignCenter 471 _tableCellAlignLeft _tableCellAlignRight _tableCellAlignCenter 472 _tableCellColSpan tableColAlignSep 473 _tableCellMulticolOpen 474 _tableCellMulticolClose 475 476 bodyOpen bodyClose 477 cssOpen cssClose 478 tocOpen tocClose TOC 479 anchor 480 comment 481 pageBreak 482 EOD 483 """.split() 484 485 # TIP: \a represents the current text inside the mark 486 # TIP: ~A~, ~B~ and ~C~ are expanded to other tags parts 487 alltags = { 488 "txt": { 489 "title1": " \a", 490 "title2": "\t\a", 491 "title3": "\t\t\a", 492 "title4": "\t\t\t\a", 493 "title5": "\t\t\t\t\a", 494 "blockQuoteLine": "\t", 495 "listItemOpen": "- ", 496 "numlistItemOpen": "\a. 
", 497 "bar1": "\a", 498 "url": "\a", 499 "urlMark": "\a (\a)", 500 "email": "\a", 501 "emailMark": "\a (\a)", 502 "img": "[\a]", 503 }, 504 "html": { 505 "anchor": ' id="\a"', 506 "bar1": '<hr class="light">', 507 "bar2": '<hr class="heavy">', 508 "blockQuoteClose": "</blockquote>", 509 "blockQuoteOpen": "<blockquote>", 510 "blockVerbClose": "</pre>", 511 "blockVerbOpen": "<pre>", 512 "bodyClose": "</div>", 513 "bodyOpen": '<div class="body" id="body">', 514 "comment": "<!-- \a -->", 515 "cssClose": "</style>", 516 "cssOpen": "<style>", 517 "deflistClose": "</dl>", 518 "deflistItem1Close": "</dt>", 519 "deflistItem1Open": "<dt>", 520 "deflistItem2Close": "</dd>", 521 "deflistItem2Open": "<dd>", 522 "deflistOpen": "<dl>", 523 "email": '<a href="mailto:\a">\a</a>', 524 "emailMark": '<a href="mailto:\a">\a</a>', 525 "EOD": "</article></body></html>", 526 "fontBoldClose": "</strong>", 527 "fontBoldOpen": "<strong>", 528 "fontItalicClose": "</em>", 529 "fontItalicOpen": "<em>", 530 "fontMonoClose": "</code>", 531 "fontMonoOpen": "<code>", 532 "fontStrikeClose": "</del>", 533 "fontStrikeOpen": "<del>", 534 "fontUnderlineClose": "</span>", 535 "fontUnderlineOpen": '<span class="underline">', 536 "_imgAlignCenter": ' class="center"', 537 "_imgAlignLeft": ' class="left"', 538 "_imgAlignRight": ' class="right"', 539 "img": '<img~a~ src="\a" alt="">', 540 "listClose": "</ul>", 541 "listItemClose": "</li>", 542 "listItemOpen": "<li>", 543 "listOpen": "<ul>", 544 "numlistClose": "</ol>", 545 "numlistItemClose": "</li>", 546 "numlistItemOpen": "<li>", 547 "numlistOpen": "<ol>", 548 "paragraphClose": "</p>", 549 "paragraphOpen": "<p>", 550 "_tableAlignCenter": ' style="margin-left: auto; margin-right: auto;"', 551 "_tableBorder": ' class="tableborder"', 552 "_tableCellAlignCenter": ' class="center"', 553 "_tableCellAlignRight": ' class="right"', 554 "tableCellClose": "</td>", 555 "_tableCellColSpan": ' colspan="\a"', 556 "tableCellOpen": "<td~a~~s~>", 557 "tableClose": 
"</table>", 558 "tableOpen": "<table~a~~b~>", 559 "tableRowClose": "</tr>", 560 "tableRowOpen": "<tr>", 561 "tableTitleCellClose": "</th>", 562 "tableTitleCellOpen": "<th~s~>", 563 "title1Close": "</section>", 564 "title1Open": "<section~A~>\n<h1>\a</h1>", 565 "title2Close": "</section>", 566 "title2Open": "<section~A~>\n<h2>\a</h2>", 567 "title3Close": "</section>", 568 "title3Open": "<section~A~>\n<h3>\a</h3>", 569 "title4Close": "</section>", 570 "title4Open": "<section~A~>\n<h4>\a</h4>", 571 "title5Close": "</section>", 572 "title5Open": "<section~A~>\n<h5>\a</h5>", 573 "tocClose": "</nav>", 574 "tocOpen": "<nav>", 575 "url": '<a href="\a">\a</a>', 576 "urlMark": '<a href="\a">\a</a>', 577 }, 578 "sgml": { 579 "paragraphOpen": "<p>", 580 "title1": "<sect>\a~A~<p>", 581 "title2": "<sect1>\a~A~<p>", 582 "title3": "<sect2>\a~A~<p>", 583 "title4": "<sect3>\a~A~<p>", 584 "title5": "<sect4>\a~A~<p>", 585 "anchor": '<label id="\a">', 586 "blockVerbOpen": "<tscreen><verb>", 587 "blockVerbClose": "</verb></tscreen>", 588 "blockQuoteOpen": "<quote>", 589 "blockQuoteClose": "</quote>", 590 "fontMonoOpen": "<tt>", 591 "fontMonoClose": "</tt>", 592 "fontBoldOpen": "<bf>", 593 "fontBoldClose": "</bf>", 594 "fontItalicOpen": "<em>", 595 "fontItalicClose": "</em>", 596 "fontUnderlineOpen": "<bf><em>", 597 "fontUnderlineClose": "</em></bf>", 598 "listOpen": "<itemize>", 599 "listClose": "</itemize>", 600 "listItemOpen": "<item>", 601 "numlistOpen": "<enum>", 602 "numlistClose": "</enum>", 603 "numlistItemOpen": "<item>", 604 "deflistOpen": "<descrip>", 605 "deflistClose": "</descrip>", 606 "deflistItem1Open": "<tag>", 607 "deflistItem1Close": "</tag>", 608 "bar1": "<!-- \a -->", 609 "url": '<htmlurl url="\a" name="\a">', 610 "urlMark": '<htmlurl url="\a" name="\a">', 611 "email": '<htmlurl url="mailto:\a" name="\a">', 612 "emailMark": '<htmlurl url="mailto:\a" name="\a">', 613 "img": '<figure><ph vspace=""><img src="\a"></figure>', 614 "tableOpen": '<table><tabular ca="~C~">', 
615 "tableClose": "</tabular></table>", 616 "tableRowSep": "<rowsep>", 617 "tableCellSep": "<colsep>", 618 "_tableColAlignLeft": "l", 619 "_tableColAlignRight": "r", 620 "_tableColAlignCenter": "c", 621 "comment": "<!-- \a -->", 622 "TOC": "<toc>", 623 "EOD": "</article>", 624 }, 625 "dbk": { 626 "paragraphOpen": "<para>", 627 "paragraphClose": "</para>", 628 "title1Open": "~A~<sect1><title>\a</title>", 629 "title1Close": "</sect1>", 630 "title2Open": "~A~ <sect2><title>\a</title>", 631 "title2Close": " </sect2>", 632 "title3Open": "~A~ <sect3><title>\a</title>", 633 "title3Close": " </sect3>", 634 "title4Open": "~A~ <sect4><title>\a</title>", 635 "title4Close": " </sect4>", 636 "title5Open": "~A~ <sect5><title>\a</title>", 637 "title5Close": " </sect5>", 638 "anchor": '<anchor id="\a"/>\n', 639 "blockVerbOpen": "<programlisting>", 640 "blockVerbClose": "</programlisting>", 641 "blockQuoteOpen": "<blockquote><para>", 642 "blockQuoteClose": "</para></blockquote>", 643 "fontMonoOpen": "<code>", 644 "fontMonoClose": "</code>", 645 "fontBoldOpen": '<emphasis role="bold">', 646 "fontBoldClose": "</emphasis>", 647 "fontItalicOpen": "<emphasis>", 648 "fontItalicClose": "</emphasis>", 649 "fontUnderlineOpen": '<emphasis role="underline">', 650 "fontUnderlineClose": "</emphasis>", 651 "fontStrikeOpen": None, # Maybe <emphasis role="strikethrough"> 652 "fontStrikeClose": None, # Maybe </emphasis> 653 "listOpen": "<itemizedlist>", 654 "listClose": "</itemizedlist>", 655 "listItemOpen": "<listitem><para>", 656 "listItemClose": "</para></listitem>", 657 "numlistOpen": '<orderedlist numeration="arabic">', 658 "numlistClose": "</orderedlist>", 659 "numlistItemOpen": "<listitem><para>", 660 "numlistItemClose": "</para></listitem>", 661 "deflistOpen": "<variablelist>", 662 "deflistClose": "</variablelist>", 663 "deflistItem1Open": "<varlistentry><term>", 664 "deflistItem1Close": "</term>", 665 "deflistItem2Open": "<listitem><para>", 666 "deflistItem2Close": 
"</para></listitem></varlistentry>", 667 "bar1": None, 668 "bar2": None, 669 "url": '<ulink url="\a">\a</ulink>', 670 "urlMark": '<ulink url="\a">\a</ulink>', 671 "email": "<email>\a</email>", 672 "emailMark": "<email>\a</email>", 673 "img": ( 674 '<mediaobject><imageobject><imagedata fileref="\a"/>' 675 "</imageobject></mediaobject>" 676 ), 677 # Tables not supported, need to know number of columns. 678 # 'tableOpen' : '<informaltable><tgroup cols=""><tbody>', 679 # 'tableClose' : '</tbody></tgroup></informaltable>' , 680 # 'tableRowOpen' : '<row>' , 681 # 'tableRowClose' : '</row>' , 682 # 'tableCellOpen' : '<entry>' , 683 # 'tableCellClose' : '</entry>' , 684 # 'tableTitleRowOpen' : '<thead>' , 685 # 'tableTitleRowClose' : '</thead>' , 686 # '_tableBorder' : ' frame="all"' , 687 # '_tableAlignCenter' : ' align="center"' , 688 # '_tableCellAlignRight' : ' align="right"' , 689 # '_tableCellAlignCenter': ' align="center"' , 690 # '_tableCellColSpan' : ' COLSPAN="\a"' , 691 "TOC": "<index/>", 692 "comment": "<!-- \a -->", 693 "EOD": "</article>", 694 }, 695 "tex": { 696 "title1": "~A~\\section*{\a}", 697 "title2": "~A~\\subsection*{\a}", 698 "title3": "~A~\\subsubsection*{\a}", 699 # title 4/5: DIRTY: para+BF+\\+\n 700 "title4": "~A~\\paragraph{}\\textbf{\a}\\\\\n", 701 "title5": "~A~\\paragraph{}\\textbf{\a}\\\\\n", 702 "numtitle1": "\n~A~\\section{\a}", 703 "numtitle2": "~A~\\subsection{\a}", 704 "numtitle3": "~A~\\subsubsection{\a}", 705 "anchor": "\\hypertarget{\a}{}\n", 706 "blockVerbOpen": "\\begin{verbatim}", 707 "blockVerbClose": "\\end{verbatim}", 708 "blockQuoteOpen": "\\begin{quotation}", 709 "blockQuoteClose": "\\end{quotation}", 710 "fontMonoOpen": "\\texttt{", 711 "fontMonoClose": "}", 712 "fontBoldOpen": "\\textbf{", 713 "fontBoldClose": "}", 714 "fontItalicOpen": "\\textit{", 715 "fontItalicClose": "}", 716 "fontUnderlineOpen": "\\underline{", 717 "fontUnderlineClose": "}", 718 "fontStrikeOpen": "\\sout{", 719 "fontStrikeClose": "}", 720 "listOpen": 
"\\begin{itemize}", 721 "listClose": "\\end{itemize}", 722 "listOpenCompact": "\\begin{compactitem}", 723 "listCloseCompact": "\\end{compactitem}", 724 "listItemOpen": "\\item ", 725 "numlistOpen": "\\begin{enumerate}", 726 "numlistClose": "\\end{enumerate}", 727 "numlistOpenCompact": "\\begin{compactenum}", 728 "numlistCloseCompact": "\\end{compactenum}", 729 "numlistItemOpen": "\\item ", 730 "deflistOpen": "\\begin{description}", 731 "deflistClose": "\\end{description}", 732 "deflistOpenCompact": "\\begin{compactdesc}", 733 "deflistCloseCompact": "\\end{compactdesc}", 734 "deflistItem1Open": "\\item[", 735 "deflistItem1Close": "]", 736 "bar1": "\\hrulefill{}", 737 "bar2": "\\rule{\\linewidth}{1mm}", 738 "url": "\\htmladdnormallink{\a}{\a}", 739 "urlMark": "\\htmladdnormallink{\a}{\a}", 740 "email": "\\htmladdnormallink{\a}{mailto:\a}", 741 "emailMark": "\\htmladdnormallink{\a}{mailto:\a}", 742 "img": "\\includegraphics{\a}", 743 "tableOpen": "\\begin{tabular}{@{}~C~@{}}", 744 "tableClose": "\\end{tabular}", 745 "tableRowOpen": None, 746 "tableRowClose": " \\\\", 747 "tableTitleRowClose": " \\\\\n\\midrule", 748 "tableCellSep": " & ", 749 "_tableColAlignLeft": "l", 750 "_tableColAlignRight": "r", 751 "_tableColAlignCenter": "c", 752 "_tableCellAlignLeft": "l", 753 "_tableCellAlignRight": "r", 754 "_tableCellAlignCenter": "c", 755 "_tableCellColSpan": "\a", 756 "_tableCellMulticolOpen": "\\multicolumn{\a}{|~C~|}{", 757 "_tableCellMulticolClose": "}", 758 "tableColAlignSep": None, 759 "comment": "% \a", 760 "TOC": "\\tableofcontents", 761 "pageBreak": "\\clearpage", 762 "EOD": "\\end{document}", 763 }, 764 "lout": { 765 "paragraphOpen": "@LP", 766 "blockTitle1Open": "@BeginSections", 767 "blockTitle1Close": "@EndSections", 768 "blockTitle2Open": " @BeginSubSections", 769 "blockTitle2Close": " @EndSubSections", 770 "blockTitle3Open": " @BeginSubSubSections", 771 "blockTitle3Close": " @EndSubSubSections", 772 "title1Open": "~A~@Section @Title { \a } @Begin", 773 
"title1Close": "@End @Section", 774 "title2Open": "~A~ @SubSection @Title { \a } @Begin", 775 "title2Close": " @End @SubSection", 776 "title3Open": "~A~ @SubSubSection @Title { \a } @Begin", 777 "title3Close": " @End @SubSubSection", 778 "title4Open": "~A~@LP @LeftDisplay @B { \a }", 779 "title5Open": "~A~@LP @LeftDisplay @B { \a }", 780 "anchor": "@Tag { \a }\n", 781 "blockVerbOpen": "@LP @ID @F @RawVerbatim @Begin", 782 "blockVerbClose": "@End @RawVerbatim", 783 "blockQuoteOpen": "@QD {", 784 "blockQuoteClose": "}", 785 # enclosed inside {} to deal with joined**words** 786 "fontMonoOpen": "{@F {", 787 "fontMonoClose": "}}", 788 "fontBoldOpen": "{@B {", 789 "fontBoldClose": "}}", 790 "fontItalicOpen": "{@II {", 791 "fontItalicClose": "}}", 792 "fontUnderlineOpen": "{@Underline{", 793 "fontUnderlineClose": "}}", 794 # the full form is more readable, but could be BL EL LI NL TL DTI 795 "listOpen": "@BulletList", 796 "listClose": "@EndList", 797 "listItemOpen": "@ListItem{", 798 "listItemClose": "}", 799 "numlistOpen": "@NumberedList", 800 "numlistClose": "@EndList", 801 "numlistItemOpen": "@ListItem{", 802 "numlistItemClose": "}", 803 "deflistOpen": "@TaggedList", 804 "deflistClose": "@EndList", 805 "deflistItem1Open": "@DropTagItem {", 806 "deflistItem1Close": "}", 807 "deflistItem2Open": "{", 808 "deflistItem2Close": "}", 809 "bar1": "@DP @FullWidthRule", 810 "url": "{blue @Colour { \a }}", 811 "urlMark": "\a ({blue @Colour { \a }})", 812 "email": "{blue @Colour { \a }}", 813 "emailMark": "\a ({blue @Colour{ \a }})", 814 "img": "~A~@IncludeGraphic { \a }", # eps only! 
815 "_imgAlignLeft": "@LeftDisplay ", 816 "_imgAlignRight": "@RightDisplay ", 817 "_imgAlignCenter": "@CentredDisplay ", 818 # lout tables are *way* too complicated, no support for now 819 # 'tableOpen' : '~A~@Tbl~B~\naformat{ @Cell A | @Cell B } {', 820 # 'tableClose' : '}' , 821 # 'tableRowOpen' : '@Rowa\n' , 822 # 'tableTitleRowOpen' : '@HeaderRowa' , 823 # 'tableCenterAlign' : '@CentredDisplay ' , 824 # 'tableCellOpen' : '\a {' , # A, B, ... 825 # 'tableCellClose' : '}' , 826 # '_tableBorder' : '\nrule {yes}' , 827 "comment": "# \a", 828 # @MakeContents must be on the config file 829 "TOC": "@DP @ContentsGoesHere @DP", 830 "pageBreak": "@NP", 831 "EOD": "@End @Text", 832 }, 833 # https://moinmo.in/HelpOnMoinWikiSyntax 834 "moin": { 835 "title1": "= \a =", 836 "title2": "== \a ==", 837 "title3": "=== \a ===", 838 "title4": "==== \a ====", 839 "title5": "===== \a =====", 840 "blockVerbOpen": "{{{", 841 "blockVerbClose": "}}}", 842 "blockQuoteLine": " ", 843 "fontMonoOpen": "{{{", 844 "fontMonoClose": "}}}", 845 "fontBoldOpen": "'''", 846 "fontBoldClose": "'''", 847 "fontItalicOpen": "''", 848 "fontItalicClose": "''", 849 "fontUnderlineOpen": "__", 850 "fontUnderlineClose": "__", 851 "fontStrikeOpen": "--(", 852 "fontStrikeClose": ")--", 853 "listItemOpen": " * ", 854 "numlistItemOpen": " \a. 
", 855 "deflistItem1Open": " ", 856 "deflistItem1Close": "::", 857 "deflistItem2LinePrefix": " :: ", 858 "bar1": "----", 859 "bar2": "--------", 860 "url": "[[\a]]", 861 "urlMark": "[[\a|\a]]", 862 "email": "\a", 863 "emailMark": "[[mailto:\a|\a]]", 864 "img": "[\a]", 865 "tableRowOpen": "||", 866 "tableCellOpen": "~A~", 867 "tableCellClose": "||", 868 "tableTitleCellClose": "||", 869 "_tableCellAlignRight": "<)>", 870 "_tableCellAlignCenter": "<:>", 871 "comment": "/* \a */", 872 "TOC": "[[TableOfContents]]", 873 }, 874 # http://code.google.com/p/support/wiki/WikiSyntax 875 "gwiki": { 876 "title1": "= \a =", 877 "title2": "== \a ==", 878 "title3": "=== \a ===", 879 "title4": "==== \a ====", 880 "title5": "===== \a =====", 881 "blockVerbOpen": "{{{", 882 "blockVerbClose": "}}}", 883 "blockQuoteLine": " ", 884 "fontMonoOpen": "{{{", 885 "fontMonoClose": "}}}", 886 "fontBoldOpen": "*", 887 "fontBoldClose": "*", 888 "fontItalicOpen": "_", # underline == italic 889 "fontItalicClose": "_", 890 "fontStrikeOpen": "~~", 891 "fontStrikeClose": "~~", 892 "listItemOpen": " * ", 893 "numlistItemOpen": " # ", 894 "url": "\a", 895 "urlMark": "[\a \a]", 896 "email": "mailto:\a", 897 "emailMark": "[mailto:\a \a]", 898 "img": "[\a]", 899 "tableRowOpen": "|| ", 900 "tableRowClose": " ||", 901 "tableCellSep": " || ", 902 }, 903 # http://powerman.name/doc/asciidoc 904 "adoc": { 905 "title1": "== \a", 906 "title2": "=== \a", 907 "title3": "==== \a", 908 "title4": "===== \a", 909 "title5": "===== \a", 910 "blockVerbOpen": "----", 911 "blockVerbClose": "----", 912 "fontMonoOpen": "+", 913 "fontMonoClose": "+", 914 "fontBoldOpen": "*", 915 "fontBoldClose": "*", 916 "fontItalicOpen": "_", 917 "fontItalicClose": "_", 918 "listItemOpen": "- ", 919 "listItemLine": "\t", 920 "numlistItemOpen": ". 
", 921 "url": "\a", 922 "urlMark": "\a[\a]", 923 "email": "mailto:\a", 924 "emailMark": "mailto:\a[\a]", 925 "img": "image::\a[]", 926 }, 927 # http://wiki.splitbrain.org/wiki:syntax 928 # Hint: <br> is \\ $ 929 # Hint: You can add footnotes ((This is a footnote)) 930 "doku": { 931 "title1": "===== \a =====", 932 "title2": "==== \a ====", 933 "title3": "=== \a ===", 934 "title4": "== \a ==", 935 "title5": "= \a =", 936 # DokuWiki uses ' ' identation to mark verb blocks (see indentverbblock) 937 "blockQuoteLine": ">", 938 "fontMonoOpen": "''", 939 "fontMonoClose": "''", 940 "fontBoldOpen": "**", 941 "fontBoldClose": "**", 942 "fontItalicOpen": "//", 943 "fontItalicClose": "//", 944 "fontUnderlineOpen": "__", 945 "fontUnderlineClose": "__", 946 "fontStrikeOpen": "<del>", 947 "fontStrikeClose": "</del>", 948 "listItemOpen": " * ", 949 "numlistItemOpen": " - ", 950 "bar1": "----", 951 "url": "[[\a]]", 952 "urlMark": "[[\a|\a]]", 953 "email": "[[\a]]", 954 "emailMark": "[[\a|\a]]", 955 "img": "{{\a}}", 956 "imgAlignLeft": "{{\a }}", 957 "imgAlignRight": "{{ \a}}", 958 "imgAlignCenter": "{{ \a }}", 959 "tableTitleRowOpen": "^ ", 960 "tableTitleRowClose": " ^", 961 "tableTitleCellSep": " ^ ", 962 "tableRowOpen": "| ", 963 "tableRowClose": " |", 964 "tableCellSep": " | ", 965 # DokuWiki has no attributes. The content must be aligned! 966 # '_tableCellAlignRight' : '<)>' , # ?? 967 # '_tableCellAlignCenter': '<:>' , # ?? 968 # DokuWiki colspan is the same as txt2tags' with multiple ||| 969 # 'comment' : '## \a' , # ?? 970 # TOC is automatic 971 }, 972 # http://www.pmwiki.org/wiki/PmWiki/TextFormattingRules 973 "pmw": { 974 "title1": "~A~! \a ", 975 "title2": "~A~!! \a ", 976 "title3": "~A~!!! \a ", 977 "title4": "~A~!!!! \a ", 978 "title5": "~A~!!!!! 
\a ", 979 "blockQuoteOpen": "->", 980 "blockQuoteClose": "\n", 981 # In-text font 982 "fontMonoOpen": "@@", 983 "fontMonoClose": "@@", 984 "fontBoldOpen": "'''", 985 "fontBoldClose": "'''", 986 "fontItalicOpen": "''", 987 "fontItalicClose": "''", 988 "fontUnderlineOpen": "{+", 989 "fontUnderlineClose": "+}", 990 "fontStrikeOpen": "{-", 991 "fontStrikeClose": "-}", 992 # Lists 993 "listItemLine": "*", 994 "numlistItemLine": "#", 995 "deflistItem1Open": ": ", 996 "deflistItem1Close": ":", 997 # Verbatim block 998 "blockVerbOpen": "[@", 999 "blockVerbClose": "@]", 1000 "bar1": "----", 1001 # URL, email and anchor 1002 "url": "\a", 1003 "urlMark": "[[\a -> \a]]", 1004 "email": "\a", 1005 "emailMark": "[[\a -> mailto:\a]]", 1006 "anchor": "[[#\a]]\n", 1007 # Image markup 1008 "img": "\a", 1009 # Table attributes 1010 "tableTitleRowOpen": "||! ", 1011 "tableTitleRowClose": "||", 1012 "tableTitleCellSep": " ||!", 1013 "tableRowOpen": "||", 1014 "tableRowClose": "||", 1015 "tableCellSep": " ||", 1016 }, 1017 # http://en.wikipedia.org/wiki/Help:Editing 1018 "wiki": { 1019 "title1": "== \a ==", 1020 "title2": "=== \a ===", 1021 "title3": "==== \a ====", 1022 "title4": "===== \a =====", 1023 "title5": "====== \a ======", 1024 "blockVerbOpen": "<pre>", 1025 "blockVerbClose": "</pre>", 1026 "blockQuoteOpen": "<blockquote>", 1027 "blockQuoteClose": "</blockquote>", 1028 "fontMonoOpen": "<tt>", 1029 "fontMonoClose": "</tt>", 1030 "fontBoldOpen": "'''", 1031 "fontBoldClose": "'''", 1032 "fontItalicOpen": "''", 1033 "fontItalicClose": "''", 1034 "fontUnderlineOpen": "<u>", 1035 "fontUnderlineClose": "</u>", 1036 "fontStrikeOpen": "<s>", 1037 "fontStrikeClose": "</s>", 1038 # XXX Mixed lists not working: *#* list inside numlist inside list 1039 "listItemLine": "*", 1040 "numlistItemLine": "#", 1041 "deflistItem1Open": "; ", 1042 "deflistItem2LinePrefix": ": ", 1043 "bar1": "----", 1044 "url": "[\a]", 1045 "urlMark": "[\a \a]", 1046 "email": "mailto:\a", 1047 "emailMark": "[mailto:\a 
\a]", 1048 # [[Image:foo.png|right|Optional alt/caption text]] 1049 # (right, left, center, none) 1050 "img": "[[Image:\a~A~]]", 1051 "_imgAlignLeft": "|left", 1052 "_imgAlignCenter": "|center", 1053 "_imgAlignRight": "|right", 1054 # {| border="1" cellspacing="0" cellpadding="4" align="center" 1055 "tableOpen": '{|~A~~B~ cellpadding="4"', 1056 "tableClose": "|}", 1057 "tableRowOpen": "|-\n| ", 1058 "tableTitleRowOpen": "|-\n! ", 1059 "tableCellSep": " || ", 1060 "tableTitleCellSep": " !! ", 1061 "_tableBorder": ' border="1"', 1062 "_tableAlignCenter": ' align="center"', 1063 "comment": "<!-- \a -->", 1064 "TOC": "__TOC__", 1065 }, 1066 # http://www.inference.phy.cam.ac.uk/mackay/mgp/SYNTAX 1067 # http://en.wikipedia.org/wiki/MagicPoint 1068 "mgp": { 1069 "paragraphOpen": '%font "normal", size 5', 1070 "title1": "%page\n\n\a\n", 1071 "title2": "%page\n\n\a\n", 1072 "title3": "%page\n\n\a\n", 1073 "title4": "%page\n\n\a\n", 1074 "title5": "%page\n\n\a\n", 1075 "blockVerbOpen": '%font "mono"', 1076 "blockVerbClose": '%font "normal"', 1077 "blockQuoteOpen": '%prefix " "', 1078 "blockQuoteClose": '%prefix " "', 1079 "fontMonoOpen": '\n%cont, font "mono"\n', 1080 "fontMonoClose": '\n%cont, font "normal"\n', 1081 "fontBoldOpen": '\n%cont, font "normal-b"\n', 1082 "fontBoldClose": '\n%cont, font "normal"\n', 1083 "fontItalicOpen": '\n%cont, font "normal-i"\n', 1084 "fontItalicClose": '\n%cont, font "normal"\n', 1085 "fontUnderlineOpen": '\n%cont, fore "cyan"\n', 1086 "fontUnderlineClose": '\n%cont, fore "white"\n', 1087 "listItemLine": "\t", 1088 "numlistItemLine": "\t", 1089 "numlistItemOpen": "\a. 
", 1090 "deflistItem1Open": '\t\n%cont, font "normal-b"\n', 1091 "deflistItem1Close": '\n%cont, font "normal"\n', 1092 "bar1": '%bar "white" 5', 1093 "bar2": "%pause", 1094 "url": '\n%cont, fore "cyan"\n\a' + '\n%cont, fore "white"\n', 1095 "urlMark": '\a \n%cont, fore "cyan"\n\a' + '\n%cont, fore "white"\n', 1096 "email": '\n%cont, fore "cyan"\n\a' + '\n%cont, fore "white"\n', 1097 "emailMark": '\a \n%cont, fore "cyan"\n\a' + '\n%cont, fore "white"\n', 1098 "img": '~A~\n%newimage "\a"\n%left\n', 1099 "_imgAlignLeft": "\n%left", 1100 "_imgAlignRight": "\n%right", 1101 "_imgAlignCenter": "\n%center", 1102 "comment": "%% \a", 1103 "pageBreak": "%page\n\n\n", 1104 "EOD": "%%EOD", 1105 }, 1106 # man groff_man ; man 7 groff 1107 "man": { 1108 "paragraphOpen": ".P", 1109 "title1": ".SH \a", 1110 "title2": ".SS \a", 1111 "title3": ".SS \a", 1112 "title4": ".SS \a", 1113 "title5": ".SS \a", 1114 "blockVerbOpen": ".nf", 1115 "blockVerbClose": ".fi\n", 1116 "blockQuoteOpen": ".RS", 1117 "blockQuoteClose": ".RE", 1118 "fontBoldOpen": "\\fB", 1119 "fontBoldClose": "\\fR", 1120 "fontItalicOpen": "\\fI", 1121 "fontItalicClose": "\\fR", 1122 "listOpen": ".RS", 1123 "listItemOpen": ".IP \\(bu 3\n", 1124 "listClose": ".RE\n.IP", 1125 "numlistOpen": ".RS", 1126 "numlistItemOpen": ".IP \a. 
3\n", 1127 "numlistClose": ".RE\n.IP", 1128 "deflistItem1Open": ".TP\n", 1129 "bar1": "\n\n", 1130 "url": "\a", 1131 "urlMark": "\a (\a)", 1132 "email": "\a", 1133 "emailMark": "\a (\a)", 1134 "img": "\a", 1135 "tableOpen": ".TS\n~A~~B~tab(^); ~C~.", 1136 "tableClose": ".TE", 1137 "tableRowOpen": " ", 1138 "tableCellSep": "^", 1139 "_tableAlignCenter": "center, ", 1140 "_tableBorder": "allbox, ", 1141 "_tableColAlignLeft": "l", 1142 "_tableColAlignRight": "r", 1143 "_tableColAlignCenter": "c", 1144 "comment": '.\\" \a', 1145 }, 1146 # http://www.wikicreole.org/wiki/AllMarkup 1147 "creole": { 1148 "title1": "= \a =", 1149 "title2": "== \a ==", 1150 "title3": "=== \a ===", 1151 "title4": "==== \a ====", 1152 "title5": "===== \a =====", 1153 "blockVerbOpen": "{{{", 1154 "blockVerbClose": "}}}", 1155 "blockQuoteLine": " ", 1156 "fontMonoOpen": None, # planned for 2.0, 1157 "fontMonoClose": None, # meanwhile we disable it 1158 "fontBoldOpen": "**", 1159 "fontBoldClose": "**", 1160 "fontItalicOpen": "//", 1161 "fontItalicClose": "//", 1162 "fontUnderlineOpen": "//", # no underline in 1.0, planned for 2.0, 1163 "fontUnderlineClose": "//", # meanwhile we use italic (emphasized) 1164 "fontStrikeOpen": None, # planned for 2.0, 1165 "fontStrikeClose": None, # meanwhile we disable it 1166 "listItemLine": "*", 1167 "numlistItemLine": "#", 1168 "deflistItem2LinePrefix": ":", 1169 "bar1": "----", 1170 "url": "[[\a]]", 1171 "urlMark": "[[\a|\a]]", 1172 "img": "{{\a}}", 1173 "tableTitleRowOpen": "|= ", 1174 "tableTitleRowClose": "|", 1175 "tableTitleCellSep": " |= ", 1176 "tableRowOpen": "| ", 1177 "tableRowClose": " |", 1178 "tableCellSep": " | ", 1179 # TODO: placeholder (mark for unknown syntax) 1180 # if possible: http://www.wikicreole.org/wiki/Placeholder 1181 }, 1182 # regular markdown: http://daringfireball.net/projects/markdown/syntax 1183 # markdown extra: http://michelf.com/projects/php-markdown/extra/ 1184 "md": { 1185 "title1": "# \a ", 1186 "title2": "## \a ", 1187 
"title3": "### \a ", 1188 "title4": "#### \a ", 1189 "title5": "##### \a ", 1190 "blockVerbLine": " ", 1191 "blockQuoteLine": "> ", 1192 "fontMonoOpen": "`", 1193 "fontMonoClose": "`", 1194 "fontBoldOpen": "**", 1195 "fontBoldClose": "**", 1196 "fontItalicOpen": "*", 1197 "fontItalicClose": "*", 1198 "fontUnderlineOpen": None, 1199 "fontUnderlineClose": None, 1200 "fontStrikeOpen": "~~", 1201 "fontStrikeClose": "~~", 1202 # Lists 1203 "listOpenCompact": None, 1204 "listItemLine": " ", 1205 "listItemOpen": "*", 1206 "numlistItemLine": None, 1207 "numlistItemOpen": "1.", 1208 "deflistItem1Open": ": ", 1209 "deflistItem1Close": None, 1210 "deflistItem2Open": None, 1211 "deflistItem2Close": None, 1212 # Verbatim block 1213 "blockVerbOpen": None, 1214 "blockVerbClose": None, 1215 "bar1": "---", 1216 "bar2": "---", 1217 # URL, email and anchor 1218 "url": "\a", 1219 "urlMark": "[\a](\a)", 1220 "email": "<\a>", 1221 "emailMark": "[\a](mailto:\a)", 1222 "anchor": None, 1223 # Image markup 1224 "img": "![](\a)", 1225 "imgAlignLeft": None, 1226 "imgAlignRight": None, 1227 "imgAlignCenter": None, 1228 # Table attributes 1229 "tableTitleRowOpen": "| ", 1230 "tableTitleRowClose": "|\n|---------------|", 1231 "tableTitleCellSep": " |", 1232 "tableRowOpen": "|", 1233 "tableRowClose": "|", 1234 "tableCellSep": " |", 1235 }, 1236 } 1237 assert set(alltags) == set(TARGETS) 1238 1239 for target, tags in alltags.items(): 1240 for key, value in tags.items(): 1241 if key not in keys: 1242 raise AssertionError("{} target has invalid key {}".format(target, key)) 1243 if value is not None and not value: 1244 raise AssertionError("{} target drops {}".format(target, key)) 1245 1246 # Compose the target tags dictionary. 1247 tags = collections.defaultdict(str) 1248 for key, value in alltags[config["target"]].items(): 1249 if value: # Skip unsupported markup. 
1250 tags[key] = maskEscapeChar(value) 1251 1252 # Map strong line to pagebreak 1253 if rules["mapbar2pagebreak"] and tags["pageBreak"]: 1254 tags["bar2"] = tags["pageBreak"] 1255 1256 # Map strong line to separator if not defined 1257 if not tags["bar2"] and tags["bar1"]: 1258 tags["bar2"] = tags["bar1"] 1259 1260 return tags 1261 1262 1263############################################################################## 1264 1265 1266def getRules(config): 1267 """Return all the target-specific syntax rules.""" 1268 allrules = [ 1269 # target rules (ON/OFF) 1270 "linkable", # target supports external links 1271 "tableable", # target supports tables 1272 "imglinkable", # target supports images as links 1273 "imgalignable", # target supports image alignment 1274 "imgasdefterm", # target supports image as definition term 1275 "autonumberlist", # target supports numbered lists natively 1276 "autonumbertitle", # target supports numbered titles natively 1277 "stylable", # target supports external style files 1278 "parainsidelist", # lists items supports paragraph 1279 "compactlist", # separate enclosing tags for compact lists 1280 "spacedlistitem", # lists support blank lines between items 1281 "listnotnested", # lists cannot be nested 1282 "quotenotnested", # quotes cannot be nested 1283 "verbblocknotescaped", # don't escape specials in verb block 1284 "verbblockfinalescape", # do final escapes in verb block 1285 "escapeurl", # escape special in link URL 1286 "labelbeforelink", # label comes before the link on the tag 1287 "onelinepara", # dump paragraph as a single long line 1288 "tabletitlerowinbold", # manually bold any cell on table titles 1289 "tablecellstrip", # strip extra spaces from each table cell 1290 "tablecellspannable", # the table cells can have span attribute 1291 "tablecellmulticol", # separate open+close tags for multicol cells 1292 "barinsidequote", # bars are allowed inside quote blocks 1293 "finalescapetitle", # perform final escapes on title lines 
def getRules(config):
    """Return the syntax rules that apply to the configured target.

    The target name is read from config["target"]. The result is a
    defaultdict(int), so any rule the target does not set reads as 0 (OFF).

    Raises AssertionError when the rules bank and TARGETS name different
    targets, or when a target uses a rule missing from the known-rules list.
    """
    # Every rule name a target is allowed to set.
    allrules = [
        # target rules (ON/OFF)
        "linkable",  # target supports external links
        "tableable",  # target supports tables
        "imglinkable",  # target supports images as links
        "imgalignable",  # target supports image alignment
        "imgasdefterm",  # target supports image as definition term
        "autonumberlist",  # target supports numbered lists natively
        "autonumbertitle",  # target supports numbered titles natively
        "stylable",  # target supports external style files
        "parainsidelist",  # lists items supports paragraph
        "compactlist",  # separate enclosing tags for compact lists
        "spacedlistitem",  # lists support blank lines between items
        "listnotnested",  # lists cannot be nested
        "quotenotnested",  # quotes cannot be nested
        "verbblocknotescaped",  # don't escape specials in verb block
        "verbblockfinalescape",  # do final escapes in verb block
        "escapeurl",  # escape special in link URL
        "labelbeforelink",  # label comes before the link on the tag
        "onelinepara",  # dump paragraph as a single long line
        "tabletitlerowinbold",  # manually bold any cell on table titles
        "tablecellstrip",  # strip extra spaces from each table cell
        "tablecellspannable",  # the table cells can have span attribute
        "tablecellmulticol",  # separate open+close tags for multicol cells
        "barinsidequote",  # bars are allowed inside quote blocks
        "finalescapetitle",  # perform final escapes on title lines
        "autotocnewpagebefore",  # break page before automatic TOC
        "autotocnewpageafter",  # break page after automatic TOC
        "autotocwithbars",  # automatic TOC surrounded by bars
        "mapbar2pagebreak",  # map the strong bar to a page break
        "titleblocks",  # titles must be on open/close section blocks
        # Target code beautify (ON/OFF)
        "indentverbblock",  # add leading spaces to verb block lines
        "breaktablecell",  # break lines after any table cell
        "breaktablelineopen",  # break line after opening table line
        "notbreaklistopen",  # don't break line after opening a new list
        "keepquoteindent",  # don't remove the leading TABs on quotes
        "keeplistindent",  # don't remove the leading spaces on lists
        "blankendautotoc",  # append a blank line at the auto TOC end
        "tagnotindentable",  # tags must be placed at the line beginning
        "spacedlistitemopen",  # append a space after the list item open tag
        "spacednumlistitemopen",  # append a space after the numlist item open tag
        "deflisttextstrip",  # strip the contents of the deflist text
        "blanksaroundpara",  # put a blank line before and after paragraphs
        "blanksaroundverb",  # put a blank line before and after verb blocks
        "blanksaroundquote",  # put a blank line before and after quotes
        "blanksaroundlist",  # put a blank line before and after lists
        "blanksaroundnumlist",  # put a blank line before and after numlists
        "blanksarounddeflist",  # put a blank line before and after deflists
        "blanksaroundtable",  # put a blank line before and after tables
        "blanksaroundbar",  # put a blank line before and after bars
        "blanksaroundtitle",  # put a blank line before and after titles
        "blanksaroundnumtitle",  # put a blank line before and after numtitles
        # Value settings
        "listmaxdepth",  # maximum depth for lists
        "quotemaxdepth",  # maximum depth for quotes
        "tablecellaligntype",  # type of table cell align: cell, column
    ]

    # Per-target settings. Commented-out entries are rules deliberately
    # left OFF for that target.
    rules_bank = {
        "txt": {
            "indentverbblock": 1,
            "spacedlistitem": 1,
            "parainsidelist": 1,
            "keeplistindent": 1,
            "barinsidequote": 1,
            "autotocwithbars": 1,
            "blanksaroundpara": 1,
            "blanksaroundverb": 1,
            "blanksaroundquote": 1,
            "blanksaroundlist": 1,
            "blanksaroundnumlist": 1,
            "blanksarounddeflist": 1,
            "blanksaroundtable": 1,
            "blanksaroundbar": 1,
            "blanksaroundtitle": 1,
            "blanksaroundnumtitle": 1,
        },
        "html": {
            "indentverbblock": 0,
            "linkable": 1,
            "stylable": 1,
            "escapeurl": 1,
            "imglinkable": 1,
            "imgalignable": 1,
            "imgasdefterm": 1,
            "autonumberlist": 1,
            "spacedlistitem": 1,
            "parainsidelist": 1,
            "tableable": 1,
            "tablecellstrip": 1,
            "breaktablecell": 1,
            "breaktablelineopen": 1,
            "keeplistindent": 1,
            "keepquoteindent": 1,
            "barinsidequote": 1,
            "autotocwithbars": 0,
            "tablecellspannable": 1,
            "tablecellaligntype": "cell",
            # 'blanksaroundpara':1,
            "blanksaroundverb": 1,
            # 'blanksaroundquote':1,
            "blanksaroundlist": 1,
            "blanksaroundnumlist": 1,
            "blanksarounddeflist": 1,
            "blanksaroundtable": 1,
            "blanksaroundbar": 1,
            "blanksaroundtitle": 1,
            "blanksaroundnumtitle": 1,
            "titleblocks": 1,
        },
        "sgml": {
            "linkable": 1,
            "escapeurl": 1,
            "autonumberlist": 1,
            "spacedlistitem": 1,
            "tableable": 1,
            "tablecellstrip": 1,
            "blankendautotoc": 1,
            "quotenotnested": 1,
            "keeplistindent": 1,
            "keepquoteindent": 1,
            "barinsidequote": 1,
            "finalescapetitle": 1,
            "tablecellaligntype": "column",
            "blanksaroundpara": 1,
            "blanksaroundverb": 1,
            "blanksaroundquote": 1,
            "blanksaroundlist": 1,
            "blanksaroundnumlist": 1,
            "blanksarounddeflist": 1,
            "blanksaroundtable": 1,
            "blanksaroundbar": 1,
            "blanksaroundtitle": 1,
            "blanksaroundnumtitle": 1,
        },
        "dbk": {
            "linkable": 1,
            "tableable": 0,  # activate when table tags are ready
            "imglinkable": 1,
            "imgalignable": 1,
            "imgasdefterm": 1,
            "autonumberlist": 1,
            "autonumbertitle": 1,
            "parainsidelist": 1,
            "spacedlistitem": 1,
            "titleblocks": 1,
        },
        "mgp": {
            "tagnotindentable": 1,
            "spacedlistitem": 1,
            "imgalignable": 1,
            "autotocnewpagebefore": 1,
            "blanksaroundpara": 1,
            "blanksaroundverb": 1,
            # 'blanksaroundquote':1,
            "blanksaroundlist": 1,
            "blanksaroundnumlist": 1,
            "blanksarounddeflist": 1,
            "blanksaroundtable": 1,
            "blanksaroundbar": 1,
            # 'blanksaroundtitle':1,
            # 'blanksaroundnumtitle':1,
        },
        "tex": {
            "stylable": 1,
            "escapeurl": 1,
            "autonumberlist": 1,
            "autonumbertitle": 1,
            "spacedlistitem": 1,
            "compactlist": 1,
            "parainsidelist": 1,
            "tableable": 1,
            "tablecellstrip": 1,
            "tabletitlerowinbold": 0,
            "verbblocknotescaped": 1,
            "keeplistindent": 1,
            "listmaxdepth": 4,  # deflist is 6
            "quotemaxdepth": 6,
            "barinsidequote": 1,
            "finalescapetitle": 1,
            "autotocnewpageafter": 1,
            "mapbar2pagebreak": 1,
            "tablecellaligntype": "column",
            "tablecellmulticol": 1,
            "blanksaroundpara": 1,
            "blanksaroundverb": 1,
            # 'blanksaroundquote':1,
            "blanksaroundlist": 1,
            "blanksaroundnumlist": 1,
            "blanksarounddeflist": 1,
            "blanksaroundtable": 1,
            "blanksaroundbar": 1,
            "blanksaroundtitle": 1,
            "blanksaroundnumtitle": 1,
        },
        "lout": {
            "keepquoteindent": 1,
            "deflisttextstrip": 1,
            "escapeurl": 1,
            "verbblocknotescaped": 1,
            "imgalignable": 1,
            "mapbar2pagebreak": 1,
            "titleblocks": 1,
            "autonumberlist": 1,
            "parainsidelist": 1,
            "blanksaroundpara": 1,
            "blanksaroundverb": 1,
            # 'blanksaroundquote':1,
            "blanksaroundlist": 1,
            "blanksaroundnumlist": 1,
            "blanksarounddeflist": 1,
            "blanksaroundtable": 1,
            "blanksaroundbar": 1,
            "blanksaroundtitle": 1,
            "blanksaroundnumtitle": 1,
        },
        "moin": {
            "spacedlistitem": 1,
            "linkable": 1,
            "keeplistindent": 1,
            "tableable": 1,
            "barinsidequote": 1,
            "tabletitlerowinbold": 1,
            "tablecellstrip": 1,
            "autotocwithbars": 1,
            "tablecellaligntype": "cell",
            "deflisttextstrip": 1,
            "blanksaroundpara": 1,
            "blanksaroundverb": 1,
            # 'blanksaroundquote':1,
            "blanksaroundlist": 1,
            "blanksaroundnumlist": 1,
            "blanksarounddeflist": 1,
            "blanksaroundtable": 1,
            # 'blanksaroundbar':1,
            "blanksaroundtitle": 1,
            "blanksaroundnumtitle": 1,
        },
        "gwiki": {
            "spacedlistitem": 1,
            "linkable": 1,
            "keeplistindent": 1,
            "tableable": 1,
            "tabletitlerowinbold": 1,
            "tablecellstrip": 1,
            "autonumberlist": 1,
            "blanksaroundpara": 1,
            "blanksaroundverb": 1,
            # 'blanksaroundquote':1,
            "blanksaroundlist": 1,
            "blanksaroundnumlist": 1,
            "blanksarounddeflist": 1,
            "blanksaroundtable": 1,
            # 'blanksaroundbar':1,
            "blanksaroundtitle": 1,
            "blanksaroundnumtitle": 1,
        },
        "adoc": {
            "spacedlistitem": 1,
            "linkable": 1,
            "keeplistindent": 1,
            "autonumberlist": 1,
            "autonumbertitle": 1,
            "listnotnested": 1,
            "blanksaroundpara": 1,
            "blanksaroundverb": 1,
            "blanksaroundlist": 1,
            "blanksaroundnumlist": 1,
            "blanksarounddeflist": 1,
            "blanksaroundtable": 1,
            "blanksaroundtitle": 1,
            "blanksaroundnumtitle": 1,
        },
        "doku": {
            "indentverbblock": 1,  # DokuWiki marks verb blocks with leading spaces
            "spacedlistitem": 1,
            "linkable": 1,
            "keeplistindent": 1,
            "tableable": 1,
            "barinsidequote": 1,
            "tablecellstrip": 1,
            "autotocwithbars": 1,
            "autonumberlist": 1,
            "imgalignable": 1,
            "tablecellaligntype": "cell",
            "blanksaroundpara": 1,
            "blanksaroundverb": 1,
            # 'blanksaroundquote':1,
            "blanksaroundlist": 1,
            "blanksaroundnumlist": 1,
            "blanksarounddeflist": 1,
            "blanksaroundtable": 1,
            "blanksaroundbar": 1,
            "blanksaroundtitle": 1,
            "blanksaroundnumtitle": 1,
        },
        "pmw": {
            "indentverbblock": 1,
            "spacedlistitem": 1,
            "linkable": 1,
            "labelbeforelink": 1,
            # 'keeplistindent':1,
            "tableable": 1,
            "barinsidequote": 1,
            "tablecellstrip": 1,
            "autotocwithbars": 1,
            "autonumberlist": 1,
            "spacedlistitemopen": 1,
            "spacednumlistitemopen": 1,
            "imgalignable": 1,
            "tabletitlerowinbold": 1,
            "tablecellaligntype": "cell",
            "blanksaroundpara": 1,
            "blanksaroundverb": 1,
            "blanksaroundquote": 1,
            "blanksaroundlist": 1,
            "blanksaroundnumlist": 1,
            "blanksarounddeflist": 1,
            "blanksaroundtable": 1,
            "blanksaroundbar": 1,
            "blanksaroundtitle": 1,
            "blanksaroundnumtitle": 1,
        },
        "wiki": {
            "linkable": 1,
            "tableable": 1,
            "tablecellstrip": 1,
            "autotocwithbars": 1,
            "spacedlistitemopen": 1,
            "spacednumlistitemopen": 1,
            "deflisttextstrip": 1,
            "autonumberlist": 1,
            "imgalignable": 1,
            "blanksaroundpara": 1,
            "blanksaroundverb": 1,
            # 'blanksaroundquote':1,
            "blanksaroundlist": 1,
            "blanksaroundnumlist": 1,
            "blanksarounddeflist": 1,
            "blanksaroundtable": 1,
            "blanksaroundbar": 1,
            "blanksaroundtitle": 1,
            "blanksaroundnumtitle": 1,
        },
        "man": {
            "spacedlistitem": 1,
            "tagnotindentable": 1,
            "tableable": 1,
            "tablecellaligntype": "column",
            "tabletitlerowinbold": 1,
            "tablecellstrip": 1,
            "barinsidequote": 1,
            "parainsidelist": 0,
            "blanksaroundpara": 0,
            "blanksaroundverb": 1,
            # 'blanksaroundquote':1,
            "blanksaroundlist": 1,
            "blanksaroundnumlist": 1,
            "blanksarounddeflist": 1,
            "blanksaroundtable": 1,
            # 'blanksaroundbar':1,
            "blanksaroundtitle": 0,
            "blanksaroundnumtitle": 1,
        },
        "creole": {
            "linkable": 1,
            "tableable": 1,
            "imglinkable": 1,
            "tablecellstrip": 1,
            "autotocwithbars": 1,
            "spacedlistitemopen": 1,
            "spacednumlistitemopen": 1,
            "deflisttextstrip": 1,
            "verbblocknotescaped": 1,
            "blanksaroundpara": 1,
            "blanksaroundverb": 1,
            "blanksaroundquote": 1,
            "blanksaroundlist": 1,
            "blanksaroundnumlist": 1,
            "blanksarounddeflist": 1,
            "blanksaroundtable": 1,
            "blanksaroundbar": 1,
            "blanksaroundtitle": 1,
        },
        "md": {
            # "keeplistindent": 1,
            "linkable": 1,
            "labelbeforelink": 1,
            "tableable": 1,
            "imglinkable": 1,
            "tablecellstrip": 1,
            "autonumberlist": 1,
            "spacedlistitemopen": 1,
            "spacednumlistitemopen": 1,
            "deflisttextstrip": 1,
            "blanksaroundpara": 1,
            "blanksaroundlist": 1,
            "blanksaroundnumlist": 1,
            # "blanksarounddeflist": 1,
            "blanksaroundtable": 1,
            "blanksaroundbar": 1,
            "blanksaroundtitle": 1,
        },
    }
    # The bank must describe exactly the supported targets.
    assert set(rules_bank) == set(TARGETS)

    # Reject any rule name that is not in the known-rules list.
    for target, target_rules in rules_bank.items():
        for rule in target_rules:
            if rule in allrules:
                continue
            raise AssertionError(
                "{} target has invalid rule {}".format(target, rule)
            )

    # Unset rules default to 0 (OFF).
    return collections.defaultdict(int, rules_bank[config["target"]])
def getRegexes():
    """Return the bank of compiled regexes used to find the t2t marks.

    The result is a dict that maps a mark name to a compiled pattern. The
    single exception is the "_urlskel" entry: it holds the dict of raw URL
    character-class pieces, saved for later reuse.
    """

    def fence(mark):
        # A line holding nothing but a three-char block delimiter.
        return re.compile(r"^%s\s*$" % mark)

    def one_liner(mark):
        # A block delimiter followed by a space and some contents.
        return re.compile(r"^%s (?=.)" % mark)

    def glued(mark):
        # Inline mark doubled around the contents. The marks must be glued
        # to the contents (no boundary spaces) and they are greedy, so
        # ****bold**** turns to <b>**bold**</b>.
        return re.compile(r"%s%s([^\s](|.*?[^\s])%s*)%s%s" % ((mark,) * 5))

    def placeholder(letter):
        # Auxiliary ~X~ slot found inside the target tags.
        return re.compile("~%s~" % letter, re.I)

    def title(mark):
        # 1 to 5 marks, the title text, the same marks again and an
        # optional [label].
        skeleton = (
            r"^ *(?P<id>[%s]{1,5})(?P<txt>[^%s](|.*[^%s]))"
            r"\1(\[(?P<label>[\w-]*)\])?\s*$"
        )
        return re.compile(skeleton % (mark, mark, mark))

    bank = {
        "blockVerbOpen": fence("```"),
        "blockVerbClose": fence("```"),
        "blockRawOpen": fence('"""'),
        "blockRawClose": fence('"""'),
        "blockTaggedOpen": fence("'''"),
        "blockTaggedClose": fence("'''"),
        "blockCommentOpen": fence("%%%"),
        "blockCommentClose": fence("%%%"),
        "quote": re.compile(r"^\t+"),
        "1lineVerb": one_liner("```"),
        "1lineRaw": one_liner('"""'),
        "1lineTagged": one_liner("'''"),
        # mono, raw, tagged, bold, italic, underline, strike
        "fontMono": glued("`"),
        "raw": glued('"'),
        "tagged": glued("'"),
        "fontBold": glued(r"\*"),
        "fontItalic": glued("/"),
        "fontUnderline": glued("_"),
        "fontStrike": glued("-"),
        "list": re.compile(r"^( *)(-) (?=[^ ])"),
        "numlist": re.compile(r"^( *)(\+) (?=[^ ])"),
        "deflist": re.compile(r"^( *)(:) (.*)$"),
        "listclose": re.compile(r"^( *)([-+:])\s*$"),
        "bar": re.compile(r"^(\s*)([_=-]{20,})\s*$"),
        "table": re.compile(r"^ *\|([|_/])? "),
        "blankline": re.compile(r"^\s*$"),
        "comment": re.compile(r"^%"),
        # Auxiliary tag regexes
        "_imgAlign": placeholder("A"),
        "_tableAlign": placeholder("A"),
        "_anchor": placeholder("A"),
        "_tableBorder": placeholder("B"),
        "_tableColAlign": placeholder("C"),
        "_tableCellColSpan": placeholder("S"),
        "_tableCellAlign": placeholder("A"),
    }

    # Special char to place data on TAGs contents (\a == bell)
    bank["x"] = re.compile("\a")

    # Titles and numbered titles share the same skeleton
    bank["title"] = title("=")
    bank["numtitle"] = title("+")

    # The composed regexes begin here.
    # Textual descriptions use --help's style: [...] is optional, | is OR

    # [image.EXT]
    patt_img = r"\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp|svg))\]"

    # Link things
    # http://www.gbiv.com/protocols/uri/rfc/rfc3986.html
    # pchar: A-Za-z._~- / %FF / !$&'()*+,;= / :@
    # Recommended order: scheme://user:pass@domain/path?query=foo#anchor
    # Also works       : scheme://user:pass@domain/path#anchor?query=foo
    # TODO form: !'():
    urlskel = {
        "proto": r"(https?|ftp|news|telnet|gopher|wais)://",
        "guess": r"(www[23]?|ftp)\.",  # w/out proto, try to guess
        "login": r"A-Za-z0-9_.-",  # for ftp://login@domain.com
        "pass": r"[^ @]*",  # for ftp://login:pass@dom.com
        "chars": r"A-Za-z0-9%._/~:,=$@&+-",  # %20(space), :80(port), D&D
        "anchor": r"A-Za-z0-9%._-",  # %nn(encoded)
        "form": r"A-Za-z0-9/%&=+:;.,$@*_-",  # .,@*_-(as is)
        "punct": r".,;:!?",
    }

    # username [ :password ] @
    patt_url_login = r"([%s]+(:%s)?@)?" % (urlskel["login"], urlskel["pass"])

    # [ http:// ] [ username:password@ ] domain.com [ / ]
    #     [ #anchor | ?form=data ]
    pieces = dict(urlskel, userinfo=patt_url_login)
    retxt_url = (
        r"\b(%(proto)s%(userinfo)s|%(guess)s)[%(chars)s]+\b/*"
        r"(\?[%(form)s]+)?(#[%(anchor)s]*)?"
    ) % pieces

    # filename | [ filename ] #anchor
    retxt_url_local = r"[%(chars)s]+|[%(chars)s]*(#[%(anchor)s]*)" % pieces

    # user@domain [ ?form=data ]
    patt_email = (
        r"\b[%(login)s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%(form)s]+)?"
    ) % pieces

    # Saving for future use
    bank["_urlskel"] = urlskel

    # And now the real regexes
    bank["email"] = re.compile(patt_email, re.I)

    # email | url
    bank["link"] = re.compile(retxt_url + "|" + patt_email, re.I)

    # \[ label | imagetag    url | email | filename \]
    link_choices = "|".join((retxt_url, patt_email, retxt_url_local))
    bank["linkmark"] = re.compile(
        r"\[(?P<label>" + patt_img + r"|[^]]+) (?P<link>" + link_choices + r")\]",
        re.I,
    )

    # Image
    bank["img"] = re.compile(patt_img, re.I)

    # Special things
    bank["special"] = re.compile(r"^%!\s*")
    return bank
class error(Exception):
    """Exception type raised by Error()."""


def Quit(msg=""):
    """Print msg (when not empty) and leave the program with exit status 0."""
    if msg:
        print(msg)
    raise SystemExit(0)


def Error(msg):
    """Raise the `error` exception with msg prefixed by '<my_name>: Error: '."""
    prefix = "%s: Error: " % my_name
    raise error(prefix + msg)


def getTraceback():
    """Return the formatted traceback of the exception being handled.

    Best effort: when anything goes wrong while formatting, None is
    returned instead of raising.
    """
    try:
        import traceback

        exc_info = sys.exc_info()
        return "".join(traceback.format_exception(*exc_info))
    except Exception:
        return None


def getUnknownErrorMessage():
    """Compose the bug-report message shown when an unknown error aborts the run.

    The text asks the user to send the traceback to my_email and embeds
    the output of getTraceback().
    """
    headline = "Sorry! Txt2tags aborted by an unknown error."
    request = "Please send the following Error Traceback to the author"
    return "{}\n{} ({}):\n\n{}".format(headline, request, my_email, getTraceback())
def Message(msg, level):
    """Print msg when level is within the VERBOSE setting and QUIET is off.

    The message is prefixed by five dashes repeated once per level.
    """
    if level <= VERBOSE and not QUIET:
        prefix = "-" * 5
        print("{} {}".format(prefix * level, msg))


def Debug(msg, id_=0, linenr=None):
    """Show debug messages, categorized.

    id_ is an index into the category list below. When linenr is given,
    the message is prefixed with the zero-padded line number. Nothing is
    printed when QUIET is on or DEBUG is off.
    """
    if QUIET or not DEBUG:
        return
    ids = ["INI", "CFG", "SRC", "BLK", "HLD", "GUI", "OUT", "DET"]
    if linenr is not None:
        msg = "LINE %04d: %s" % (linenr, msg)
    print("++ {}: {}".format(ids[id_], msg))


def Readfile(file_path):
    """Return the lines of file_path as a list, without line breaks.

    The special path "-" reads from STDIN. Failures are reported through
    Error(), which raises.
    """
    if file_path == "-":
        try:
            contents = sys.stdin.read()
        except KeyboardInterrupt:
            Error("You must feed me with data on STDIN!")
    else:
        try:
            with io.open(file_path, encoding=ENCODING) as f:
                contents = f.read()
        except IOError as exception:
            Error("Cannot read file: {}\n{}".format(file_path, exception))
    lines = contents.splitlines()
    Message("File read (%d lines): %s" % (len(lines), file_path), 2)
    return lines


def Savefile(file_path, lines):
    """Write lines to file_path, one per line, using the ENCODING charset.

    Failures to open or write the file are reported through Error(),
    which raises.
    """
    contents = "\n".join(lines) + "\n"
    try:
        with io.open(file_path, "w", encoding=ENCODING) as f:
            try:
                f.write(contents)
            except TypeError:
                # NOTE(review): looks like a leftover for byte strings;
                # confirm whether this branch can still be reached.
                f.write(contents.decode(ENCODING))
    except IOError as exception:
        Error("Cannot open file for writing: {}\n{}".format(file_path, exception))


def dotted_spaces(txt=""):
    """Return txt with every space replaced by a dot."""
    return txt.replace(" ", ".")


# TIP: win env vars http://www.winnetmag.com/Article/ArticleID/23873/23873.html
def get_rc_path():
    """Return the full path for the user's RC file, or "" when unknown.

    The T2TCONFIG environment variable wins when set. Otherwise the path
    is composed from the first of HOME/HOMEPATH that is set, plus a
    platform-dependent file name. The file is not checked for existence.
    """
    # Try to get the path from an env var. If yes, we're done
    user_defined = os.environ.get("T2TCONFIG")
    if user_defined:
        return user_defined
    # Env var not found, so perform automatic path composing
    # Set default filename according system platform
    rc_names = {"default": ".txt2tagsrc", "win": "_t2trc"}
    rc_file = rc_names.get(sys.platform[:3]) or rc_names["default"]
    # The file must be on the user directory, but where is this dir?
    rc_dir = None
    for var in ("HOME", "HOMEPATH"):
        rc_dir = os.environ.get(var)
        if rc_dir:
            break
    # Sorry, not found
    if not rc_dir:
        return ""
    # rc dir found, now we must join dir+file to compose the full path
    rc_path = os.path.join(rc_dir, rc_file)
    # On windows, prefix with the drive (%homedrive%: 2k/XP/NT)
    if sys.platform.startswith("win"):
        rc_drive = os.environ.get("HOMEDRIVE")
        # HOMEDRIVE may be unset; joining None would raise TypeError
        if rc_drive:
            rc_path = os.path.join(rc_drive, rc_path)
    return rc_path
class CommandLine:
    """
    Command Line class - Masters command line

    Checks the provided command line and extracts its data. The --long
    options, flags and actions come from the global OPTIONS, FLAGS and
    ACTIONS dictionaries. The short options, and their equivalence to the
    long ones, are registered here.

    _compose_short_opts() -> str
    _compose_long_opts() -> list
        Compose the valid short and long options, on the 'getopt' format.

    parse() -> (opts, args)
        Call getopt to check and parse the command line. It expects the
        command line as a list, without the program name (sys.argv[1:]).

    get_raw_config() -> [RAW config]
        Scan the command line and convert the data to the RAW config
        format. See the ConfigMaster class for the RAW format description.
        The optional 'ignore' and 'filter_' arguments filter out or in
        the specified keys.

    get_raw_config() calls parse(), so the typical use of this class is:

        raw = CommandLine().get_raw_config(sys.argv[1:])
    """

    def __init__(self):
        self.all_options = list(OPTIONS)
        self.all_flags = list(FLAGS)
        self.all_actions = list(ACTIONS)

        # short:long options equivalence
        self.short_long = {
            "C": "config-file",
            "h": "help",
            "H": "no-headers",
            "i": "infile",
            "n": "enum-title",
            "o": "outfile",
            "q": "quiet",
            "t": "target",
            "v": "verbose",
            "V": "version",
        }

        # Compose valid short and long options data for getopt
        self.short_opts = self._compose_short_opts()
        self.long_opts = self._compose_long_opts()

    def _compose_short_opts(self):
        "Returns a string like 'hVt:o' with all short options/flags"
        # An option (not a flag) takes a parameter, marked by ':'
        return "".join(
            short + ":" if long_ in self.all_options else short
            for short, long_ in self.short_long.items()
        )

    def _compose_long_opts(self):
        "Returns a list with all the valid long options/flags"
        return [
            *(name + "=" for name in self.all_options),  # options take a value
            *self.all_flags,  # flag ON
            *self.all_actions,  # actions
            *("no-" + name for name in self.all_flags),  # flag OFF
            "no-style",  # turn OFF
            "no-outfile",
            "no-infile",
            "no-targets",
        ]

    def _tokenize(self, cmd_string=""):
        "Convert a command line string to a list"
        # TODO protect quotes contents -- Don't use it, pass cmdline as list
        return cmd_string.split()

    def parse(self, cmdline):
        "Check/Parse a command line list     TIP: no program name!"
        try:
            parsed = getopt.getopt(cmdline, self.short_opts, self.long_opts)
        except getopt.error as errmsg:
            Error("%s (try --help)" % errmsg)
        opts, args = parsed
        return (opts, args)

    def get_raw_config(self, cmdline=None, ignore=None, filter_=None, relative=False):
        "Returns the options/arguments found as RAW config"
        if not cmdline:
            return []
        ignore = ignore or []
        filter_ = filter_ or []

        # We need lists, not strings (such as from %!options)
        if not isinstance(cmdline, list):
            cmdline = self._tokenize(cmdline)

        # Extract name/value pair of all configs, check for invalid names
        options, arguments = self.parse(cmdline[:])

        raw = []
        for flag, value in options:
            # Remove leading - and --
            name = re.sub("^--?", "", flag)

            # Translate short option to long
            if len(name) == 1:
                name = self.short_long[name]

            # -C, --config-file inclusion, path relative to PWD
            if name == "config-file":
                raw.extend(ConfigLines().include_config_file(value))
                continue

            # Outfile exception: path relative to PWD
            if name == "outfile" and relative and value not in (STDOUT, MODULEOUT):
                value = os.path.abspath(value)

            # Save this config
            raw.append(["all", name, value])

        # Every remaining argument is an infile
        raw.extend(["all", "infile", infile] for infile in arguments)

        # Apply 'ignore' and 'filter_' rules (filter_ is stronger)
        if ignore or filter_:
            raw = [
                [target, name, value]
                for target, name, value in raw
                if (filter_ and name in filter_) or (ignore and name not in ignore)
            ]

        return raw
2025 try: 2026 opts, args = getopt.getopt(cmdline, short, long_) 2027 except getopt.error as errmsg: 2028 Error("%s (try --help)" % errmsg) 2029 return (opts, args) 2030 2031 def get_raw_config(self, cmdline=None, ignore=None, filter_=None, relative=False): 2032 "Returns the options/arguments found as RAW config" 2033 2034 if not cmdline: 2035 return [] 2036 ignore = ignore or [] 2037 filter_ = filter_ or [] 2038 2039 ret = [] 2040 2041 # We need lists, not strings (such as from %!options) 2042 if not isinstance(cmdline, list): 2043 cmdline = self._tokenize(cmdline) 2044 2045 # Extract name/value pair of all configs, check for invalid names 2046 options, arguments = self.parse(cmdline[:]) 2047 2048 # Some cleanup on the raw config 2049 for name, value in options: 2050 2051 # Remove leading - and -- 2052 name = re.sub("^--?", "", name) 2053 2054 # Translate short option to long 2055 if len(name) == 1: 2056 name = self.short_long[name] 2057 2058 # Outfile exception: path relative to PWD 2059 if name == "outfile" and relative and value not in [STDOUT, MODULEOUT]: 2060 value = os.path.abspath(value) 2061 2062 # -C, --config-file inclusion, path relative to PWD 2063 if name == "config-file": 2064 ret.extend(ConfigLines().include_config_file(value)) 2065 continue 2066 2067 # Save this config 2068 ret.append(["all", name, value]) 2069 2070 # All configuration was read and saved 2071 2072 # Get infile, if any 2073 while arguments: 2074 infile = arguments.pop(0) 2075 ret.append(["all", "infile", infile]) 2076 2077 # Apply 'ignore' and 'filter_' rules (filter_ is stronger) 2078 if ignore or filter_: 2079 filtered = [] 2080 for target, name, value in ret: 2081 if (filter_ and name in filter_) or (ignore and name not in ignore): 2082 filtered.append([target, name, value]) 2083 ret = filtered[:] 2084 2085 return ret 2086 2087 2088############################################################################## 2089 2090 2091class SourceDocument: 2092 """ 2093 SourceDocument class 
class SourceDocument:
    """
    SourceDocument class - scan document structure, extract data

    It knows about full files. It reads a file and identifies where
    each area begins (Head,Conf,Body). With this info it can
    extract each area's contents.
    Note: the original line break is removed.

    DATA:
      self.arearef - Save Head, Conf, Body init line number
      self.areas   - Store the area names which are not empty
      self.buffer  - The full file contents (with NO \\r, \\n)

    METHODS:
      get()   - Access the contents of an Area. Example:
                config = SourceDocument(file).get('conf')

      split() - Get all the document Areas at once. Example:
                head, conf, body = SourceDocument(file).split()

    RULES:
        * The document parts are sequential: Head, Conf and Body.
        * One ends when the next begins.
        * The Conf Area is optional, so a document can have just
          Head and Body Areas.

        These are the Areas limits:
          - Head Area: the first three lines
          - Body Area: from the first valid text line to the end
          - Conf Area: the comments between Head and Body Areas

        Exception: If the first line is blank, this means no
        header info, so the Head Area is just the first line.
    """

    def __init__(self, filename="", contents=None):
        """Scan 'filename' if given, otherwise the 'contents' lines (if any)."""
        self.areas = ["head", "conf", "body"]
        self.arearef = []
        self.areas_fancy = ""
        self.filename = filename
        self.buffer = []
        if filename:
            self.scan_file(filename)
        elif contents:
            self.scan(contents)

    def split(self):
        "Returns all document parts, split into lists."
        return self.get("head"), self.get("conf"), self.get("body")

    def get(self, areaname):
        """Returns head|conf|body contents from self.buffer

        An unknown or empty area (or an unscanned document) gives [].
        The head, when present, is always padded up to three lines.
        """
        # Sanity
        if areaname not in self.areas or not self.buffer:
            return []
        # Go get it (the buffer is 1-based, position 0 is a dummy line)
        bufend = len(self.buffer)
        if areaname == "head":
            ini = 1
            end = self.arearef[1] or self.arearef[2] or bufend
        elif areaname == "conf":
            ini = self.arearef[1]
            end = self.arearef[2] or bufend
        elif areaname == "body":
            ini = self.arearef[2]
            end = bufend
        else:
            # Reached for the "" placeholder of a missing area
            Error("Unknown Area name '%s'" % areaname)
        lines = self.buffer[ini:end]
        # Make sure head will always have 3 lines
        if areaname == "head":
            lines.extend([""] * (3 - len(lines)))
        return lines

    def scan_file(self, filename):
        "Read the file contents and scan it"
        Debug("source file: %s" % filename)
        Message("Loading source document", 1)
        self.scan(Readfile(filename))

    def scan(self, lines):
        """Run through source file and identify head/conf/body areas

        The received list is not modified: the scan works on a copy
        with a dummy first item, so that the text starts at position 1.
        """
        if not lines:
            Error("The input file is empty: %s" % self.filename)
        cfg_parser = ConfigLines().parse_line
        buf = [""] + list(lines)  # text start at pos 1
        ref = [1, 4, 0]
        if not buf[1].strip():  # no header
            ref[0] = 0
            ref[1] = 2
        rgx = getRegexes()
        on_comment_block = False
        for i in range(ref[1], len(buf)):  # find body init:
            line = buf[i]
            # Handle comment blocks inside config area: the same mark
            # opens and closes the block, so each hit toggles the state
            if rgx["blockCommentOpen"].search(line):
                on_comment_block = not on_comment_block
                continue
            if on_comment_block:
                continue

            # Body starts at the first non-comment line or at an %!include
            if line.strip() and (line[0] != "%" or cfg_parser(line, "include")[1]):
                ref[2] = i
                break
        if ref[1] == ref[2]:
            ref[1] = 0  # no conf area
        # Start from the full list so a repeated scan does not inherit
        # the blanks left by a previous one
        areas = ["head", "conf", "body"]
        for i in range(3):  # del !existent
            if ref[i] >= len(buf):
                ref[i] = 0  # title-only
            if not ref[i]:
                areas[i] = ""
        self.areas = areas
        Debug("Head,Conf,Body start line: %s" % ref)
        self.arearef = ref  # save results
        self.buffer = buf
        # Fancyness sample: head conf body (1 4 8)
        self.areas_fancy = "{} ({})".format(
            " ".join(self.areas), " ".join(str(x or "") for x in ref)
        )
        Message("Areas found: %s" % self.areas_fancy, 2)

    def get_raw_config(self):
        "Handy method to get the CONF area RAW config (if any)"
        if "conf" not in self.areas:
            return []
        Message("Scanning source document CONF area", 1)
        raw = ConfigLines(
            file_=self.filename, lines=self.get("conf"), first_line=self.arearef[1]
        ).get_raw_config()
        Debug("document raw config: %s" % raw, 1)
        return raw
class ConfigMaster:
    """
    ConfigMaster class - the configuration wizard

    This class is the configuration master. It knows how to handle
    the RAW and PARSED config format. It also performs the sanity
    checking for a given configuration.

    DATA:
      self.raw         - Stores the config on the RAW format
      self.parsed      - Stores the config on the PARSED format
      self.defaults    - Stores the default values for all keys
      self.off         - Stores the OFF values for all keys
      self.multi       - List of keys which can have multiple values
      self.incremental - List of keys which are incremental

    RAW FORMAT:
      The RAW format is a list of lists, being each mother list item
      a full configuration entry. Any entry is a 3 item list, on
      the following format: [ TARGET, KEY, VALUE ]
      Being a list, the order is preserved, so it's easy to use
      different kinds of configs, as CONF area and command line,
      respecting the precedence.
      The special target 'all' is used when no specific target was
      defined on the original config.

    PARSED FORMAT:
      The PARSED format is a dictionary, with all the 'key : value'
      found by reading the RAW config. The self.target contents
      matters, so this dictionary only contains the target's
      config. The configs of other targets are ignored.

    The CommandLine and ConfigLines classes have the get_raw_config()
    method which convert the configuration found to the RAW format.
    Just feed it to parse() and get a brand-new ready-to-use config
    dictionary. Example:

        >>> raw = CommandLine().get_raw_config(['-n', '-H'])
        >>> print(raw)
        [['all', 'enum-title', ''], ['all', 'no-headers', '']]
        >>> parsed = ConfigMaster(raw).parse()
        >>> print(parsed)
        {'enum-title': 1, 'headers': 0}
    """

    def __init__(self, raw=None, target=""):
        "Store the RAW config and compose the defaults/OFF tables"
        self.raw = raw or []
        self.target = target
        self.parsed = {}
        self.dft_options = OPTIONS.copy()
        self.dft_flags = FLAGS.copy()
        self.dft_actions = ACTIONS.copy()
        self.defaults = self._get_defaults()
        self.off = self._get_off()
        self.incremental = ["verbose"]
        self.multi = ["infile", "preproc", "postproc", "options", "style"]

    def _get_defaults(self):
        "Get the default values for all config/options/flags"
        empty = {kw: "" for kw in CONFIG_KEYWORDS}
        empty.update(self.dft_options)
        empty.update(self.dft_flags)
        empty.update(self.dft_actions)
        empty["sourcefile"] = ""  # internal use only
        return empty

    def _get_off(self):
        "Turns OFF all the config/options/flags"
        # Calling the type gives its OFF value: 0, "" or a fresh []
        # The lookup is by exact type, subclasses are not accepted
        known_types = (int, str, list)
        off = {}
        for key, default in self.defaults.items():
            kind = type(default)
            if kind in known_types:
                off[key] = kind()
            else:
                Error("ConfigMaster: %s: Unknown type" % key)
        return off

    def _check_target(self):
        "Checks if the target is already defined. If not, do it"
        if not self.target:
            self.target = self.find_value("target")

    def get_target_raw(self):
        "Returns the raw config for self.target or 'all'"
        self._check_target()
        return [entry for entry in self.raw if entry[0] in (self.target, "all")]

    def add(self, key, val):
        "Adds the key:value pair to the config dictionary (if needed)"
        # %!options
        if key == "options":
            # Actions and the target can't be set from inside %!options,
            # --targets being the only exception
            ignoreme = [name for name in self.dft_actions if name != "targets"]
            ignoreme.append("target")
            raw_opts = CommandLine().get_raw_config(val, ignore=ignoreme)
            for _target, opt_key, opt_val in raw_opts:
                self.add(opt_key, opt_val)
            return
        # The no- prefix turns OFF this key
        if key.startswith("no-"):
            key = key[3:]  # remove prefix
            val = self.off.get(key)  # turn key OFF
        # Is this key valid?
        if key not in self.defaults:
            Debug("Bogus Config {}:{}".format(key, val), 1)
            return
        # Is this value the default one?
        if val == self.defaults.get(key):
            # If default value, remove previous key:val
            if key in self.parsed:
                del self.parsed[key]
            # Nothing more to do
            return
        # Flags ON comes empty. we'll add the 1 value now
        if val == "" and (key in self.dft_flags or key in self.dft_actions):
            val = 1
        # Multi value or single?
        if key in self.multi:
            # First one? start new list
            self.parsed.setdefault(key, []).append(val)
        # Incremental value? so let's add it
        elif key in self.incremental:
            self.parsed[key] = (self.parsed.get(key) or 0) + val
        else:
            self.parsed[key] = val
        fancykey = dotted_spaces("%12s" % key)
        Message("Added config {} : {}".format(fancykey, val), 3)

    def get_outfile_name(self, config):
        "Dirname is the same for {in,out}file"
        infile, outfile = config["sourcefile"], config["outfile"]
        if (
            outfile
            and outfile not in (STDOUT, MODULEOUT)
            and not os.path.isabs(outfile)
        ):
            outfile = os.path.join(os.path.dirname(infile), outfile)
        if infile == STDIN and not outfile:
            outfile = STDOUT
        if infile == MODULEIN and not outfile:
            outfile = MODULEOUT
        if not outfile and (infile and config.get("target")):
            # No name given: infile name with the target as extension
            basename = re.sub(r"\.(txt|t2t)$", "", infile)
            outfile = "{}.{}".format(basename, config["target"])
        Debug(" infile: '%s'" % infile, 1)
        Debug("outfile: '%s'" % outfile, 1)
        return outfile

    def sanity(self, config):
        """Basic config sanity checking

        Returns a copy of 'config' with all the keys present, the
        target set and the outfile name composed. An empty config
        gives {}. Problems are reported with Error().
        """
        if not config:
            return {}
        target = config.get("target")
        # Some actions don't require target specification
        if not target:
            for action in NO_TARGET:
                if config.get(action):
                    target = "txt"
                    break

        # We *need* a target
        if not target:
            Error(
                "No target specified (try --help)."
                + "\n\n"
                + "Please select a target using the -t option or the %!target command."
                + "\n"
                + "Example:"
                + " {} -t html {}".format(my_name, "file.t2t")
                + "\n\n"
                + "Run 'txt2tags --targets' to see all available targets."
            )
        # And of course, an infile also
        if "infile" not in config:
            Error("Missing input file (try --help)")
        # Is the target valid?
        if target not in TARGETS:
            Error(
                "Invalid target '%s'" % target
                + "\n\n"
                + "Run 'txt2tags --targets' to see all the available targets."
            )
        # Ensure all keys are present
        full = self.defaults.copy()
        full.update(config)
        config = full
        # Restore target
        config["target"] = target
        # Set output file name
        config["outfile"] = self.get_outfile_name(config)
        # Checking suicide
        if os.path.abspath(config["sourcefile"]) == os.path.abspath(
            config["outfile"]
        ) and config["outfile"] not in [STDOUT, MODULEOUT]:
            Error("Input and Output files are the same: %s" % config["outfile"])
        return config

    def parse(self):
        "Returns the parsed config for the current target"
        for _target, key, value in self.get_target_raw():
            self.add(key, value)
        Message("Added the following keys: %s" % ", ".join(sorted(self.parsed)), 2)
        return self.parsed.copy()

    def find_value(self, key="", target=""):
        """Scans ALL raw config to find the desired key

        Returns "" when not found, a list with all the values for
        multi value keys, or only the last value found otherwise.
        """
        # Scan and save all values found
        found = [
            val
            for targ, k, val in self.raw
            if k == key and targ in (target, "all")
        ]
        if not found:
            return ""
        # If not multi value, return only the last found
        if key in self.multi:
            return found
        return found[-1]
class ConfigLines:
    """
    ConfigLines class - the config file data extractor

    This class reads and parses the config lines on the %!key:val
    format, converting them to RAW config. It deals with user
    config file (RC file), source document CONF area and
    %!includeconf directives.

    Call it passing a file name or feed the desired config lines.
    Then just call the get_raw_config() method and wait to
    receive the full config data on the RAW format. This method
    also follows the possible %!includeconf directives found on
    the config lines. Example:

        raw = ConfigLines(file_=".txt2tagsrc").get_raw_config()

    The parse_line() method is also useful to be used alone,
    to identify and tokenize a single config line. For example,
    to get the %!include command components, on the source
    document BODY:

        target, key, value = ConfigLines().parse_line(body_line)
    """

    # Placeholder stored in self.file when no file name was informed
    _NO_FILE = "NOFILE"

    def __init__(self, file_="", lines=None, first_line=1):
        "Save the file name, the config lines and the first line number"
        self.file = file_ or self._NO_FILE
        self.lines = lines or []
        self.first_line = first_line

    def load_lines(self):
        "Make sure we've loaded the file contents into buffer"
        # self.file is never empty, so the "no file" case must be
        # detected by its placeholder
        if not self.lines and self.file == self._NO_FILE:
            Error("ConfigLines: No file or lines provided")
        if not self.lines:
            self.lines = self.read_config_file(self.file)

    def read_config_file(self, filename=""):
        "Read a Config File contents, aborting on invalid line"
        if not filename:
            return []
        errormsg = "Invalid CONFIG line on %s" + "\n%03d:%s"
        lines = Readfile(filename)
        # Sanity: try to find invalid config lines
        for number, rawline in enumerate(lines, 1):
            line = rawline.rstrip()
            if not line:
                continue  # empty
            if line[0] != "%":
                Error(errormsg % (filename, number, line))
        return lines

    def include_config_file(self, file_=""):
        "Perform the %!includeconf action, returning RAW config"
        if not file_:
            return []
        # Current dir relative to the current file (self.file)
        current_dir = os.path.dirname(self.file)
        file_ = os.path.join(current_dir, file_)
        # Read and parse included config file contents
        lines = self.read_config_file(file_)
        return ConfigLines(file_=file_, lines=lines).get_raw_config()

    def get_raw_config(self):
        "Scan buffer and extract all config as RAW (including includes)"
        ret = []
        self.load_lines()
        for number, line in enumerate(self.lines, self.first_line):
            Message("Processing line %03d: %s" % (number, line), 2)
            target, key, val = self.parse_line(line)
            if not key:
                continue  # no config on this line
            if key == "includeconf":
                err = "A file cannot include itself (loop!)"
                if val == self.file:
                    Error("{}: %!includeconf: {}".format(err, self.file))
                more_raw = self.include_config_file(val)
                ret.extend(more_raw)
                Message("Finished Config file inclusion: %s" % val, 2)
            else:
                ret.append([target, key, val])
                Message("Added %s" % key, 3)
        return ret

    def parse_line(self, line="", keyname="", target=""):
        """Detects %!key:val config lines and extract data from it

        line    -- the text line to be inspected
        keyname -- regex for the accepted key names (default: any)
        target  -- regex for the accepted targets (default: any)

        Returns the [target, name, value] list, or ["", "", ""] when
        the line is not a (valid) config line. The name and the target
        come in lowercase, and the target is 'all' when not informed.
        For %!preproc and %!postproc the value is a (pattern, replace)
        tuple.
        """
        empty = ["", "", ""]
        if not line:
            return empty
        no_target = ["target", "includeconf"]
        re_name = keyname or "[a-z]+"
        re_target = target or "[a-z]*"
        # The value is \S.*? (and not \S.+?) so one char values,
        # such as %!include:a, are also accepted
        cfgregex = re.compile(
            r"""
            ^%%!\s*               # leading id with opt spaces
            (?P<name>%s)\s*       # config name
            (\((?P<target>%s)\))? # optional target spec inside ()
            \s*:\s*               # key:value delimiter with opt spaces
            (?P<value>\S.*?)      # config value
            \s*$                  # rstrip() spaces and hit EOL
            """
            % (re_name, re_target),
            re.I | re.VERBOSE,
        )
        prepostregex = re.compile(
            r"""
                                  # ---[ PATTERN ]---
            ^( "([^"]*)"          # "double quoted" or
            | '([^']*)'           # 'single quoted' or
            | ([^\s]+)            # single_word
            )
            \s+                   # separated by spaces

                                  # ---[ REPLACE ]---
            ( "([^"]*)"           # "double quoted" or
            | '([^']*)'           # 'single quoted' or
            | (.*)                # anything
            )
            \s*$
            """,
            re.VERBOSE,
        )

        # Give me a match or get out
        match = cfgregex.match(line)
        if not match:
            return empty

        # Save information about this config
        name = (match.group("name") or "").lower()
        target = (match.group("target") or "all").lower()
        value = match.group("value")

        # %!keyword(target) not allowed for these
        if name in no_target and match.group("target"):
            Error("You can't use (target) with %s" % ("%!" + name) + "\n%s" % line)

        # Force no_target keywords to be valid for all targets
        if name in no_target:
            target = "all"

        # Special config with two quoted values (%!preproc: "foo" 'bar')
        if name in ("preproc", "postproc"):
            valmatch = prepostregex.search(value)
            if not valmatch:
                return empty
            getval = valmatch.group
            patt = getval(2) or getval(3) or getval(4) or ""
            repl = getval(6) or getval(7) or getval(8) or ""
            value = (patt, repl)
        return [target, name, value]
class MaskMaster:
    """(Un)Protect important structures from escaping and formatting

    mask() replaces the inline tagged, raw and verbatim marks and the
    links of a line by fixed mask strings, saving their contents in
    the banks. undo() puts the saved contents back, in the same order,
    in place of the masks.
    """

    def __init__(self):
        self.linkmask = "vvvLINKvvv"
        self.monomask = "vvvMONOvvv"
        self.rawmask = "vvvRAWvvv"
        self.taggedmask = "vvvTAGGEDvvv"
        self.reset()

    def reset(self):
        "Empty all the banks"
        self.linkbank = []
        self.monobank = []
        self.rawbank = []
        self.taggedbank = []

    def mask(self, line=""):
        "Returns the line with the protected structures masked"
        # The verbatim, raw and tagged inline marks are mutually exclusive.
        # This means that one can't appear inside the other.
        # If found, the inner marks must be ignored.
        # Example: ``foo ""bar"" ''baz''``
        # In HTML: <code>foo ""bar"" ''baz''</code>
        #
        # The trick here is to protect the mark who appears first on the line.
        # The three regexes are tried and the one with the lowest index wins.
        # If none is found (else), we get out of the loop.
        #
        while True:
            tagged = regex["tagged"].search(line)
            raw = regex["raw"].search(line)
            mono = regex["fontMono"].search(line)

            # Start position of each mark, -1 when not found
            t = tagged.start() if tagged else -1
            r = raw.start() if raw else -1
            v = mono.start() if mono else -1

            # Protect tagged text
            if t >= 0 and (r == -1 or t < r) and (v == -1 or t < v):
                txt = tagged.group(1)
                if TARGET == "tex":
                    txt = txt.replace("_", "vvvUnderscoreInTaggedTextvvv")
                self.taggedbank.append(txt)
                line = regex["tagged"].sub(self.taggedmask, line, 1)

            # Protect raw text
            elif r >= 0 and (t == -1 or r < t) and (v == -1 or r < v):
                txt = doEscape(TARGET, raw.group(1))
                if TARGET == "tex":
                    txt = txt.replace("_", "vvvUnderscoreInRawTextvvv")
                self.rawbank.append(txt)
                line = regex["raw"].sub(self.rawmask, line, 1)

            # Protect verbatim text
            elif v >= 0 and (t == -1 or v < t) and (r == -1 or v < r):
                txt = doEscape(TARGET, mono.group(1))
                self.monobank.append(txt)
                line = regex["fontMono"].sub(self.monomask, line, 1)
            else:
                break

        # Protect URLs and emails
        while True:

            # Try to match plain or named links
            match_link = regex["link"].search(line)
            match_named = regex["linkmark"].search(line)
            if not (match_link or match_named):
                break

            # Define the current match
            if match_link and match_named:
                # Both types found, which is the first?
                m = match_link
                if match_named.start() < match_link.start():
                    m = match_named
            else:
                # Just one type found, we're fine
                m = match_link or match_named

            # Extract link data and apply mask
            if m is match_link:  # plain link
                link = m.group()
                label = ""
                link_re = regex["link"]
            else:  # named link
                link = m.group("link")
                label = m.group("label").rstrip()
                link_re = regex["linkmark"]
            line = link_re.sub(self.linkmask, line, 1)

            # Save link data to the link bank
            self.linkbank.append((label, link))
        return line

    def undo(self, line):
        "Returns the line with the masks replaced by the saved contents"
        # url & email
        for label, url in self.linkbank:
            link = get_tagged_link(label, url)
            line = line.replace(self.linkmask, link, 1)

        # Expand verb
        for mono in self.monobank:
            open_, close = TAGS["fontMonoOpen"], TAGS["fontMonoClose"]
            line = line.replace(self.monomask, open_ + mono + close, 1)

        # Expand raw
        for raw in self.rawbank:
            line = line.replace(self.rawmask, raw, 1)

        # Expand tagged
        for tagged in self.taggedbank:
            line = line.replace(self.taggedmask, tagged, 1)

        return line
class TitleMaster:
    """Title things

    Parses the title lines, composes their tags and keeps the info
    needed to build the TOC. Feed each title line to add() and then
    call get() to receive the tagged title.
    """

    def __init__(self):
        self.count = ["", 0, 0, 0, 0, 0]
        self.toc = []
        self.level = 0
        self.kind = ""
        self.txt = ""
        self.label = ""
        self.tag = ""
        self.tag_hold = []
        self.last_level = 0
        self.count_id = ""
        self.anchor_count = 0
        self.anchor_prefix = "toc"

    def _hold(self, tag):
        "Save a tag to be inserted before the next title (if not empty)"
        if tag:
            self.tag_hold.append(tag)

    def _open_close_blocks(self):
        "Open new title blocks, closing the previous (if any)"
        if not rules["titleblocks"]:
            return
        last = self.last_level
        curr = self.level

        # Same level, just close the previous
        if curr == last:
            self._hold(TAGS.get("title%dClose" % last))

        # Section -> subsection, more depth
        while curr > last:
            last += 1

            # Open the new block of subsections
            self._hold(TAGS.get("blockTitle%dOpen" % last))

            # Jump from title1 to title3 or more
            # Fill the gap with an empty section
            if curr - last > 0:
                # A missing tag is taken as empty, sub() needs a string
                tag = TAGS.get("title%dOpen" % last) or ""
                self._hold(regex["x"].sub("", tag))  # del \a

        # Section <- subsection, less depth
        while curr < last:
            # Close the current opened subsection
            self._hold(TAGS.get("title%dClose" % last))

            # Close the current opened block of subsections
            self._hold(TAGS.get("blockTitle%dClose" % last))

            last -= 1

            # Close the previous section of the same level
            # The subsections were under it
            if curr == last:
                self._hold(TAGS.get("title%dClose" % last))

    def add(self, line):
        "Parses a new title line."
        if not line:
            return
        self._set_prop(line)
        self._open_close_blocks()
        self._set_count_id()
        self._set_label()
        self._save_toc_info()

    def close_all(self):
        "Closes all opened title blocks"
        ret = list(self.tag_hold)
        # Note: self.level is decreased down to zero
        while self.level:
            for name in ("title%dClose", "blockTitle%dClose"):
                tag = TAGS.get(name % self.level)
                if tag:
                    ret.append(tag)
            self.level -= 1
        return ret

    def _save_toc_info(self):
        "Save TOC info, used by self.dump_marked_toc()"
        self.toc.append((self.level, self.count_id, self.txt, self.label))

    def _set_prop(self, line=""):
        "Extract info from original line and set data holders."
        # Detect title type (numbered or not)
        id_ = line.lstrip()[0]
        if id_ == "=":
            kind = "title"
        elif id_ == "+":
            kind = "numtitle"
        else:
            Error("Unknown Title ID '%s'" % id_)
        # Extract line info
        match = regex[kind].search(line)
        level = len(match.group("id"))
        txt = match.group("txt").strip()
        label = match.group("label")
        # Parse info & save
        if CONF["enum-title"]:
            kind = "numtitle"  # force
        # The plain title tag is the fallback for the numbered one
        if rules["titleblocks"]:
            self.tag = TAGS.get("%s%dOpen" % (kind, level)) or TAGS.get(
                "title%dOpen" % level
            )
        else:
            self.tag = TAGS.get(kind + repr(level)) or TAGS.get("title" + repr(level))
        self.last_level = self.level
        self.kind = kind
        self.level = level
        self.txt = txt
        self.label = label

    def _set_count_id(self):
        "Compose and save the title count identifier (if needed)."
        count_id = ""
        if self.kind == "numtitle" and not rules["autonumbertitle"]:
            # Manually increase title count
            self.count[self.level] += 1
            # Reset sublevels count (if any)
            for i in range(self.level + 1, len(self.count)):
                self.count[i] = 0
            # Compose count id from hierarchy
            count_id = "".join("%d." % n for n in self.count[1 : self.level + 1])
        self.count_id = count_id

    def _set_label(self):
        "Compose and save title label, used by anchors."
        # Remove invalid chars from label set by user
        self.label = re.sub("[^A-Za-z0-9_-]", "", self.label or "")

    def _get_tagged_anchor(self):
        "Return anchor if user defined a label, or TOC is on."
        ret = ""
        label = self.label
        if CONF["toc"]:
            self.anchor_count += 1
            # Autonumber label (if needed)
            label = label or "{}{}".format(self.anchor_prefix, self.anchor_count)
        if label and TAGS["anchor"]:
            ret = regex["x"].sub(label, TAGS["anchor"])
        return ret

    def _get_full_title_text(self):
        "Returns the full title contents, already escaped."
        ret = self.txt
        # Insert count_id (if any) before text
        if self.count_id:
            ret = "{} {}".format(self.count_id, ret)
        # Escape specials
        ret = doEscape(TARGET, ret)
        # Some targets need final escapes on title lines
        # It's here because there is a 'continue' after title
        if rules["finalescapetitle"]:
            ret = doFinalEscape(TARGET, ret)
        return ret

    def get(self):
        "Returns the tagged title as a list."
        ret = []

        # Maybe some anchoring before?
        anchor = self._get_tagged_anchor()
        self.tag = regex["_anchor"].sub(anchor, self.tag)

        # Compose & escape title text (TOC uses unescaped)
        full_title = self._get_full_title_text()

        # Close previous section area
        ret.extend(self.tag_hold)
        self.tag_hold = []

        tagged = regex["x"].sub(full_title, self.tag)

        # Adds "underline" on TXT target
        if TARGET == "txt":
            if BLOCK.count > 1:
                ret.append("")  # blank line before
            ret.append(tagged)
            # Get the right letter count for UTF
            if isinstance(full_title, bytes):
                full_title = full_title.decode(ENCODING)
            ret.append(regex["x"].sub("=" * len(full_title), self.tag))
        else:
            ret.append(tagged)
        return ret

    def dump_marked_toc(self):
        "Dumps all toc items as a valid t2t-marked list"
        ret = []
        # Every title is counted, labeled or not
        for toc_count, (level, count_id, txt, label) in enumerate(self.toc, 1):
            indent = "  " * level
            id_txt = ("{} {}".format(count_id, txt)).lstrip()
            label = label or self.anchor_prefix + repr(toc_count)

            # TOC will have crosslinks to anchors
            if TAGS["anchor"]:
                if CONF["enum-title"] and level == 1:
                    # 1. [Foo #anchor] is more readable than [1. Foo #anchor] in level 1.
                    # This is an idea stolen from Windows .CHM help files.
                    tocitem = '{}+ [""{}"" #{}]'.format(indent, txt, label)
                else:
                    tocitem = '{}- [""{}"" #{}]'.format(indent, id_txt, label)

            # TOC will be plain text (no links)
            else:
                if TARGET in ["txt", "man"]:
                    # For these, the list is not necessary, just dump the text
                    tocitem = '{}""{}""'.format(indent, id_txt)
                else:
                    tocitem = '{}- ""{}""'.format(indent, id_txt)
            ret.append(tocitem)
        return ret
regex["_tableBorder"].sub(tborder, topen) 3037 topen = regex["_tableColAlign"].sub(calign, topen) 3038 # Tex table spec, border or not: {|l|c|r|} , {lcr} 3039 if calignsep and not self.border: 3040 # Remove cell align separator 3041 topen = topen.replace(calignsep, "") 3042 return topen 3043 3044 def _get_cell_align(self, cells): 3045 ret = [] 3046 for cell in cells: 3047 align = "Left" 3048 if cell.strip(): 3049 if cell[0] == " " and cell[-1] == " ": 3050 align = "Center" 3051 elif cell[0] == " ": 3052 align = "Right" 3053 ret.append(align) 3054 return ret 3055 3056 def _get_cell_span(self, cells): 3057 ret = [] 3058 for cell in cells: 3059 span = 1 3060 m = re.search(r"\a(\|+)$", cell) 3061 if m: 3062 span = len(m.group(1)) + 1 3063 ret.append(span) 3064 return ret 3065 3066 def _tag_cells(self, rowdata): 3067 row = [] 3068 cells = rowdata["cells"] 3069 open_ = TAGS["tableCellOpen"] 3070 close = TAGS["tableCellClose"] 3071 sep = TAGS["tableCellSep"] 3072 calign = [TAGS["_tableCellAlign" + x] for x in rowdata["cellalign"]] 3073 calignsep = TAGS["tableColAlignSep"] 3074 ncolumns = len(self.colalign) 3075 3076 # Populate the span and multicol open tags 3077 cspan = [] 3078 multicol = [] 3079 colindex = 0 3080 for cellindex in range(0, len(rowdata["cellspan"])): 3081 3082 span = rowdata["cellspan"][cellindex] 3083 align = rowdata["cellalign"][cellindex] 3084 3085 if span > 1: 3086 cspan.append(regex["x"].sub(str(span), TAGS["_tableCellColSpan"])) 3087 3088 mcopen = regex["x"].sub(str(span), TAGS["_tableCellMulticolOpen"]) 3089 multicol.append(mcopen) 3090 else: 3091 cspan.append("") 3092 3093 if colindex < ncolumns and align != self.colalign[colindex]: 3094 mcopen = regex["x"].sub("1", TAGS["_tableCellMulticolOpen"]) 3095 multicol.append(mcopen) 3096 else: 3097 multicol.append("") 3098 3099 if not self.border: 3100 multicol[-1] = multicol[-1].replace(calignsep, "") 3101 3102 colindex += span 3103 3104 # Maybe is it a title row? 
3105 if rowdata["title"]: 3106 open_ = TAGS["tableTitleCellOpen"] or open_ 3107 close = TAGS["tableTitleCellClose"] or close 3108 sep = TAGS["tableTitleCellSep"] or sep 3109 3110 # Should we break the line on *each* table cell? 3111 if rules["breaktablecell"]: 3112 close = close + "\n" 3113 3114 # Cells pre processing 3115 if rules["tablecellstrip"]: 3116 cells = [x.strip() for x in cells] 3117 if rowdata["title"] and rules["tabletitlerowinbold"]: 3118 cells = [enclose_me("fontBold", x) for x in cells] 3119 3120 # Add cell BEGIN/END tags 3121 for cell in cells: 3122 copen = open_ 3123 cclose = close 3124 # Make sure we will pop from some filled lists 3125 # Fixes empty line bug '| |' 3126 this_align = this_span = this_mcopen = "" 3127 if calign: 3128 this_align = calign.pop(0) 3129 if cspan: 3130 this_span = cspan.pop(0) 3131 if multicol: 3132 this_mcopen = multicol.pop(0) 3133 3134 # Insert cell align into open tag (if cell is alignable) 3135 if rules["tablecellaligntype"] == "cell": 3136 copen = regex["_tableCellAlign"].sub(this_align, copen) 3137 3138 # Insert cell span into open tag (if cell is spannable) 3139 if rules["tablecellspannable"]: 3140 copen = regex["_tableCellColSpan"].sub(this_span, copen) 3141 3142 # Use multicol tags instead (if multicol supported, and if 3143 # cell has a span or is aligned differently to column) 3144 if rules["tablecellmulticol"]: 3145 if this_mcopen: 3146 copen = regex["_tableColAlign"].sub(this_align, this_mcopen) 3147 cclose = TAGS["_tableCellMulticolClose"] 3148 3149 row.append(copen + cell + cclose) 3150 3151 # Maybe there are cell separators? 
    def add_row(self, cells):
        "Stores one more table row, to be tagged later by dump()"
        # NOTE(review): despite the parameter name, dump() and _tag_cells()
        # read each stored item as a row dictionary (keys "title", "cells",
        # "cellalign" and "cellspan", as composed by parse_row()) -- confirm
        # against the callers.
        self.rows.append(cells)
3194 Debug("Table Prop: %s" % ret, 7) 3195 return ret 3196 3197 def dump(self): 3198 open_ = self._get_open_tag() 3199 rows = self.rows 3200 close = TAGS["tableClose"] 3201 3202 rowopen = TAGS["tableRowOpen"] 3203 rowclose = TAGS["tableRowClose"] 3204 rowsep = TAGS["tableRowSep"] 3205 titrowopen = TAGS["tableTitleRowOpen"] or rowopen 3206 titrowclose = TAGS["tableTitleRowClose"] or rowclose 3207 3208 if rules["breaktablelineopen"]: 3209 rowopen = rowopen + "\n" 3210 titrowopen = titrowopen + "\n" 3211 3212 # Tex gotchas 3213 if TARGET == "tex": 3214 if not self.border: 3215 rowopen = titrowopen = "" 3216 else: 3217 close = rowopen + close 3218 3219 # Now we tag all the table cells on each row 3220 tagged_cells = [self._tag_cells(cell) for cell in rows] 3221 3222 # Add row separator tags between lines 3223 tagged_rows = [] 3224 if rowsep: 3225 tagged_rows = [cell + rowsep for cell in tagged_cells] 3226 # Remove last rowsep, because the table is over 3227 tagged_rows[-1] = tagged_rows[-1].replace(rowsep, "") 3228 # Add row BEGIN/END tags for each line 3229 else: 3230 for rowdata in rows: 3231 if rowdata["title"]: 3232 o, c = titrowopen, titrowclose 3233 else: 3234 o, c = rowopen, rowclose 3235 row = tagged_cells.pop(0) 3236 tagged_rows.append(o + row + c) 3237 3238 # Join the pieces together 3239 fulltable = [] 3240 if open_: 3241 fulltable.append(open_) 3242 fulltable.extend(tagged_rows) 3243 if close: 3244 fulltable.append(close) 3245 3246 return fulltable 3247 3248 3249############################################################################## 3250 3251 3252class BlockMaster: 3253 "TIP: use blockin/out to add/del holders" 3254 3255 def __init__(self): 3256 self.BLK = [] 3257 self.HLD = [] 3258 self.PRP = [] 3259 self.depth = 0 3260 self.count = 0 3261 self.last = "" 3262 self.tableparser = None 3263 self.contains = { 3264 "para": ["comment", "raw", "tagged"], 3265 "verb": [], 3266 "table": ["comment"], 3267 "raw": [], 3268 "tagged": [], 3269 "comment": [], 3270 
"quote": ["quote", "comment", "raw", "tagged"], 3271 "list": [ 3272 "list", 3273 "numlist", 3274 "deflist", 3275 "para", 3276 "verb", 3277 "comment", 3278 "raw", 3279 "tagged", 3280 ], 3281 "numlist": [ 3282 "list", 3283 "numlist", 3284 "deflist", 3285 "para", 3286 "verb", 3287 "comment", 3288 "raw", 3289 "tagged", 3290 ], 3291 "deflist": [ 3292 "list", 3293 "numlist", 3294 "deflist", 3295 "para", 3296 "verb", 3297 "comment", 3298 "raw", 3299 "tagged", 3300 ], 3301 "bar": [], 3302 "title": [], 3303 "numtitle": [], 3304 } 3305 self.allblocks = list(self.contains.keys()) 3306 3307 # If one is found inside another, ignore the marks 3308 self.exclusive = ["comment", "verb", "raw", "tagged"] 3309 3310 # May we include bars inside quotes? 3311 if rules["barinsidequote"]: 3312 self.contains["quote"].append("bar") 3313 3314 def block(self): 3315 if not self.BLK: 3316 return "" 3317 return self.BLK[-1] 3318 3319 def isblock(self, name=""): 3320 return self.block() == name 3321 3322 def prop(self, key): 3323 if not self.PRP: 3324 return "" 3325 return self.PRP[-1].get(key) or "" 3326 3327 def propset(self, key, val): 3328 self.PRP[-1][key] = val 3329 # Debug('BLOCK prop ++: %s->%s'%(key,repr(val)), 1) 3330 # Debug('BLOCK props: %s'%(repr(self.PRP)), 1) 3331 3332 def hold(self): 3333 if not self.HLD: 3334 return [] 3335 return self.HLD[-1] 3336 3337 def holdadd(self, line): 3338 if self.block().endswith("list"): 3339 line = [line] 3340 self.HLD[-1].append(line) 3341 Debug("HOLD add: %s" % repr(line), 4) 3342 Debug("FULL HOLD: %s" % self.HLD, 4) 3343 3344 def holdaddsub(self, line): 3345 self.HLD[-1][-1].append(line) 3346 Debug("HOLD addsub: %s" % repr(line), 4) 3347 Debug("FULL HOLD: %s" % self.HLD, 4) 3348 3349 def holdextend(self, lines): 3350 if self.block().endswith("list"): 3351 lines = [lines] 3352 self.HLD[-1].extend(lines) 3353 Debug("HOLD extend: %s" % repr(lines), 4) 3354 Debug("FULL HOLD: %s" % self.HLD, 4) 3355 3356 def blockin(self, block): 3357 ret = [] 3358 if 
    def blockout(self):
        """
        Closes the current (innermost) block and returns its tagged lines.

        The result is composed by the method named after the block (para,
        verb, table, ...). If another block is still open after the closing,
        the result is stored on that mother block holder (except for comment
        blocks) and an empty list is returned instead.
        """
        if not self.BLK:
            Error("No block to pop")
        blockname = self.BLK.pop()
        # The block method runs before its holder and properties are popped
        # and before the depth is updated, so hold(), prop() and self.depth
        # still refer to the block being closed
        result = getattr(self, blockname)()
        parsed = self.HLD.pop()
        self.PRP.pop()
        self.depth = len(self.BLK)
        if blockname == "table":
            # The attribute is removed (not reset to None).
            # blockin() creates a new parser when a table is opened.
            del self.tableparser

        # Inserting a nested block into mother
        if self.block():
            if blockname != "comment":  # ignore comment blocks
                if self.block().endswith("list"):
                    # Lists hold one list per item: add to the last item
                    self.HLD[-1][-1].append(result)
                else:
                    self.HLD[-1].append(result)
            # Reset now. Mother block will have it all
            result = []

        Debug("block -- ({}): {}".format(blockname, self.BLK), 3)
        Debug("RELEASED ({}): {}".format(blockname, parsed), 3)

        # Save this top level block name (produced output)
        # The next block will use it
        if result:
            self.last = blockname
            Debug("BLOCK: %s" % result, 6)

        return result
    def numtitle(self):
        "Numbered titles are composed by title(), using the 'numtitle' name"
        # The name is what title() uses on the blank line rules checking
        return self.title("numtitle")
3550 if self._should_add_blank_line("after", name): 3551 result.append("") 3552 3553 return result 3554 3555 def table(self): 3556 result = [] 3557 3558 # Blank line before? 3559 if self._should_add_blank_line("before", "table"): 3560 result.append("") 3561 3562 # Rewrite all table cells by the unmasked and escaped data 3563 lines = self._get_escaped_hold() 3564 for i in range(len(lines)): 3565 cells = lines[i].split(SEPARATOR) 3566 self.tableparser.rows[i]["cells"] = cells 3567 result.extend(self.tableparser.dump()) 3568 3569 # Blank line after? 3570 if self._should_add_blank_line("after", "table"): 3571 result.append("") 3572 3573 return result 3574 3575 def quote(self): 3576 result = [] 3577 open_ = TAGS["blockQuoteOpen"] # block based 3578 close = TAGS["blockQuoteClose"] 3579 qline = TAGS["blockQuoteLine"] # line based 3580 indent = tagindent = "\t" * self.depth 3581 3582 # Apply rules 3583 if rules["tagnotindentable"]: 3584 tagindent = "" 3585 if not rules["keepquoteindent"]: 3586 indent = "" 3587 3588 # Blank line before? 3589 if self._should_add_blank_line("before", "quote"): 3590 result.append("") 3591 3592 # Open tag 3593 if open_: 3594 result.append(tagindent + open_) 3595 3596 # Get contents 3597 for item in self.hold(): 3598 if isinstance(item, list): 3599 result.extend(item) # subquotes 3600 else: 3601 item = regex["quote"].sub("", item) # del TABs 3602 item = self._last_escapes(item) 3603 item = qline * self.depth + item 3604 result.append(indent + item) # quote line 3605 3606 # Close tag 3607 if close: 3608 result.append(tagindent + close) 3609 3610 # Blank line after? 3611 if self._should_add_blank_line("after", "quote"): 3612 result.append("") 3613 3614 return result 3615 3616 def bar(self): 3617 result = [] 3618 bar_tag = "" 3619 3620 # Blank line before? 
3621 if self._should_add_blank_line("before", "bar"): 3622 result.append("") 3623 3624 # Get the original bar chars 3625 bar_chars = self.hold()[0].strip() 3626 3627 # Set bar type 3628 if bar_chars.startswith("="): 3629 bar_tag = TAGS["bar2"] 3630 else: 3631 bar_tag = TAGS["bar1"] 3632 3633 # To avoid comment tag confusion like <!-- ------ --> (sgml) 3634 if TAGS["comment"].count("--"): 3635 bar_chars = bar_chars.replace("--", "__") 3636 3637 # Get the bar tag (may contain \a) 3638 result.append(regex["x"].sub(bar_chars, bar_tag)) 3639 3640 # Blank line after? 3641 if self._should_add_blank_line("after", "bar"): 3642 result.append("") 3643 3644 return result 3645 3646 def deflist(self): 3647 return self.list("deflist") 3648 3649 def numlist(self): 3650 return self.list("numlist") 3651 3652 def list(self, name="list"): 3653 result = [] 3654 items = self.hold() 3655 indent = self.prop("indent") 3656 tagindent = indent 3657 listline = TAGS.get(name + "ItemLine") 3658 itemcount = 0 3659 3660 if name == "deflist": 3661 itemopen = TAGS[name + "Item1Open"] 3662 itemclose = TAGS[name + "Item2Close"] 3663 itemsep = TAGS[name + "Item1Close"] + TAGS[name + "Item2Open"] 3664 else: 3665 itemopen = TAGS[name + "ItemOpen"] 3666 itemclose = TAGS[name + "ItemClose"] 3667 itemsep = "" 3668 3669 # Apply rules 3670 if rules["tagnotindentable"]: 3671 tagindent = "" 3672 if not rules["keeplistindent"]: 3673 indent = tagindent = "" 3674 3675 # ItemLine: number of leading chars identifies list depth 3676 if listline: 3677 itemopen = listline * self.depth + itemopen 3678 3679 # Adds trailing space on opening tags 3680 if (name == "list" and rules["spacedlistitemopen"]) or ( 3681 name == "numlist" and rules["spacednumlistitemopen"] 3682 ): 3683 itemopen = itemopen + " " 3684 3685 # Remove two-blanks from list ending mark, to avoid <p> 3686 items[-1] = self._remove_twoblanks(items[-1]) 3687 3688 # Blank line before? 
3689 if self._should_add_blank_line("before", name): 3690 result.append("") 3691 3692 # Tag each list item (multiline items), store in listbody 3693 itemopenorig = itemopen 3694 listbody = [] 3695 widelist = 0 3696 for item in items: 3697 3698 # Add "manual" item count for noautonum targets 3699 itemcount += 1 3700 if name == "numlist" and not rules["autonumberlist"]: 3701 n = str(itemcount) 3702 itemopen = regex["x"].sub(n, itemopenorig) 3703 del n 3704 3705 # Tag it 3706 item[0] = self._last_escapes(item[0]) 3707 if name == "deflist": 3708 _, term, rest = item[0].split(SEPARATOR, 2) 3709 item[0] = rest 3710 if not item[0]: 3711 del item[0] # to avoid <p> 3712 listbody.append(tagindent + itemopen + term + itemsep) 3713 else: 3714 fullitem = tagindent + itemopen 3715 listbody.append(item[0].replace(SEPARATOR, fullitem)) 3716 del item[0] 3717 3718 # Process next lines for this item (if any) 3719 for line in item: 3720 if isinstance(line, list): # sublist inside 3721 listbody.extend(line) 3722 else: 3723 line = self._last_escapes(line) 3724 3725 # Blank lines turns to <p> 3726 if not line and rules["parainsidelist"]: 3727 line = indent + TAGS["paragraphOpen"] + TAGS["paragraphClose"] 3728 line = line.rstrip() 3729 widelist = 1 3730 3731 # Some targets don't like identation here (wiki) 3732 if not rules["keeplistindent"] or ( 3733 name == "deflist" and rules["deflisttextstrip"] 3734 ): 3735 line = line.lstrip() 3736 3737 # Maybe we have a line prefix to add? 
(wiki) 3738 if name == "deflist" and TAGS["deflistItem2LinePrefix"]: 3739 line = TAGS["deflistItem2LinePrefix"] + line 3740 3741 listbody.append(line) 3742 3743 # Close item (if needed) 3744 if itemclose: 3745 listbody.append(tagindent + itemclose) 3746 3747 if not widelist and rules["compactlist"]: 3748 listopen = TAGS.get(name + "OpenCompact") 3749 listclose = TAGS.get(name + "CloseCompact") 3750 else: 3751 listopen = TAGS.get(name + "Open") 3752 listclose = TAGS.get(name + "Close") 3753 3754 # Open list (not nestable lists are only opened at mother) 3755 if listopen and not (rules["listnotnested"] and BLOCK.depth != 1): 3756 result.append(tagindent + listopen) 3757 3758 result.extend(listbody) 3759 3760 # Close list (not nestable lists are only closed at mother) 3761 if listclose and not (rules["listnotnested"] and self.depth != 1): 3762 result.append(tagindent + listclose) 3763 3764 # Blank line after? 3765 if self._should_add_blank_line("after", name): 3766 result.append("") 3767 3768 return result 3769 3770 3771############################################################################## 3772 3773 3774def listTargets(): 3775 """List available targets.""" 3776 for target, name in sorted(TARGET_NAMES.items()): 3777 print("{:8}{}".format(target, name)) 3778 3779 3780def get_file_body(file_): 3781 "Returns all the document BODY lines" 3782 return process_source_file(file_, noconf=1)[1][2] 3783 3784 3785def finish_him(outlist, config): 3786 "Writing output to screen or file" 3787 outfile = config["outfile"] 3788 outlist = unmaskEscapeChar(outlist) 3789 outlist = expandLineBreaks(outlist) 3790 3791 # Apply PostProc filters 3792 if config["postproc"]: 3793 filters = compile_filters(config["postproc"], "Invalid PostProc filter regex") 3794 postoutlist = [] 3795 errmsg = "Invalid PostProc filter replacement" 3796 for line in outlist: 3797 for rgx, repl in filters: 3798 try: 3799 line = rgx.sub(repl, line) 3800 except Exception: 3801 Error("{}: '{}'".format(errmsg, 
def doHeader(headers, config):
    """
    Composes the target document headers, filling the target's template.

    headers: the three source header lines. Anything empty is handled as
             three empty headers.
    config:  the parsed config. The "headers", "target" and "style" keys
             are read here. The config itself is never changed.

    Returns the composed headers as a list of lines, or an empty list
    when the headers are disabled on the config.
    """
    if not config["headers"]:
        return []
    if not headers:
        headers = ["", "", ""]
    target = config["target"]

    template = HEADER_TEMPLATE[target].split("\n")

    # Use a private copy of the style list, so the caller's config is kept
    # untouched. A missing style is the same as no style at all.
    style = list(config.get("style") or [])
    # Tex: strip .sty extension from each style filename.
    if target == "tex":
        style = [os.path.splitext(x)[0] for x in style]

    head_data = {"STYLE": style, "ENCODING": get_encoding_string(target)}

    # Parse header contents
    for i in 0, 1, 2:
        contents = headers[i]
        # Escapes - on tex, just do it if any \tag{} present
        if target != "tex" or (target == "tex" and re.search(r"\\\w+{", contents)):
            contents = doEscape(target, contents)
        if target in ["lout", "tex"]:
            contents = doFinalEscape(target, contents)

        head_data["HEADER%d" % (i + 1)] = contents

    Debug("Header Data: %s" % head_data, 1)

    # Scan for empty dictionary keys
    # If found, scan template lines for that key reference
    # If found, remove the reference
    # If there isn't any other key reference on the same line, remove it
    #
    # A new list is composed on each pass: removing lines from the very
    # list being scanned makes the scan skip the line after a removed one.
    any_reference = re.compile(r"%\([A-Z0-9]+\)s")
    for key, value in head_data.items():
        if value:
            continue
        reference = "%%(%s)s" % key
        kept = []
        for line in template:
            if reference in line:
                line = line.replace(reference, "")
                if not any_reference.search(line):
                    continue  # the line had nothing else to show
            kept.append(line)
        template = kept

    # Style is a multiple tag.
    # - If none or just one, use default template
    # - If two or more, insert extra lines in a loop (and remove original)
    styles = head_data["STYLE"]
    if len(styles) == 1:
        head_data["STYLE"] = styles[0]
    elif len(styles) > 1:
        style_mark = "%(STYLE)s"
        for i, line in enumerate(template):
            if style_mark in line:
                # One copy of the template line for each style, same order
                template[i : i + 1] = [line.replace(style_mark, x) for x in styles]
                break

    # Populate template with data (dict expansion)
    template = "\n".join(template) % head_data

    return template.split("\n")
def doFinalEscape(target, txt):
    """
    Last escapes of each line.

    target: the target name. Only "man", "sgml", "lout" and "tex" have
            final escapes, any other target gets the text back unchanged.
    txt:    the text to be escaped.

    Returns the escaped text.
    """
    if target == "man":
        # TODO man: where - really needs to be escaped?
        txt = txt.replace("-", r"\-")
    elif target == "sgml":
        # The open bracket must become an entity; replacing it by itself
        # would leave the text unescaped
        txt = txt.replace("[", "&lsqb;")
    elif target == "lout":
        txt = txt.replace("/", '"/"')
    elif target == "tex":
        txt = txt.replace("_", r"\_")
        # The masks protect underscores that must not be escaped
        # (parse_images() masks the ones in image filenames); the literal
        # char is restored now that the escape is done
        txt = txt.replace("vvvvTexUndervvvv", "_")  # shame!
        txt = txt.replace("vvvUnderscoreInRawTextvvv", "_")
        txt = txt.replace("vvvUnderscoreInTaggedTextvvv", "_")
    return txt
def get_tagged_link(label, url):
    "Returns the target's link tag, filled with the given label and URL"
    target = CONF["target"]
    img_rgx = regex["img"]

    # Link type: e-mail address or generic URL
    linktype = "email" if regex["email"].match(url) else "url"

    # Escape specials from the label, and from the URL when required
    label = doEscape(target, label)
    if not rules["linkable"] or rules["escapeurl"]:
        url = doEscape(target, url)

    # A guessed link (no protocol informed) gets the protocol added
    guessed = ""
    if linktype == "url" and re.match("(?i)" + regex["_urlskel"]["guess"], url):
        protocol = "http://" if url[0] in "Ww" else "ftp://"
        guessed = protocol + url

    # Not link aware targets -> protocol is useless
    if not rules["linkable"]:
        guessed = ""

    # Simple link (not guessed): just add the link data to the tag
    if not (label or guessed):
        tag = TAGS[linktype]
        return regex["x"].sub(url, tag)

    # From here on: named link or guessed simple link
    if not label:
        label = url  # shown with no protocol
    if guessed:
        url = guessed  # linked with protocol

    # Image inside link!
    img_found = img_rgx.match(label)
    if img_found:
        if rules["imglinkable"]:
            label = parse_images(label)  # get image tag
        else:
            label = "(%s)" % img_found.group(1)  # img@link !supported

    # Putting data on the right appearance order
    if rules["labelbeforelink"] or not rules["linkable"]:
        pieces = (label, url)
    else:
        pieces = (url, label)

    # Add link data to tag (replace \a's), one piece at a time
    tagged = TAGS["%sMark" % linktype]
    for piece in pieces:
        tagged = regex["x"].sub(piece, tagged, 1)
    return tagged
def process_source_file(file_="", noconf=0, contents=None):
    """
    Read a source document and gather all the configuration for it.

    The document comes from 'contents' when informed, otherwise it is read
    from the 'file_' path. No sanity checking is done on this step.

    Unless 'noconf' is set, the config is joined in this order:
      1. The user configuration file (i.e. $HOME/.txt2tagsrc)
      2. The source document's CONF area
      3. The command line options

    Returns a tuple of two items:
      1. The parsed config dictionary (empty when 'noconf' is set)
      2. The document's parts, as a (head, conf, body) tuple
    """
    if contents:
        source = SourceDocument(contents=contents)
    else:
        source = SourceDocument(file_)
    doc_head, doc_conf, doc_body = source.split()
    doc_parts = (doc_head, doc_conf, doc_body)
    Message("Source document contents stored", 2)

    # Config not wanted: only the document parts matter
    if noconf:
        return {}, doc_parts

    # Read document config
    doc_raw = source.get_raw_config()
    # Join all the config directives found, then parse it
    all_raw = RC_RAW + doc_raw + CMDLINE_RAW
    Message("Parsing and saving all config found (%03d items)" % (len(all_raw)), 1)
    parsed = ConfigMaster(all_raw).parse()

    # Add manually the filename to the conf dic
    if contents:
        parsed["sourcefile"] = MODULEIN
        parsed["infile"] = MODULEIN
        parsed["outfile"] = MODULEOUT
    else:
        parsed["sourcefile"] = file_
    Debug("Complete config: %s" % parsed, 1)

    return parsed, doc_parts
def convert_file(headers, body, config, first_body_lineno=1):
    """Convert a whole document and hand the result to finish_him().

    headers and body are the document parts to convert, config is the
    configuration dictionary (passed through ConfigMaster().sanity()
    before use) and first_body_lineno is forwarded to convert() as the
    number of the first body line.

    The output is assembled as head + TOC + body + footer and the return
    value is whatever finish_him() returns for it.
    """
    config = ConfigMaster().sanity(config)

    # Compose the target file Headers
    # TODO escape line before?
    # TODO see exceptions by tex and mgp
    Message("Composing target Headers", 1)
    target_head = doHeader(headers, config)

    # Parse the full marked body into tagged target
    Message("Composing target Body", 1)
    target_body, marked_toc = convert(body, config, firstlinenr=first_body_lineno)

    # Compose the target file Footer
    Message("Composing target Footer", 1)
    target_foot = doFooter(config)

    # Make TOC (if needed)
    Message("Composing target TOC", 1)
    tagged_toc = toc_tagger(marked_toc, config)
    target_toc = toc_formatter(tagged_toc, config)

    # Finally, we have our document
    outlist = target_head + target_toc + target_body + target_foot
    return finish_him(outlist, config)


def parse_images(line):
    """Tag all images found on the line and return the tagged line.

    Images are replaced one at a time, from left to right, until none is
    left. Nothing is replaced when the target's image tag is exactly
    "[\\a]" (presumably because the tagged result would still look like
    an image mark and the loop would never end).
    """
    # One search per iteration: the match is reused for the image name
    # and refreshed only after the line has changed.
    m = regex["img"].search(line)
    while m and TAGS["img"] != "[\a]":
        txt = m.group(1)
        tag = TAGS["img"]

        # If target supports image alignment, here we go
        if rules["imgalignable"]:

            align = get_image_align(line)  # right
            align_name = align.capitalize()  # Right

            # The align is a full tag, or part of the image tag (~A~)
            if TAGS["imgAlign" + align_name]:
                tag = TAGS["imgAlign" + align_name]
            else:
                align_tag = TAGS["_imgAlign" + align_name]
                tag = regex["_imgAlign"].sub(align_tag, tag, 1)

        if TARGET == "tex":
            # Double the backslash of every "\b": the tag is used below as
            # a regex replacement template, which would eat a single one
            tag = tag.replace("\\b", "\\\\b")
            # NOTE(review): this token is presumably turned back into "_"
            # elsewhere in the file - confirm
            txt = txt.replace("_", "vvvvTexUndervvvv")

        # Ugly hack to avoid infinite loop when target's image tag contains []
        tag = tag.replace("[", "vvvvEscapeSquareBracketvvvv")

        line = regex["img"].sub(tag, line, 1)
        # Put the image name into the first regex["x"] match of the line
        line = regex["x"].sub(txt, line, 1)

        m = regex["img"].search(line)
    return line.replace("vvvvEscapeSquareBracketvvvv", "[")
("strike", "fontStrike"), 4283 ]: 4284 if regex[font].search(line): 4285 line = beautify_me(beauti, font, line) 4286 4287 line = parse_images(line) 4288 return line 4289 4290 4291def get_include_contents(file_, path=""): 4292 "Parses %!include: value and extract file contents" 4293 ids = {"`": "verb", '"': "raw", "'": "tagged"} 4294 id_ = "t2t" 4295 # Set include type and remove identifier marks 4296 mark = file_[0] 4297 if mark in ids.keys(): 4298 if file_[:2] == file_[-2:] == mark * 2: 4299 id_ = ids[mark] # set type 4300 file_ = file_[2:-2] # remove marks 4301 # Handle remote dir execution 4302 filepath = os.path.join(path, file_) 4303 # Read included file contents 4304 lines = Readfile(filepath) 4305 # Default txt2tags marked text, just BODY matters 4306 if id_ == "t2t": 4307 lines = get_file_body(filepath) 4308 # TODO fix images relative path if file has a path, ie.: 4309 # chapter1/index.t2t (wait until tree parsing) 4310 # TODO for the images path fix, also respect outfile path, 4311 # if different from infile (wait until tree parsing) 4312 lines.insert(0, "%INCLUDED({}) starts here: {}".format(id_, file_)) 4313 # This appears when included hit EOF with verbatim area open 4314 # lines.append('%%INCLUDED(%s) ends here: %s'%(id_,file_)) 4315 return id_, lines 4316 4317 4318def set_global_config(config): 4319 global CONF, TAGS, regex, rules, TARGET 4320 CONF = config 4321 rules = getRules(CONF) 4322 TAGS = getTags(CONF) 4323 regex = getRegexes() 4324 TARGET = config["target"] # save for buggy functions that need global 4325 4326 4327def convert(bodylines, config, firstlinenr=1): 4328 global BLOCK, TITLE 4329 4330 set_global_config(config) 4331 4332 target = config["target"] 4333 BLOCK = BlockMaster() 4334 MASK = MaskMaster() 4335 TITLE = TitleMaster() 4336 4337 ret = [] 4338 f_lastwasblank = 0 4339 4340 # Compiling all PreProc regexes 4341 pre_filter = compile_filters(CONF["preproc"], "Invalid PreProc filter regex") 4342 4343 # Let's mark it up! 
def convert(bodylines, config, firstlinenr=1):
    """Parse the marked body lines and return the tagged target lines.

    bodylines is the list of source body lines, config is the
    configuration dictionary (installed as the global one through
    set_global_config()) and firstlinenr is the number given to the
    first body line in the debug messages.

    Every line is tried against the structures in a fixed order:
    comment block, tagged, raw, verbatim, blank line, special (%!)
    line, comment, bar, title, quote, list, table and finally paragraph.
    Lines that belong to a block are held by BLOCK and dumped to the
    result when the block is closed.

    The globals BLOCK and TITLE are replaced by fresh instances.

    Returns a (ret, marked_toc) tuple: the list of tagged lines and the
    result of TITLE.dump_marked_toc().
    """
    global BLOCK, TITLE

    set_global_config(config)

    target = config["target"]
    BLOCK = BlockMaster()
    MASK = MaskMaster()
    TITLE = TitleMaster()

    ret = []
    f_lastwasblank = 0

    # Compiling all PreProc regexes
    pre_filter = compile_filters(CONF["preproc"], "Invalid PreProc filter regex")

    # Let's mark it up!
    # linenr is the number shown in debug messages, lineref is the index of
    # the next line to read from bodylines (which grows on %!include)
    linenr = firstlinenr - 1
    lineref = 0
    while lineref < len(bodylines):
        # Defaults
        MASK.reset()
        # Callable that will receive the parsed line; when still empty at
        # the end of the iteration, it is chosen from the current block
        results_box = ""

        untouchedline = bodylines[lineref]

        line = re.sub("[\n\r]+$", "", untouchedline)  # del line break

        # Apply PreProc filters
        if pre_filter:
            errmsg = "Invalid PreProc filter replacement"
            for rgx, repl in pre_filter:
                try:
                    line = rgx.sub(repl, line)
                except Exception:
                    Error("{}: '{}'".format(errmsg, repl))

        line = maskEscapeChar(line)  # protect \ char
        linenr += 1
        lineref += 1

        Debug(repr(line), 2, linenr)  # heavy debug: show each line

        # ------------------[ Comment Block ]------------------------

        # We're already on a comment block
        if BLOCK.block() == "comment":

            # Closing comment
            if regex["blockCommentClose"].search(line):
                ret.extend(BLOCK.blockout() or [])
                continue

            # Normal comment-inside line. Ignore it.
            continue

        # Detecting comment block init
        if (
            regex["blockCommentOpen"].search(line)
            and BLOCK.block() not in BLOCK.exclusive
        ):
            ret.extend(BLOCK.blockin("comment"))
            continue

        # -------------------------[ Tagged Text ]----------------------

        # We're already on a tagged block
        if BLOCK.block() == "tagged":

            # Closing tagged
            if regex["blockTaggedClose"].search(line):
                ret.extend(BLOCK.blockout())
                continue

            # Normal tagged-inside line
            BLOCK.holdadd(line)
            continue

        # Detecting tagged block init
        if (
            regex["blockTaggedOpen"].search(line)
            and BLOCK.block() not in BLOCK.exclusive
        ):
            ret.extend(BLOCK.blockin("tagged"))
            continue

        # One line tagged text
        if regex["1lineTagged"].search(line) and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin("tagged"))
            line = regex["1lineTagged"].sub("", line)
            BLOCK.holdadd(line)
            ret.extend(BLOCK.blockout())
            continue

        # -------------------------[ Raw Text ]----------------------

        # We're already on a raw block
        if BLOCK.block() == "raw":

            # Closing raw
            if regex["blockRawClose"].search(line):
                ret.extend(BLOCK.blockout())
                continue

            # Normal raw-inside line
            BLOCK.holdadd(line)
            continue

        # Detecting raw block init
        if regex["blockRawOpen"].search(line) and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin("raw"))
            continue

        # One line raw text
        if regex["1lineRaw"].search(line) and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin("raw"))
            line = regex["1lineRaw"].sub("", line)
            BLOCK.holdadd(line)
            ret.extend(BLOCK.blockout())
            continue

        # ------------------------[ Verbatim ]----------------------

        # TIP We'll never support beautifiers inside verbatim

        # Closing table mapped to verb
        if (
            BLOCK.block() == "verb"
            and BLOCK.prop("mapped") == "table"
            and not regex["table"].search(line)
        ):
            ret.extend(BLOCK.blockout())

        # We're already on a verb block
        if BLOCK.block() == "verb":

            # Closing verb
            if regex["blockVerbClose"].search(line):
                ret.extend(BLOCK.blockout())
                continue

            # Normal verb-inside line
            BLOCK.holdadd(line)
            continue

        # Detecting verb block init
        if regex["blockVerbOpen"].search(line) and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin("verb"))
            f_lastwasblank = 0
            continue

        # One line verb-formatted text
        if regex["1lineVerb"].search(line) and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin("verb"))
            line = regex["1lineVerb"].sub("", line)
            BLOCK.holdadd(line)
            ret.extend(BLOCK.blockout())
            f_lastwasblank = 0
            continue

        # Tables are mapped to verb when target is not table-aware
        if not rules["tableable"] and regex["table"].search(line):
            if not BLOCK.isblock("verb"):
                ret.extend(BLOCK.blockin("verb"))
                BLOCK.propset("mapped", "table")
                BLOCK.holdadd(line)
                continue

        # ---------------------[ blank lines ]-----------------------

        if regex["blankline"].search(line):

            # Close open paragraph
            if BLOCK.isblock("para"):
                ret.extend(BLOCK.blockout())
                f_lastwasblank = 1
                continue

            # Close all open tables
            if BLOCK.isblock("table"):
                ret.extend(BLOCK.blockout())
                f_lastwasblank = 1
                continue

            # Close all open quotes
            while BLOCK.isblock("quote"):
                ret.extend(BLOCK.blockout())

            # Closing all open lists
            if f_lastwasblank:  # 2nd consecutive blank
                if BLOCK.block().endswith("list"):
                    BLOCK.holdaddsub("")  # helps parser
                while BLOCK.depth:  # closes list (if any)
                    ret.extend(BLOCK.blockout())
                continue  # ignore consecutive blanks

            # Paragraph (if any) is wanted inside lists also
            if BLOCK.block().endswith("list"):
                BLOCK.holdaddsub("")

            f_lastwasblank = 1
            continue

        # ---------------------[ special ]---------------------------

        if regex["special"].search(line):

            targ, key, val = ConfigLines().parse_line(line, None, target)

            if key:
                Debug("Found config '{}', value '{}'".format(key, val), 1, linenr)
            else:
                Debug("Bogus Special Line", 1, linenr)

            # %!include command
            if key == "include":
                incpath = os.path.dirname(CONF["sourcefile"])
                incfile = val
                err = "A file cannot include itself (loop!)"
                # get_include_contents() joins incfile onto incpath, so the
                # resolved locations are compared too: the plain name check
                # misses "./doc.t2t" or a source living in a subdirectory,
                # and the body would then grow forever.
                incfull = os.path.abspath(os.path.join(incpath, incfile))
                if CONF["sourcefile"] == incfile or incfull == os.path.abspath(
                    CONF["sourcefile"]
                ):
                    Error("{}: {}".format(err, incfile))
                inctype, inclines = get_include_contents(incfile, incpath)

                # Verb, raw and tagged are easy
                if inctype != "t2t":
                    ret.extend(BLOCK.blockin(inctype))
                    BLOCK.holdextend(inclines)
                    ret.extend(BLOCK.blockout())
                else:
                    # Insert include lines into body, right after the
                    # current line, so they are parsed on the next turns
                    # TODO include maxdepth limit
                    bodylines = bodylines[:lineref] + inclines + bodylines[lineref:]

                # This line is done, go to next
                continue

        # ---------------------[ Comments ]--------------------------

        # Just skip them
        if regex["comment"].search(line):
            continue

        # ---------------------[ Triggers ]--------------------------

        # Valid line, reset blank status
        f_lastwasblank = 0

        # Any NOT quote line closes all open quotes
        if BLOCK.isblock("quote") and not regex["quote"].search(line):
            while BLOCK.isblock("quote"):
                ret.extend(BLOCK.blockout())

        # Any NOT table line closes an open table
        if BLOCK.isblock("table") and not regex["table"].search(line):
            ret.extend(BLOCK.blockout())

        # ---------------------[ Horizontal Bar ]--------------------

        if regex["bar"].search(line):

            # Bars inside quotes are handled on the Quote processing
            # Otherwise we parse the bars right here
            #
            if not (BLOCK.isblock("quote") or regex["quote"].search(line)) or (
                BLOCK.isblock("quote") and not rules["barinsidequote"]
            ):

                # Close all the opened blocks
                ret.extend(BLOCK.blockin("bar"))

                # Extract the bar chars (- or =)
                m = regex["bar"].search(line)
                bar_chars = m.group(2)

                # Process and dump the tagged bar
                BLOCK.holdadd(bar_chars)
                ret.extend(BLOCK.blockout())
                Debug("BAR: %s" % line, 6)

                # We're done, nothing more to process
                continue

        # ---------------------[ Title ]-----------------------------

        if (
            regex["title"].search(line) or regex["numtitle"].search(line)
        ) and not BLOCK.block().endswith("list"):

            if regex["title"].search(line):
                name = "title"
            else:
                name = "numtitle"

            # Close all the opened blocks
            ret.extend(BLOCK.blockin(name))

            # Process title
            TITLE.add(line)
            ret.extend(BLOCK.blockout())

            # We're done, nothing more to process
            continue

        # ---------------------[ apply masks ]-----------------------

        line = MASK.mask(line)

        # XXX from here, only block-inside lines will pass

        # ---------------------[ Quote ]-----------------------------

        if regex["quote"].search(line):

            # Store number of leading TABS
            quotedepth = len(regex["quote"].search(line).group(0))

            # SGML doesn't support nested quotes
            if rules["quotenotnested"]:
                quotedepth = 1

            # Don't cross depth limit
            maxdepth = rules["quotemaxdepth"]
            if maxdepth and quotedepth > maxdepth:
                quotedepth = maxdepth

            # New quote
            if not BLOCK.isblock("quote"):
                ret.extend(BLOCK.blockin("quote"))

            # New subquotes
            while BLOCK.depth < quotedepth:
                BLOCK.blockin("quote")

            # Closing quotes
            while quotedepth < BLOCK.depth:
                ret.extend(BLOCK.blockout())

            # Bar inside quote: tag the bar with a throwaway BlockMaster
            # and hold the tagged result as contents of the open quote
            if regex["bar"].search(line) and rules["barinsidequote"]:
                tempBlock = BlockMaster()
                tagged_bar = []
                tagged_bar.extend(tempBlock.blockin("bar"))
                tempBlock.holdadd(line)
                tagged_bar.extend(tempBlock.blockout())
                BLOCK.holdextend(tagged_bar)
                continue

        # ---------------------[ Lists ]-----------------------------

        # An empty item also closes the current list
        if BLOCK.block().endswith("list"):
            m = regex["listclose"].match(line)
            if m:
                listindent = m.group(1)
                listtype = m.group(2)
                currlisttype = BLOCK.prop("type")
                currlistindent = BLOCK.prop("indent")
                if listindent == currlistindent and listtype == currlisttype:
                    ret.extend(BLOCK.blockout())
                    continue

        if (
            regex["list"].search(line)
            or regex["numlist"].search(line)
            or regex["deflist"].search(line)
        ):

            listindent = BLOCK.prop("indent")
            listids = "".join(LISTNAMES.keys())
            m = re.match("^( *)([%s]) " % re.escape(listids), line)
            listitemindent = m.group(1)
            listtype = m.group(2)
            listname = LISTNAMES[listtype]
            # A list item always goes to the hold of the current block
            results_box = BLOCK.holdadd

            # Del list ID (and separate term from definition)
            if listname == "deflist":
                term = parse_deflist_term(line)
                line = regex["deflist"].sub(SEPARATOR + term + SEPARATOR, line)
            else:
                line = regex[listname].sub(SEPARATOR, line)

            # Don't cross depth limit
            maxdepth = rules["listmaxdepth"]
            if maxdepth and BLOCK.depth == maxdepth:
                if len(listitemindent) > len(listindent):
                    listitemindent = listindent

            # List bumping (same indent, diff mark)
            # Close the currently open list to clear the mess
            if (
                BLOCK.block().endswith("list")
                and listname != BLOCK.block()
                and len(listitemindent) == len(listindent)
            ):
                ret.extend(BLOCK.blockout())
                listindent = BLOCK.prop("indent")

            # Open mother list or sublist
            if not BLOCK.block().endswith("list") or len(listitemindent) > len(
                listindent
            ):
                ret.extend(BLOCK.blockin(listname))
                BLOCK.propset("indent", listitemindent)
                BLOCK.propset("type", listtype)

            # Closing sublists
            while len(listitemindent) < len(BLOCK.prop("indent")):
                ret.extend(BLOCK.blockout())

            # O-oh, sublist before list ("\n\n - foo\n- foo")
            # Fix: close sublist (as mother), open another list
            if not BLOCK.block().endswith("list"):
                ret.extend(BLOCK.blockin(listname))
                BLOCK.propset("indent", listitemindent)
                BLOCK.propset("type", listtype)

        # ---------------------[ Table ]-----------------------------

        # TODO escape undesired format inside table
        if regex["table"].search(line):

            if not BLOCK.isblock("table"):  # first table line!
                ret.extend(BLOCK.blockin("table"))
                BLOCK.tableparser.__init__(line)

            tablerow = TableMaster().parse_row(line)
            BLOCK.tableparser.add_row(tablerow)  # save config

            # Maintain line to unmask and inlines
            # XXX Bug: | **bo | ld** | turns **bo\x01ld** and gets converted :(
            # TODO isolate unmask+inlines parsing to use here
            line = SEPARATOR.join(tablerow["cells"])

        # ---------------------[ Paragraph ]-------------------------

        if not BLOCK.block():  # new para!
            ret.extend(BLOCK.blockin("para"))

        ############################################################
        ############################################################
        ############################################################

        # ---------------------[ Final Parses ]----------------------

        # The target-specific special char escapes for body lines
        line = doEscape(target, line)

        line = add_inline_tags(line)
        line = MASK.undo(line)

        # ---------------------[ Hold or Return? ]-------------------

        # Now we must choose where to put the parsed line
        #
        if not results_box:
            # List item extra lines
            if BLOCK.block().endswith("list"):
                results_box = BLOCK.holdaddsub
            # Other blocks
            elif BLOCK.block():
                results_box = BLOCK.holdadd
            # No blocks
            else:
                line = doFinalEscape(target, line)
                results_box = ret.append

        results_box(line)

    # EOF: close any open para/verb/lists/table/quotes
    Debug("EOF", 7)
    while BLOCK.block():
        ret.extend(BLOCK.blockout())

    # Maybe close some opened title area?
    if rules["titleblocks"]:
        ret.extend(TITLE.close_all())

    # Maybe a major tag to enclose body? (like DIV for CSS)
    if TAGS["bodyOpen"]:
        ret.insert(0, TAGS["bodyOpen"])
    if TAGS["bodyClose"]:
        ret.append(TAGS["bodyClose"])

    marked_toc = TITLE.dump_marked_toc()

    return ret, marked_toc
" 4862 "Example: {} -t html file.t2t".format(my_name) 4863 ) 4864 4865 config, doc = process_source_file(infile) 4866 headers, config_source, body = doc 4867 4868 first_body_lineno = (len(headers) or 1) + len(config_source) + 1 4869 convert_file(headers, body, config, first_body_lineno=first_body_lineno) 4870 4871 Message("Txt2tags finished successfully", 1) 4872 4873 4874if __name__ == "__main__": 4875 try: 4876 exec_command_line() 4877 except error as msg: 4878 sys.exit(msg) 4879 except Exception: 4880 sys.exit(getUnknownErrorMessage()) 4881 else: 4882 Quit() 4883