1#!/usr/local/bin/python3.8
2# txt2tags - generic text conversion tool
3# https://txt2tags.org/
4# https://github.com/jendrikseipp/txt2tags
5#
6# Copyright 2001-2010 Aurelio Jargas
7# Copyright 2010-2019 Jendrik Seipp
8#
9# License: GPL2+ (http://www.gnu.org/licenses/gpl-2.0.txt)
10#
11########################################################################
12#
# The code that [1] parses the marked text is separated from the
# code that [2] inserts the target tags.
15#
16#   [1] made by: def convert()
17#   [2] made by: class BlockMaster
18#
19# The structures of the marked text are identified and its contents are
20# extracted into a data holder (Python lists and dictionaries).
21#
# When parsing the source file, the blocks (para, lists, quote, table)
# are opened with BlockMaster, right when found. Then their contents,
# which span several lines, are fed into a special holder on the
# BlockMaster instance. Only when the block is closed are the target
# tags inserted for the full block as a whole, in one pass. This way, we
# have better control over blocks. Much better than the previous line by
# line approach.
29#
# In other words, whenever inside a block, the parser *holds* the tag
# insertion process, waiting until the full block is read. That was
# needed primarily to close paragraphs for the XHTML target, but it
# proved to be a very good addition, improving many other processing steps.
34#
35# -------------------------------------------------------------------
36#
37# These important classes are all documented:
38# CommandLine, SourceDocument, ConfigMaster, ConfigLines.
39#
40# There is a RAW Config format and all kind of configuration is first
41# converted to this format. Then a generic method parses it.
42#
43# These functions get information about the input file(s) and take
44# care of the init processing:
45# process_source_file() and convert_file()
46#
47########################################################################
48
# XXX Smart Image Align doesn't work if the image is a link
# Can't fix that because the image is expanded together with the
# link, at the linkbank filling moment. Only the image is passed
# to parse_images(), not the full line, so it is always 'middle'.
53
# XXX Paragraph separation not valid inside Quote
# Quote will not have <p></p> inside; instead it will close and open
# the <blockquote> again. This is a real problem in CSS when defining a
# different background color. Still don't know how to fix it.
58
59# XXX TODO (maybe)
60# New mark which expands to an anchor full title.
61# It is necessary to parse the full document in this order:
62#  DONE  1st scan: HEAD: get all settings, including %!includeconf
63#  DONE  2nd scan: BODY: expand includes & apply %!preproc
64#        3rd scan: BODY: read titles and compose TOC info
65#        4th scan: BODY: full parsing, expanding [#anchor] 1st
# Steps 2 and 3 can be done together, with no tag adding.
# Two complete body scans will be *slow*; not sure it is worth it.
# One solution may be to add the titles as postproc rules.
69
70from __future__ import print_function
71
72import collections
73import getopt
74import io
75import os
76import re
77import sys
78
79##############################################################################
80
# Program information
# (name, version and URL are combined into VERSIONSTR; name and URL also
# appear in the USAGE text)
my_name = "txt2tags"
__version__ = "3.7"
my_url = "https://txt2tags.org"
my_email = "jendrikseipp@gmail.com"
86
87# FLAGS   : the conversion related flags  , may be used in %!options
88# OPTIONS : the conversion related options, may be used in %!options
89# ACTIONS : the other behavior modifiers, valid on command line only
90# NO_TARGET: actions that don't require a target specification
91# NO_MULTI_INPUT: actions that don't accept more than one input file
92# CONFIG_KEYWORDS: the valid %!key:val keywords
93#
# FLAGS and OPTIONS are configs that affect the converted document.
# They usually also have a --no-<option> counterpart to turn them OFF.
96#
97# ACTIONS are needed because when handling multiple input files, strange
98# behavior may occur. There is no --no-<action>.
99# Options --version and --help inside %!options are odd.
100
# Conversion flags with their defaults: every flag starts OFF (0),
# except "headers" and "rc", which start ON (1).
FLAGS = dict.fromkeys(
    ("headers", "enum-title", "toc", "rc", "quiet", "slides"), 0
)
FLAGS.update(headers=1, rc=1)

# Conversion options: each one defaults to the empty string.
OPTIONS = dict.fromkeys(
    ("target", "style", "infile", "outfile", "config-file", "lang"), ""
)

# Command line only actions: each one defaults to OFF (0).
ACTIONS = dict.fromkeys(
    ("help", "version", "verbose", "debug", "targets"), 0
)

# Actions that don't require a target specification.
NO_TARGET = [
    "help",
    "version",
    "targets",
]

# The valid %!key:val keywords.
CONFIG_KEYWORDS = [
    "target",
    "style",
    "options",
    "preproc",
    "postproc",
]
126
# Target identifier -> human readable description of the generated document.
TARGET_NAMES = dict(
    html="HTML page",
    sgml="SGML document",
    dbk="DocBook document",
    tex="LaTeX document",
    lout="Lout document",
    man="UNIX Manual page",
    mgp="MagicPoint presentation",
    wiki="Wikipedia page",
    gwiki="Google Wiki page",
    doku="DokuWiki page",
    pmw="PmWiki page",
    moin="MoinMoin page",
    txt="Plain Text",
    adoc="AsciiDoc document",
    creole="Creole 1.0 document",
    md="Markdown document",
)

# All target identifiers, in alphabetical order.
TARGETS = list(TARGET_NAMES)
TARGETS.sort()
147
# Message level switches. Do not edit them here, please use the command line:
#   DEBUG   : --debug
#   VERBOSE : -v, -vv or -vvv
#   QUIET   : --quiet
DEBUG = VERBOSE = QUIET = 0

ENCODING = "utf-8"
DFT_TEXT_WIDTH = 72

# Module-level holders. They all start empty, and every list/dict below
# is its own separate object.
RC_RAW, CMDLINE_RAW = [], []
CONF, regex, TAGS, rules = {}, {}, {}, {}
BLOCK = TITLE = None

TARGET = ""

# "-" is the file name that stands for STDIN/STDOUT (see the usage text).
STDIN = "-"
STDOUT = "-"
MODULEIN = "-module-"
MODULEOUT = "-module-"

ESCCHAR = "\x00"
SEPARATOR = "\x01"

# presumably: list mark character -> list block name (verify against callers)
LISTNAMES = {
    "-": "list",
    "+": "numlist",
    ":": "deflist",
}
171
# Version banner, built from the program name, version and URL.
VERSIONSTR = "{} version {} <{}>".format(my_name, __version__, my_url)

# Command line usage text: one item per output line, joined with newlines.
USAGE = "\n".join(
    (
        # Synopsis
        "",
        "Usage: %s [OPTIONS] infile.t2t" % my_name,
        "",
        # Options
        "      --targets       list available targets and exit",
        "  -t, --target=TYPE   set target document type. currently supported:",
        "                      %s" % ", ".join(TARGETS),
        "  -i, --infile=FILE   set FILE as the input file name ('-' for STDIN)",
        "  -o, --outfile=FILE  set FILE as the output file name ('-' for STDOUT)",
        "      --toc           add a table of contents to the output",
        "  -n, --enum-title    enumerate all titles as 1, 1.1, 1.1.1, etc.",
        "      --style=FILE    use FILE as the document style (e.g., a CSS file)",
        "  -H, --no-headers    omit header and footer from output",
        "  -C, --config-file=F read configuration from file F",
        "  -q, --quiet         suppress all output (except errors)",
        "  -v, --verbose       print informative messages during conversion",
        "  -h, --help          print this help text and exit",
        "  -V, --version       print program version and exit",
        "",
        # Negated forms
        "Turn off options:",
        "     --no-enum-title, --headers, --no-quiet,",
        "     --no-rc, --no-style, --no-toc",
        "",
        # Example invocation
        "Example:",
        "     {} -t html --toc {}".format(my_name, "file.t2t"),
        "",
        # Notes about input/output file names
        "By default, converted output is saved to 'infile.<target>'.",
        "Use --outfile to force an output file name.",
        "If  input file is '-', read from STDIN.",
        "If output file is '-', dump output to STDOUT.",
        "",
        my_url,
        "",
    )
)
210
211
212##############################################################################
213
214
# Here are the header templates for all targets (one entry per target)
# You may edit them to fit your needs
#  - the %(HEADERn)s strings represent the Header lines
#  - the %(STYLE)s string is changed by --style contents
#  - the %(ENCODING)s string is changed to "utf-8"
#  - if any of the above is empty, the full line is removed
#  - use %% to represent a literal %
#
# Most templates open with """\ : the backslash right after the quotes
# keeps the template from starting with an empty line.
HEADER_TEMPLATE = {
    "txt": """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
    "sgml": """\
<!doctype linuxdoc system>
<article>
<title>%(HEADER1)s
<author>%(HEADER2)s
<date>%(HEADER3)s
""",
    "html": """\
<!DOCTYPE html>
<html>
<head>
<meta charset="%(ENCODING)s">
<title>%(HEADER1)s</title>
<meta name="generator" content="https://txt2tags.org">
<link rel="stylesheet" href="%(STYLE)s">
<style type="text/css">
blockquote{margin: 1em 2em; border-left: 2px solid #999;
  font-style: oblique; padding-left: 1em;}
blockquote:first-letter{margin: .2em .1em .1em 0; font-size: 160%%; font-weight: bold;}
blockquote:first-line{font-weight: bold;}
body{font-family: sans-serif;}
hr{background-color:#000;border:0;color:#000;}
hr.heavy{height:2px;}
hr.light{height:1px;}
img{border:0;display:block;}
img.right{margin:0 0 0 auto;}
img.center{border:0;margin:0 auto;}
table{border-collapse: collapse;}
table th,table td{padding: 3px 7px 2px 7px;}
table th{background-color: lightgrey;}
table.center{margin-left:auto; margin-right:auto;}
.center{text-align:center;}
.right{text-align:right;}
.left{text-align:left;}
.tableborder,.tableborder td,.tableborder th{border:1px solid #000;}
.underline{text-decoration:underline;}
</style>
</head>
<body>
<header>
<hgroup>
<h1>%(HEADER1)s</h1>
<h2>%(HEADER2)s</h2>
<h3>%(HEADER3)s</h3>
</hgroup>
</header>
<article>
""",
    # The backslash at the end of the DOCTYPE line joins it with the next
    # line, so both parts come out as a single line.
    "dbk": """\
<?xml version="1.0"
      encoding="%(ENCODING)s"
?>
<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"\
 "docbook/dtd/xml/4.5/docbookx.dtd">
<article lang="en">
  <articleinfo>
    <title>%(HEADER1)s</title>
    <authorgroup>
      <author><othername>%(HEADER2)s</othername></author>
    </authorgroup>
    <date>%(HEADER3)s</date>
  </articleinfo>
""",
    "man": """\
.TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s"
""",
    # MagicPoint directives start with a literal %, hence the doubled %%.
    "mgp": """\
#!/usr/X11R6/bin/mgp -t 90
%%deffont "normal"    xfont  "utopia-medium-r", charset "iso8859-1"
%%deffont "normal-i"  xfont  "utopia-medium-i", charset "iso8859-1"
%%deffont "normal-b"  xfont  "utopia-bold-r"  , charset "iso8859-1"
%%deffont "normal-bi" xfont  "utopia-bold-i"  , charset "iso8859-1"
%%deffont "mono"      xfont "courier-medium-r", charset "iso8859-1"
%%default 1 size 5
%%default 2 size 8, fore "yellow", font "normal-b", center
%%default 3 size 5, fore "white",  font "normal", left, prefix "  "
%%tab 1 size 4, vgap 30, prefix "     ", icon arc "red" 40, leftfill
%%tab 2 prefix "            ", icon arc "orange" 40, leftfill
%%tab 3 prefix "                   ", icon arc "brown" 40, leftfill
%%tab 4 prefix "                          ", icon arc "darkmagenta" 40, leftfill
%%tab 5 prefix "                                ", icon arc "magenta" 40, leftfill
%%%%------------------------- end of headers -----------------------------
%%page





%%size 10, center, fore "yellow"
%(HEADER1)s

%%font "normal-i", size 6, fore "white", center
%(HEADER2)s

%%font "mono", size 7, center
%(HEADER3)s
""",
    "moin": """\
'''%(HEADER1)s'''

''%(HEADER2)s''

%(HEADER3)s
""",
    "gwiki": """\
*%(HEADER1)s*

%(HEADER2)s

_%(HEADER3)s_
""",
    "adoc": """\
= %(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
    "doku": """\
===== %(HEADER1)s =====

**//%(HEADER2)s//**

//%(HEADER3)s//
""",
    "pmw": """\
(:Title %(HEADER1)s:)

(:Description %(HEADER2)s:)

(:Summary %(HEADER3)s:)
""",
    "wiki": """\
'''%(HEADER1)s'''

%(HEADER2)s

''%(HEADER3)s''
""",
    # Raw string: the LaTeX backslashes are kept literally.
    "tex": r"""\documentclass{article}
\usepackage{booktabs} %% needed for tables
\usepackage{graphicx}
\usepackage{paralist} %% needed for compact lists
\usepackage[normalem]{ulem} %% needed by strike
\usepackage[urlcolor=blue,colorlinks=true]{hyperref}
\usepackage[%(ENCODING)s]{inputenc}  %% char encoding
\usepackage{%(STYLE)s}  %% user defined

\title{%(HEADER1)s}
\author{%(HEADER2)s}
\begin{document}
\date{%(HEADER3)s}
\maketitle
\clearpage
""",
    "lout": """\
@SysInclude { doc }
@Document
  @InitialFont { Times Base 12p }  # Times, Courier, Helvetica, ...
  @PageOrientation { Portrait }    # Portrait, Landscape
  @ColumnNumber { 1 }              # Number of columns (2, 3, ...)
  @PageHeaders { Simple }          # None, Simple, Titles, NoTitles
  @InitialLanguage { English }     # German, French, Portuguese, ...
  @OptimizePages { Yes }           # Yes/No smart page break feature
//
@Text @Begin
@Display @Heading { %(HEADER1)s }
@Display @I { %(HEADER2)s }
@Display { %(HEADER3)s }
#@NP                               # Break page after Headers
""",
    "creole": """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
    "md": """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
"""
    # NOTE(review): the three hints below are not part of any template. They
    # look like Lout directives and probably belong next to the "lout" entry
    # rather than after "md" -- confirm before moving them.
    # @SysInclude { tbl }                   # Tables support
    # setup: @MakeContents { Yes }          # show TOC
    # setup: @SectionGap                    # break page at each section
}
# Sanity check: there must be exactly one header template per known target.
assert set(HEADER_TEMPLATE) == set(TARGETS)
413
414
415##############################################################################
416
417
418def getTags(config):
419    "Returns all the known tags for the specified target"
420
421    keys = """
422    title1              numtitle1
423    title2              numtitle2
424    title3              numtitle3
425    title4              numtitle4
426    title5              numtitle5
427    title1Open          title1Close
428    title2Open          title2Close
429    title3Open          title3Close
430    title4Open          title4Close
431    title5Open          title5Close
432    blockTitle1Open     blockTitle1Close
433    blockTitle2Open     blockTitle2Close
434    blockTitle3Open     blockTitle3Close
435
436    paragraphOpen       paragraphClose
437    blockVerbOpen       blockVerbClose  blockVerbLine
438    blockQuoteOpen      blockQuoteClose blockQuoteLine
439    blockCommentOpen    blockCommentClose
440
441    fontMonoOpen        fontMonoClose
442    fontBoldOpen        fontBoldClose
443    fontItalicOpen      fontItalicClose
444    fontUnderlineOpen   fontUnderlineClose
445    fontStrikeOpen      fontStrikeClose
446
447    listOpen            listClose
448    listOpenCompact     listCloseCompact
449    listItemOpen        listItemClose     listItemLine
450    numlistOpen         numlistClose
451    numlistOpenCompact  numlistCloseCompact
452    numlistItemOpen     numlistItemClose  numlistItemLine
453    deflistOpen         deflistClose
454    deflistOpenCompact  deflistCloseCompact
455    deflistItem1Open    deflistItem1Close
456    deflistItem2Open    deflistItem2Close deflistItem2LinePrefix
457
458    bar1                bar2
459    url                 urlMark
460    email               emailMark
461    img                 imgAlignLeft  imgAlignRight  imgAlignCenter
462                       _imgAlignLeft _imgAlignRight _imgAlignCenter
463
464    tableOpen           tableClose
465    _tableBorder        _tableAlignLeft      _tableAlignCenter
466    tableRowOpen        tableRowClose        tableRowSep
467    tableTitleRowOpen   tableTitleRowClose
468    tableCellOpen       tableCellClose       tableCellSep
469    tableTitleCellOpen  tableTitleCellClose  tableTitleCellSep
470    _tableColAlignLeft  _tableColAlignRight  _tableColAlignCenter
471    _tableCellAlignLeft _tableCellAlignRight _tableCellAlignCenter
472    _tableCellColSpan   tableColAlignSep
473    _tableCellMulticolOpen
474    _tableCellMulticolClose
475
476    bodyOpen            bodyClose
477    cssOpen             cssClose
478    tocOpen             tocClose             TOC
479    anchor
480    comment
481    pageBreak
482    EOD
483    """.split()
484
485    # TIP: \a represents the current text inside the mark
486    # TIP: ~A~, ~B~ and ~C~ are expanded to other tags parts
487    alltags = {
488        "txt": {
489            "title1": "  \a",
490            "title2": "\t\a",
491            "title3": "\t\t\a",
492            "title4": "\t\t\t\a",
493            "title5": "\t\t\t\t\a",
494            "blockQuoteLine": "\t",
495            "listItemOpen": "- ",
496            "numlistItemOpen": "\a. ",
497            "bar1": "\a",
498            "url": "\a",
499            "urlMark": "\a (\a)",
500            "email": "\a",
501            "emailMark": "\a (\a)",
502            "img": "[\a]",
503        },
504        "html": {
505            "anchor": ' id="\a"',
506            "bar1": '<hr class="light">',
507            "bar2": '<hr class="heavy">',
508            "blockQuoteClose": "</blockquote>",
509            "blockQuoteOpen": "<blockquote>",
510            "blockVerbClose": "</pre>",
511            "blockVerbOpen": "<pre>",
512            "bodyClose": "</div>",
513            "bodyOpen": '<div class="body" id="body">',
514            "comment": "<!-- \a -->",
515            "cssClose": "</style>",
516            "cssOpen": "<style>",
517            "deflistClose": "</dl>",
518            "deflistItem1Close": "</dt>",
519            "deflistItem1Open": "<dt>",
520            "deflistItem2Close": "</dd>",
521            "deflistItem2Open": "<dd>",
522            "deflistOpen": "<dl>",
523            "email": '<a href="mailto:\a">\a</a>',
524            "emailMark": '<a href="mailto:\a">\a</a>',
525            "EOD": "</article></body></html>",
526            "fontBoldClose": "</strong>",
527            "fontBoldOpen": "<strong>",
528            "fontItalicClose": "</em>",
529            "fontItalicOpen": "<em>",
530            "fontMonoClose": "</code>",
531            "fontMonoOpen": "<code>",
532            "fontStrikeClose": "</del>",
533            "fontStrikeOpen": "<del>",
534            "fontUnderlineClose": "</span>",
535            "fontUnderlineOpen": '<span class="underline">',
536            "_imgAlignCenter": ' class="center"',
537            "_imgAlignLeft": ' class="left"',
538            "_imgAlignRight": ' class="right"',
539            "img": '<img~a~ src="\a" alt="">',
540            "listClose": "</ul>",
541            "listItemClose": "</li>",
542            "listItemOpen": "<li>",
543            "listOpen": "<ul>",
544            "numlistClose": "</ol>",
545            "numlistItemClose": "</li>",
546            "numlistItemOpen": "<li>",
547            "numlistOpen": "<ol>",
548            "paragraphClose": "</p>",
549            "paragraphOpen": "<p>",
550            "_tableAlignCenter": ' style="margin-left: auto; margin-right: auto;"',
551            "_tableBorder": ' class="tableborder"',
552            "_tableCellAlignCenter": ' class="center"',
553            "_tableCellAlignRight": ' class="right"',
554            "tableCellClose": "</td>",
555            "_tableCellColSpan": ' colspan="\a"',
556            "tableCellOpen": "<td~a~~s~>",
557            "tableClose": "</table>",
558            "tableOpen": "<table~a~~b~>",
559            "tableRowClose": "</tr>",
560            "tableRowOpen": "<tr>",
561            "tableTitleCellClose": "</th>",
562            "tableTitleCellOpen": "<th~s~>",
563            "title1Close": "</section>",
564            "title1Open": "<section~A~>\n<h1>\a</h1>",
565            "title2Close": "</section>",
566            "title2Open": "<section~A~>\n<h2>\a</h2>",
567            "title3Close": "</section>",
568            "title3Open": "<section~A~>\n<h3>\a</h3>",
569            "title4Close": "</section>",
570            "title4Open": "<section~A~>\n<h4>\a</h4>",
571            "title5Close": "</section>",
572            "title5Open": "<section~A~>\n<h5>\a</h5>",
573            "tocClose": "</nav>",
574            "tocOpen": "<nav>",
575            "url": '<a href="\a">\a</a>',
576            "urlMark": '<a href="\a">\a</a>',
577        },
578        "sgml": {
579            "paragraphOpen": "<p>",
580            "title1": "<sect>\a~A~<p>",
581            "title2": "<sect1>\a~A~<p>",
582            "title3": "<sect2>\a~A~<p>",
583            "title4": "<sect3>\a~A~<p>",
584            "title5": "<sect4>\a~A~<p>",
585            "anchor": '<label id="\a">',
586            "blockVerbOpen": "<tscreen><verb>",
587            "blockVerbClose": "</verb></tscreen>",
588            "blockQuoteOpen": "<quote>",
589            "blockQuoteClose": "</quote>",
590            "fontMonoOpen": "<tt>",
591            "fontMonoClose": "</tt>",
592            "fontBoldOpen": "<bf>",
593            "fontBoldClose": "</bf>",
594            "fontItalicOpen": "<em>",
595            "fontItalicClose": "</em>",
596            "fontUnderlineOpen": "<bf><em>",
597            "fontUnderlineClose": "</em></bf>",
598            "listOpen": "<itemize>",
599            "listClose": "</itemize>",
600            "listItemOpen": "<item>",
601            "numlistOpen": "<enum>",
602            "numlistClose": "</enum>",
603            "numlistItemOpen": "<item>",
604            "deflistOpen": "<descrip>",
605            "deflistClose": "</descrip>",
606            "deflistItem1Open": "<tag>",
607            "deflistItem1Close": "</tag>",
608            "bar1": "<!-- \a -->",
609            "url": '<htmlurl url="\a" name="\a">',
610            "urlMark": '<htmlurl url="\a" name="\a">',
611            "email": '<htmlurl url="mailto:\a" name="\a">',
612            "emailMark": '<htmlurl url="mailto:\a" name="\a">',
613            "img": '<figure><ph vspace=""><img src="\a"></figure>',
614            "tableOpen": '<table><tabular ca="~C~">',
615            "tableClose": "</tabular></table>",
616            "tableRowSep": "<rowsep>",
617            "tableCellSep": "<colsep>",
618            "_tableColAlignLeft": "l",
619            "_tableColAlignRight": "r",
620            "_tableColAlignCenter": "c",
621            "comment": "<!-- \a -->",
622            "TOC": "<toc>",
623            "EOD": "</article>",
624        },
625        "dbk": {
626            "paragraphOpen": "<para>",
627            "paragraphClose": "</para>",
628            "title1Open": "~A~<sect1><title>\a</title>",
629            "title1Close": "</sect1>",
630            "title2Open": "~A~  <sect2><title>\a</title>",
631            "title2Close": "  </sect2>",
632            "title3Open": "~A~    <sect3><title>\a</title>",
633            "title3Close": "    </sect3>",
634            "title4Open": "~A~      <sect4><title>\a</title>",
635            "title4Close": "      </sect4>",
636            "title5Open": "~A~        <sect5><title>\a</title>",
637            "title5Close": "        </sect5>",
638            "anchor": '<anchor id="\a"/>\n',
639            "blockVerbOpen": "<programlisting>",
640            "blockVerbClose": "</programlisting>",
641            "blockQuoteOpen": "<blockquote><para>",
642            "blockQuoteClose": "</para></blockquote>",
643            "fontMonoOpen": "<code>",
644            "fontMonoClose": "</code>",
645            "fontBoldOpen": '<emphasis role="bold">',
646            "fontBoldClose": "</emphasis>",
647            "fontItalicOpen": "<emphasis>",
648            "fontItalicClose": "</emphasis>",
649            "fontUnderlineOpen": '<emphasis role="underline">',
650            "fontUnderlineClose": "</emphasis>",
651            "fontStrikeOpen": None,  # Maybe <emphasis role="strikethrough">
652            "fontStrikeClose": None,  # Maybe </emphasis>
653            "listOpen": "<itemizedlist>",
654            "listClose": "</itemizedlist>",
655            "listItemOpen": "<listitem><para>",
656            "listItemClose": "</para></listitem>",
657            "numlistOpen": '<orderedlist numeration="arabic">',
658            "numlistClose": "</orderedlist>",
659            "numlistItemOpen": "<listitem><para>",
660            "numlistItemClose": "</para></listitem>",
661            "deflistOpen": "<variablelist>",
662            "deflistClose": "</variablelist>",
663            "deflistItem1Open": "<varlistentry><term>",
664            "deflistItem1Close": "</term>",
665            "deflistItem2Open": "<listitem><para>",
666            "deflistItem2Close": "</para></listitem></varlistentry>",
667            "bar1": None,
668            "bar2": None,
669            "url": '<ulink url="\a">\a</ulink>',
670            "urlMark": '<ulink url="\a">\a</ulink>',
671            "email": "<email>\a</email>",
672            "emailMark": "<email>\a</email>",
673            "img": (
674                '<mediaobject><imageobject><imagedata fileref="\a"/>'
675                "</imageobject></mediaobject>"
676            ),
677            # Tables not supported, need to know number of columns.
678            # 'tableOpen'            : '<informaltable><tgroup cols=""><tbody>',
679            # 'tableClose'           : '</tbody></tgroup></informaltable>' ,
680            # 'tableRowOpen'         : '<row>'                             ,
681            # 'tableRowClose'        : '</row>'                            ,
682            # 'tableCellOpen'        : '<entry>'                           ,
683            # 'tableCellClose'       : '</entry>'                          ,
684            # 'tableTitleRowOpen'    : '<thead>'                           ,
685            # 'tableTitleRowClose'   : '</thead>'                          ,
686            # '_tableBorder'         : ' frame="all"'                      ,
687            # '_tableAlignCenter'    : ' align="center"'                   ,
688            # '_tableCellAlignRight' : ' align="right"'                    ,
689            # '_tableCellAlignCenter': ' align="center"'                   ,
690            # '_tableCellColSpan'    : ' COLSPAN="\a"'                     ,
691            "TOC": "<index/>",
692            "comment": "<!-- \a -->",
693            "EOD": "</article>",
694        },
695        "tex": {
696            "title1": "~A~\\section*{\a}",
697            "title2": "~A~\\subsection*{\a}",
698            "title3": "~A~\\subsubsection*{\a}",
699            # title 4/5: DIRTY: para+BF+\\+\n
700            "title4": "~A~\\paragraph{}\\textbf{\a}\\\\\n",
701            "title5": "~A~\\paragraph{}\\textbf{\a}\\\\\n",
702            "numtitle1": "\n~A~\\section{\a}",
703            "numtitle2": "~A~\\subsection{\a}",
704            "numtitle3": "~A~\\subsubsection{\a}",
705            "anchor": "\\hypertarget{\a}{}\n",
706            "blockVerbOpen": "\\begin{verbatim}",
707            "blockVerbClose": "\\end{verbatim}",
708            "blockQuoteOpen": "\\begin{quotation}",
709            "blockQuoteClose": "\\end{quotation}",
710            "fontMonoOpen": "\\texttt{",
711            "fontMonoClose": "}",
712            "fontBoldOpen": "\\textbf{",
713            "fontBoldClose": "}",
714            "fontItalicOpen": "\\textit{",
715            "fontItalicClose": "}",
716            "fontUnderlineOpen": "\\underline{",
717            "fontUnderlineClose": "}",
718            "fontStrikeOpen": "\\sout{",
719            "fontStrikeClose": "}",
720            "listOpen": "\\begin{itemize}",
721            "listClose": "\\end{itemize}",
722            "listOpenCompact": "\\begin{compactitem}",
723            "listCloseCompact": "\\end{compactitem}",
724            "listItemOpen": "\\item ",
725            "numlistOpen": "\\begin{enumerate}",
726            "numlistClose": "\\end{enumerate}",
727            "numlistOpenCompact": "\\begin{compactenum}",
728            "numlistCloseCompact": "\\end{compactenum}",
729            "numlistItemOpen": "\\item ",
730            "deflistOpen": "\\begin{description}",
731            "deflistClose": "\\end{description}",
732            "deflistOpenCompact": "\\begin{compactdesc}",
733            "deflistCloseCompact": "\\end{compactdesc}",
734            "deflistItem1Open": "\\item[",
735            "deflistItem1Close": "]",
736            "bar1": "\\hrulefill{}",
737            "bar2": "\\rule{\\linewidth}{1mm}",
738            "url": "\\htmladdnormallink{\a}{\a}",
739            "urlMark": "\\htmladdnormallink{\a}{\a}",
740            "email": "\\htmladdnormallink{\a}{mailto:\a}",
741            "emailMark": "\\htmladdnormallink{\a}{mailto:\a}",
742            "img": "\\includegraphics{\a}",
743            "tableOpen": "\\begin{tabular}{@{}~C~@{}}",
744            "tableClose": "\\end{tabular}",
745            "tableRowOpen": None,
746            "tableRowClose": " \\\\",
747            "tableTitleRowClose": " \\\\\n\\midrule",
748            "tableCellSep": " & ",
749            "_tableColAlignLeft": "l",
750            "_tableColAlignRight": "r",
751            "_tableColAlignCenter": "c",
752            "_tableCellAlignLeft": "l",
753            "_tableCellAlignRight": "r",
754            "_tableCellAlignCenter": "c",
755            "_tableCellColSpan": "\a",
756            "_tableCellMulticolOpen": "\\multicolumn{\a}{|~C~|}{",
757            "_tableCellMulticolClose": "}",
758            "tableColAlignSep": None,
759            "comment": "% \a",
760            "TOC": "\\tableofcontents",
761            "pageBreak": "\\clearpage",
762            "EOD": "\\end{document}",
763        },
764        "lout": {
765            "paragraphOpen": "@LP",
766            "blockTitle1Open": "@BeginSections",
767            "blockTitle1Close": "@EndSections",
768            "blockTitle2Open": " @BeginSubSections",
769            "blockTitle2Close": " @EndSubSections",
770            "blockTitle3Open": "  @BeginSubSubSections",
771            "blockTitle3Close": "  @EndSubSubSections",
772            "title1Open": "~A~@Section @Title { \a } @Begin",
773            "title1Close": "@End @Section",
774            "title2Open": "~A~ @SubSection @Title { \a } @Begin",
775            "title2Close": " @End @SubSection",
776            "title3Open": "~A~  @SubSubSection @Title { \a } @Begin",
777            "title3Close": "  @End @SubSubSection",
778            "title4Open": "~A~@LP @LeftDisplay @B { \a }",
779            "title5Open": "~A~@LP @LeftDisplay @B { \a }",
780            "anchor": "@Tag { \a }\n",
781            "blockVerbOpen": "@LP @ID @F @RawVerbatim @Begin",
782            "blockVerbClose": "@End @RawVerbatim",
783            "blockQuoteOpen": "@QD {",
784            "blockQuoteClose": "}",
785            # enclosed inside {} to deal with joined**words**
786            "fontMonoOpen": "{@F {",
787            "fontMonoClose": "}}",
788            "fontBoldOpen": "{@B {",
789            "fontBoldClose": "}}",
790            "fontItalicOpen": "{@II {",
791            "fontItalicClose": "}}",
792            "fontUnderlineOpen": "{@Underline{",
793            "fontUnderlineClose": "}}",
794            # the full form is more readable, but could be BL EL LI NL TL DTI
795            "listOpen": "@BulletList",
796            "listClose": "@EndList",
797            "listItemOpen": "@ListItem{",
798            "listItemClose": "}",
799            "numlistOpen": "@NumberedList",
800            "numlistClose": "@EndList",
801            "numlistItemOpen": "@ListItem{",
802            "numlistItemClose": "}",
803            "deflistOpen": "@TaggedList",
804            "deflistClose": "@EndList",
805            "deflistItem1Open": "@DropTagItem {",
806            "deflistItem1Close": "}",
807            "deflistItem2Open": "{",
808            "deflistItem2Close": "}",
809            "bar1": "@DP @FullWidthRule",
810            "url": "{blue @Colour { \a }}",
811            "urlMark": "\a ({blue @Colour { \a }})",
812            "email": "{blue @Colour { \a }}",
813            "emailMark": "\a ({blue @Colour{ \a }})",
814            "img": "~A~@IncludeGraphic { \a }",  # eps only!
815            "_imgAlignLeft": "@LeftDisplay ",
816            "_imgAlignRight": "@RightDisplay ",
817            "_imgAlignCenter": "@CentredDisplay ",
818            # lout tables are *way* too complicated, no support for now
819            # 'tableOpen'            : '~A~@Tbl~B~\naformat{ @Cell A | @Cell B } {',
820            # 'tableClose'           : '}'     ,
821            # 'tableRowOpen'         : '@Rowa\n'       ,
822            # 'tableTitleRowOpen'    : '@HeaderRowa'       ,
823            # 'tableCenterAlign'     : '@CentredDisplay '         ,
824            # 'tableCellOpen'        : '\a {'                     ,  # A, B, ...
825            # 'tableCellClose'       : '}'                        ,
826            # '_tableBorder'         : '\nrule {yes}'             ,
827            "comment": "# \a",
828            # @MakeContents must be on the config file
829            "TOC": "@DP @ContentsGoesHere @DP",
830            "pageBreak": "@NP",
831            "EOD": "@End @Text",
832        },
833        # https://moinmo.in/HelpOnMoinWikiSyntax
834        "moin": {
835            "title1": "= \a =",
836            "title2": "== \a ==",
837            "title3": "=== \a ===",
838            "title4": "==== \a ====",
839            "title5": "===== \a =====",
840            "blockVerbOpen": "{{{",
841            "blockVerbClose": "}}}",
842            "blockQuoteLine": "  ",
843            "fontMonoOpen": "{{{",
844            "fontMonoClose": "}}}",
845            "fontBoldOpen": "'''",
846            "fontBoldClose": "'''",
847            "fontItalicOpen": "''",
848            "fontItalicClose": "''",
849            "fontUnderlineOpen": "__",
850            "fontUnderlineClose": "__",
851            "fontStrikeOpen": "--(",
852            "fontStrikeClose": ")--",
853            "listItemOpen": " * ",
854            "numlistItemOpen": " \a. ",
855            "deflistItem1Open": " ",
856            "deflistItem1Close": "::",
857            "deflistItem2LinePrefix": " :: ",
858            "bar1": "----",
859            "bar2": "--------",
860            "url": "[[\a]]",
861            "urlMark": "[[\a|\a]]",
862            "email": "\a",
863            "emailMark": "[[mailto:\a|\a]]",
864            "img": "[\a]",
865            "tableRowOpen": "||",
866            "tableCellOpen": "~A~",
867            "tableCellClose": "||",
868            "tableTitleCellClose": "||",
869            "_tableCellAlignRight": "<)>",
870            "_tableCellAlignCenter": "<:>",
871            "comment": "/* \a */",
872            "TOC": "[[TableOfContents]]",
873        },
874        # http://code.google.com/p/support/wiki/WikiSyntax
875        "gwiki": {
876            "title1": "= \a =",
877            "title2": "== \a ==",
878            "title3": "=== \a ===",
879            "title4": "==== \a ====",
880            "title5": "===== \a =====",
881            "blockVerbOpen": "{{{",
882            "blockVerbClose": "}}}",
883            "blockQuoteLine": "  ",
884            "fontMonoOpen": "{{{",
885            "fontMonoClose": "}}}",
886            "fontBoldOpen": "*",
887            "fontBoldClose": "*",
888            "fontItalicOpen": "_",  # underline == italic
889            "fontItalicClose": "_",
890            "fontStrikeOpen": "~~",
891            "fontStrikeClose": "~~",
892            "listItemOpen": " * ",
893            "numlistItemOpen": " # ",
894            "url": "\a",
895            "urlMark": "[\a \a]",
896            "email": "mailto:\a",
897            "emailMark": "[mailto:\a \a]",
898            "img": "[\a]",
899            "tableRowOpen": "|| ",
900            "tableRowClose": " ||",
901            "tableCellSep": " || ",
902        },
903        # http://powerman.name/doc/asciidoc
904        "adoc": {
905            "title1": "== \a",
906            "title2": "=== \a",
907            "title3": "==== \a",
908            "title4": "===== \a",
909            "title5": "===== \a",
910            "blockVerbOpen": "----",
911            "blockVerbClose": "----",
912            "fontMonoOpen": "+",
913            "fontMonoClose": "+",
914            "fontBoldOpen": "*",
915            "fontBoldClose": "*",
916            "fontItalicOpen": "_",
917            "fontItalicClose": "_",
918            "listItemOpen": "- ",
919            "listItemLine": "\t",
920            "numlistItemOpen": ". ",
921            "url": "\a",
922            "urlMark": "\a[\a]",
923            "email": "mailto:\a",
924            "emailMark": "mailto:\a[\a]",
925            "img": "image::\a[]",
926        },
927        # http://wiki.splitbrain.org/wiki:syntax
928        # Hint: <br> is \\ $
929        # Hint: You can add footnotes ((This is a footnote))
930        "doku": {
931            "title1": "===== \a =====",
932            "title2": "==== \a ====",
933            "title3": "=== \a ===",
934            "title4": "== \a ==",
935            "title5": "= \a =",
            # DokuWiki uses '  ' indentation to mark verb blocks (see indentverbblock)
937            "blockQuoteLine": ">",
938            "fontMonoOpen": "''",
939            "fontMonoClose": "''",
940            "fontBoldOpen": "**",
941            "fontBoldClose": "**",
942            "fontItalicOpen": "//",
943            "fontItalicClose": "//",
944            "fontUnderlineOpen": "__",
945            "fontUnderlineClose": "__",
946            "fontStrikeOpen": "<del>",
947            "fontStrikeClose": "</del>",
948            "listItemOpen": "  * ",
949            "numlistItemOpen": "  - ",
950            "bar1": "----",
951            "url": "[[\a]]",
952            "urlMark": "[[\a|\a]]",
953            "email": "[[\a]]",
954            "emailMark": "[[\a|\a]]",
955            "img": "{{\a}}",
956            "imgAlignLeft": "{{\a }}",
957            "imgAlignRight": "{{ \a}}",
958            "imgAlignCenter": "{{ \a }}",
959            "tableTitleRowOpen": "^ ",
960            "tableTitleRowClose": " ^",
961            "tableTitleCellSep": " ^ ",
962            "tableRowOpen": "| ",
963            "tableRowClose": " |",
964            "tableCellSep": " | ",
965            # DokuWiki has no attributes. The content must be aligned!
966            # '_tableCellAlignRight' : '<)>'           , # ??
967            # '_tableCellAlignCenter': '<:>'           , # ??
968            # DokuWiki colspan is the same as txt2tags' with multiple |||
969            # 'comment'             : '## \a'         , # ??
970            # TOC is automatic
971        },
972        # http://www.pmwiki.org/wiki/PmWiki/TextFormattingRules
973        "pmw": {
974            "title1": "~A~! \a ",
975            "title2": "~A~!! \a ",
976            "title3": "~A~!!! \a ",
977            "title4": "~A~!!!! \a ",
978            "title5": "~A~!!!!! \a ",
979            "blockQuoteOpen": "->",
980            "blockQuoteClose": "\n",
981            # In-text font
982            "fontMonoOpen": "@@",
983            "fontMonoClose": "@@",
984            "fontBoldOpen": "'''",
985            "fontBoldClose": "'''",
986            "fontItalicOpen": "''",
987            "fontItalicClose": "''",
988            "fontUnderlineOpen": "{+",
989            "fontUnderlineClose": "+}",
990            "fontStrikeOpen": "{-",
991            "fontStrikeClose": "-}",
992            # Lists
993            "listItemLine": "*",
994            "numlistItemLine": "#",
995            "deflistItem1Open": ": ",
996            "deflistItem1Close": ":",
997            # Verbatim block
998            "blockVerbOpen": "[@",
999            "blockVerbClose": "@]",
1000            "bar1": "----",
1001            # URL, email and anchor
1002            "url": "\a",
1003            "urlMark": "[[\a -> \a]]",
1004            "email": "\a",
1005            "emailMark": "[[\a -> mailto:\a]]",
1006            "anchor": "[[#\a]]\n",
1007            # Image markup
1008            "img": "\a",
1009            # Table attributes
1010            "tableTitleRowOpen": "||! ",
1011            "tableTitleRowClose": "||",
1012            "tableTitleCellSep": " ||!",
1013            "tableRowOpen": "||",
1014            "tableRowClose": "||",
1015            "tableCellSep": " ||",
1016        },
1017        # http://en.wikipedia.org/wiki/Help:Editing
1018        "wiki": {
1019            "title1": "== \a ==",
1020            "title2": "=== \a ===",
1021            "title3": "==== \a ====",
1022            "title4": "===== \a =====",
1023            "title5": "====== \a ======",
1024            "blockVerbOpen": "<pre>",
1025            "blockVerbClose": "</pre>",
1026            "blockQuoteOpen": "<blockquote>",
1027            "blockQuoteClose": "</blockquote>",
1028            "fontMonoOpen": "<tt>",
1029            "fontMonoClose": "</tt>",
1030            "fontBoldOpen": "'''",
1031            "fontBoldClose": "'''",
1032            "fontItalicOpen": "''",
1033            "fontItalicClose": "''",
1034            "fontUnderlineOpen": "<u>",
1035            "fontUnderlineClose": "</u>",
1036            "fontStrikeOpen": "<s>",
1037            "fontStrikeClose": "</s>",
1038            # XXX Mixed lists not working: *#* list inside numlist inside list
1039            "listItemLine": "*",
1040            "numlistItemLine": "#",
1041            "deflistItem1Open": "; ",
1042            "deflistItem2LinePrefix": ": ",
1043            "bar1": "----",
1044            "url": "[\a]",
1045            "urlMark": "[\a \a]",
1046            "email": "mailto:\a",
1047            "emailMark": "[mailto:\a \a]",
1048            # [[Image:foo.png|right|Optional alt/caption text]]
1049            # (right, left, center, none)
1050            "img": "[[Image:\a~A~]]",
1051            "_imgAlignLeft": "|left",
1052            "_imgAlignCenter": "|center",
1053            "_imgAlignRight": "|right",
1054            # {| border="1" cellspacing="0" cellpadding="4" align="center"
1055            "tableOpen": '{|~A~~B~ cellpadding="4"',
1056            "tableClose": "|}",
1057            "tableRowOpen": "|-\n| ",
1058            "tableTitleRowOpen": "|-\n! ",
1059            "tableCellSep": " || ",
1060            "tableTitleCellSep": " !! ",
1061            "_tableBorder": ' border="1"',
1062            "_tableAlignCenter": ' align="center"',
1063            "comment": "<!-- \a -->",
1064            "TOC": "__TOC__",
1065        },
1066        # http://www.inference.phy.cam.ac.uk/mackay/mgp/SYNTAX
1067        # http://en.wikipedia.org/wiki/MagicPoint
1068        "mgp": {
1069            "paragraphOpen": '%font "normal", size 5',
1070            "title1": "%page\n\n\a\n",
1071            "title2": "%page\n\n\a\n",
1072            "title3": "%page\n\n\a\n",
1073            "title4": "%page\n\n\a\n",
1074            "title5": "%page\n\n\a\n",
1075            "blockVerbOpen": '%font "mono"',
1076            "blockVerbClose": '%font "normal"',
1077            "blockQuoteOpen": '%prefix "       "',
1078            "blockQuoteClose": '%prefix "  "',
1079            "fontMonoOpen": '\n%cont, font "mono"\n',
1080            "fontMonoClose": '\n%cont, font "normal"\n',
1081            "fontBoldOpen": '\n%cont, font "normal-b"\n',
1082            "fontBoldClose": '\n%cont, font "normal"\n',
1083            "fontItalicOpen": '\n%cont, font "normal-i"\n',
1084            "fontItalicClose": '\n%cont, font "normal"\n',
1085            "fontUnderlineOpen": '\n%cont, fore "cyan"\n',
1086            "fontUnderlineClose": '\n%cont, fore "white"\n',
1087            "listItemLine": "\t",
1088            "numlistItemLine": "\t",
1089            "numlistItemOpen": "\a. ",
1090            "deflistItem1Open": '\t\n%cont, font "normal-b"\n',
1091            "deflistItem1Close": '\n%cont, font "normal"\n',
1092            "bar1": '%bar "white" 5',
1093            "bar2": "%pause",
1094            "url": '\n%cont, fore "cyan"\n\a' + '\n%cont, fore "white"\n',
1095            "urlMark": '\a \n%cont, fore "cyan"\n\a' + '\n%cont, fore "white"\n',
1096            "email": '\n%cont, fore "cyan"\n\a' + '\n%cont, fore "white"\n',
1097            "emailMark": '\a \n%cont, fore "cyan"\n\a' + '\n%cont, fore "white"\n',
1098            "img": '~A~\n%newimage "\a"\n%left\n',
1099            "_imgAlignLeft": "\n%left",
1100            "_imgAlignRight": "\n%right",
1101            "_imgAlignCenter": "\n%center",
1102            "comment": "%% \a",
1103            "pageBreak": "%page\n\n\n",
1104            "EOD": "%%EOD",
1105        },
1106        # man groff_man ; man 7 groff
1107        "man": {
1108            "paragraphOpen": ".P",
1109            "title1": ".SH \a",
1110            "title2": ".SS \a",
1111            "title3": ".SS \a",
1112            "title4": ".SS \a",
1113            "title5": ".SS \a",
1114            "blockVerbOpen": ".nf",
1115            "blockVerbClose": ".fi\n",
1116            "blockQuoteOpen": ".RS",
1117            "blockQuoteClose": ".RE",
1118            "fontBoldOpen": "\\fB",
1119            "fontBoldClose": "\\fR",
1120            "fontItalicOpen": "\\fI",
1121            "fontItalicClose": "\\fR",
1122            "listOpen": ".RS",
1123            "listItemOpen": ".IP \\(bu 3\n",
1124            "listClose": ".RE\n.IP",
1125            "numlistOpen": ".RS",
1126            "numlistItemOpen": ".IP \a. 3\n",
1127            "numlistClose": ".RE\n.IP",
1128            "deflistItem1Open": ".TP\n",
1129            "bar1": "\n\n",
1130            "url": "\a",
1131            "urlMark": "\a (\a)",
1132            "email": "\a",
1133            "emailMark": "\a (\a)",
1134            "img": "\a",
1135            "tableOpen": ".TS\n~A~~B~tab(^); ~C~.",
1136            "tableClose": ".TE",
1137            "tableRowOpen": " ",
1138            "tableCellSep": "^",
1139            "_tableAlignCenter": "center, ",
1140            "_tableBorder": "allbox, ",
1141            "_tableColAlignLeft": "l",
1142            "_tableColAlignRight": "r",
1143            "_tableColAlignCenter": "c",
1144            "comment": '.\\" \a',
1145        },
1146        # http://www.wikicreole.org/wiki/AllMarkup
1147        "creole": {
1148            "title1": "= \a =",
1149            "title2": "== \a ==",
1150            "title3": "=== \a ===",
1151            "title4": "==== \a ====",
1152            "title5": "===== \a =====",
1153            "blockVerbOpen": "{{{",
1154            "blockVerbClose": "}}}",
1155            "blockQuoteLine": "  ",
1156            "fontMonoOpen": None,  # planned for 2.0,
1157            "fontMonoClose": None,  # meanwhile we disable it
1158            "fontBoldOpen": "**",
1159            "fontBoldClose": "**",
1160            "fontItalicOpen": "//",
1161            "fontItalicClose": "//",
1162            "fontUnderlineOpen": "//",  # no underline in 1.0, planned for 2.0,
1163            "fontUnderlineClose": "//",  # meanwhile we use italic (emphasized)
1164            "fontStrikeOpen": None,  # planned for 2.0,
1165            "fontStrikeClose": None,  # meanwhile we disable it
1166            "listItemLine": "*",
1167            "numlistItemLine": "#",
1168            "deflistItem2LinePrefix": ":",
1169            "bar1": "----",
1170            "url": "[[\a]]",
1171            "urlMark": "[[\a|\a]]",
1172            "img": "{{\a}}",
1173            "tableTitleRowOpen": "|= ",
1174            "tableTitleRowClose": "|",
1175            "tableTitleCellSep": " |= ",
1176            "tableRowOpen": "| ",
1177            "tableRowClose": " |",
1178            "tableCellSep": " | ",
1179            # TODO: placeholder (mark for unknown syntax)
1180            # if possible: http://www.wikicreole.org/wiki/Placeholder
1181        },
1182        # regular markdown: http://daringfireball.net/projects/markdown/syntax
1183        # markdown extra:   http://michelf.com/projects/php-markdown/extra/
1184        "md": {
1185            "title1": "# \a ",
1186            "title2": "## \a ",
1187            "title3": "### \a ",
1188            "title4": "#### \a ",
1189            "title5": "##### \a ",
1190            "blockVerbLine": "    ",
1191            "blockQuoteLine": "> ",
1192            "fontMonoOpen": "`",
1193            "fontMonoClose": "`",
1194            "fontBoldOpen": "**",
1195            "fontBoldClose": "**",
1196            "fontItalicOpen": "*",
1197            "fontItalicClose": "*",
1198            "fontUnderlineOpen": None,
1199            "fontUnderlineClose": None,
1200            "fontStrikeOpen": "~~",
1201            "fontStrikeClose": "~~",
1202            # Lists
1203            "listOpenCompact": None,
1204            "listItemLine": " ",
1205            "listItemOpen": "*",
1206            "numlistItemLine": None,
1207            "numlistItemOpen": "1.",
1208            "deflistItem1Open": ": ",
1209            "deflistItem1Close": None,
1210            "deflistItem2Open": None,
1211            "deflistItem2Close": None,
1212            # Verbatim block
1213            "blockVerbOpen": None,
1214            "blockVerbClose": None,
1215            "bar1": "---",
1216            "bar2": "---",
1217            # URL, email and anchor
1218            "url": "\a",
1219            "urlMark": "[\a](\a)",
1220            "email": "<\a>",
1221            "emailMark": "[\a](mailto:\a)",
1222            "anchor": None,
1223            # Image markup
1224            "img": "![](\a)",
1225            "imgAlignLeft": None,
1226            "imgAlignRight": None,
1227            "imgAlignCenter": None,
1228            # Table attributes
1229            "tableTitleRowOpen": "| ",
1230            "tableTitleRowClose": "|\n|---------------|",
1231            "tableTitleCellSep": " |",
1232            "tableRowOpen": "|",
1233            "tableRowClose": "|",
1234            "tableCellSep": " |",
1235        },
1236    }
1237    assert set(alltags) == set(TARGETS)
1238
1239    for target, tags in alltags.items():
1240        for key, value in tags.items():
1241            if key not in keys:
1242                raise AssertionError("{} target has invalid key {}".format(target, key))
1243            if value is not None and not value:
1244                raise AssertionError("{} target drops {}".format(target, key))
1245
1246    # Compose the target tags dictionary.
1247    tags = collections.defaultdict(str)
1248    for key, value in alltags[config["target"]].items():
1249        if value:  # Skip unsupported markup.
1250            tags[key] = maskEscapeChar(value)
1251
1252    # Map strong line to pagebreak
1253    if rules["mapbar2pagebreak"] and tags["pageBreak"]:
1254        tags["bar2"] = tags["pageBreak"]
1255
1256    # Map strong line to separator if not defined
1257    if not tags["bar2"] and tags["bar1"]:
1258        tags["bar2"] = tags["bar1"]
1259
1260    return tags
1261
1262
1263##############################################################################
1264
1265
1266def getRules(config):
1267    """Return all the target-specific syntax rules."""
1268    allrules = [
1269        # target rules (ON/OFF)
1270        "linkable",  # target supports external links
1271        "tableable",  # target supports tables
1272        "imglinkable",  # target supports images as links
1273        "imgalignable",  # target supports image alignment
1274        "imgasdefterm",  # target supports image as definition term
1275        "autonumberlist",  # target supports numbered lists natively
1276        "autonumbertitle",  # target supports numbered titles natively
1277        "stylable",  # target supports external style files
1278        "parainsidelist",  # lists items supports paragraph
1279        "compactlist",  # separate enclosing tags for compact lists
1280        "spacedlistitem",  # lists support blank lines between items
1281        "listnotnested",  # lists cannot be nested
1282        "quotenotnested",  # quotes cannot be nested
1283        "verbblocknotescaped",  # don't escape specials in verb block
1284        "verbblockfinalescape",  # do final escapes in verb block
1285        "escapeurl",  # escape special in link URL
1286        "labelbeforelink",  # label comes before the link on the tag
1287        "onelinepara",  # dump paragraph as a single long line
1288        "tabletitlerowinbold",  # manually bold any cell on table titles
1289        "tablecellstrip",  # strip extra spaces from each table cell
1290        "tablecellspannable",  # the table cells can have span attribute
1291        "tablecellmulticol",  # separate open+close tags for multicol cells
1292        "barinsidequote",  # bars are allowed inside quote blocks
1293        "finalescapetitle",  # perform final escapes on title lines
1294        "autotocnewpagebefore",  # break page before automatic TOC
1295        "autotocnewpageafter",  # break page after automatic TOC
1296        "autotocwithbars",  # automatic TOC surrounded by bars
1297        "mapbar2pagebreak",  # map the strong bar to a page break
1298        "titleblocks",  # titles must be on open/close section blocks
1299        # Target code beautify (ON/OFF)
1300        "indentverbblock",  # add leading spaces to verb block lines
1301        "breaktablecell",  # break lines after any table cell
1302        "breaktablelineopen",  # break line after opening table line
1303        "notbreaklistopen",  # don't break line after opening a new list
1304        "keepquoteindent",  # don't remove the leading TABs on quotes
1305        "keeplistindent",  # don't remove the leading spaces on lists
1306        "blankendautotoc",  # append a blank line at the auto TOC end
1307        "tagnotindentable",  # tags must be placed at the line beginning
1308        "spacedlistitemopen",  # append a space after the list item open tag
1309        "spacednumlistitemopen",  # append a space after the numlist item open tag
1310        "deflisttextstrip",  # strip the contents of the deflist text
1311        "blanksaroundpara",  # put a blank line before and after paragraphs
1312        "blanksaroundverb",  # put a blank line before and after verb blocks
1313        "blanksaroundquote",  # put a blank line before and after quotes
1314        "blanksaroundlist",  # put a blank line before and after lists
1315        "blanksaroundnumlist",  # put a blank line before and after numlists
1316        "blanksarounddeflist",  # put a blank line before and after deflists
1317        "blanksaroundtable",  # put a blank line before and after tables
1318        "blanksaroundbar",  # put a blank line before and after bars
1319        "blanksaroundtitle",  # put a blank line before and after titles
1320        "blanksaroundnumtitle",  # put a blank line before and after numtitles
1321        # Value settings
1322        "listmaxdepth",  # maximum depth for lists
1323        "quotemaxdepth",  # maximum depth for quotes
1324        "tablecellaligntype",  # type of table cell align: cell, column
1325    ]
1326
1327    rules_bank = {
1328        "txt": {
1329            "indentverbblock": 1,
1330            "spacedlistitem": 1,
1331            "parainsidelist": 1,
1332            "keeplistindent": 1,
1333            "barinsidequote": 1,
1334            "autotocwithbars": 1,
1335            "blanksaroundpara": 1,
1336            "blanksaroundverb": 1,
1337            "blanksaroundquote": 1,
1338            "blanksaroundlist": 1,
1339            "blanksaroundnumlist": 1,
1340            "blanksarounddeflist": 1,
1341            "blanksaroundtable": 1,
1342            "blanksaroundbar": 1,
1343            "blanksaroundtitle": 1,
1344            "blanksaroundnumtitle": 1,
1345        },
1346        "html": {
1347            "indentverbblock": 0,
1348            "linkable": 1,
1349            "stylable": 1,
1350            "escapeurl": 1,
1351            "imglinkable": 1,
1352            "imgalignable": 1,
1353            "imgasdefterm": 1,
1354            "autonumberlist": 1,
1355            "spacedlistitem": 1,
1356            "parainsidelist": 1,
1357            "tableable": 1,
1358            "tablecellstrip": 1,
1359            "breaktablecell": 1,
1360            "breaktablelineopen": 1,
1361            "keeplistindent": 1,
1362            "keepquoteindent": 1,
1363            "barinsidequote": 1,
1364            "autotocwithbars": 0,
1365            "tablecellspannable": 1,
1366            "tablecellaligntype": "cell",
1367            # 'blanksaroundpara':1,
1368            "blanksaroundverb": 1,
1369            # 'blanksaroundquote':1,
1370            "blanksaroundlist": 1,
1371            "blanksaroundnumlist": 1,
1372            "blanksarounddeflist": 1,
1373            "blanksaroundtable": 1,
1374            "blanksaroundbar": 1,
1375            "blanksaroundtitle": 1,
1376            "blanksaroundnumtitle": 1,
1377            "titleblocks": 1,
1378        },
1379        "sgml": {
1380            "linkable": 1,
1381            "escapeurl": 1,
1382            "autonumberlist": 1,
1383            "spacedlistitem": 1,
1384            "tableable": 1,
1385            "tablecellstrip": 1,
1386            "blankendautotoc": 1,
1387            "quotenotnested": 1,
1388            "keeplistindent": 1,
1389            "keepquoteindent": 1,
1390            "barinsidequote": 1,
1391            "finalescapetitle": 1,
1392            "tablecellaligntype": "column",
1393            "blanksaroundpara": 1,
1394            "blanksaroundverb": 1,
1395            "blanksaroundquote": 1,
1396            "blanksaroundlist": 1,
1397            "blanksaroundnumlist": 1,
1398            "blanksarounddeflist": 1,
1399            "blanksaroundtable": 1,
1400            "blanksaroundbar": 1,
1401            "blanksaroundtitle": 1,
1402            "blanksaroundnumtitle": 1,
1403        },
1404        "dbk": {
1405            "linkable": 1,
1406            "tableable": 0,  # activate when table tags are ready
1407            "imglinkable": 1,
1408            "imgalignable": 1,
1409            "imgasdefterm": 1,
1410            "autonumberlist": 1,
1411            "autonumbertitle": 1,
1412            "parainsidelist": 1,
1413            "spacedlistitem": 1,
1414            "titleblocks": 1,
1415        },
1416        "mgp": {
1417            "tagnotindentable": 1,
1418            "spacedlistitem": 1,
1419            "imgalignable": 1,
1420            "autotocnewpagebefore": 1,
1421            "blanksaroundpara": 1,
1422            "blanksaroundverb": 1,
1423            # 'blanksaroundquote':1,
1424            "blanksaroundlist": 1,
1425            "blanksaroundnumlist": 1,
1426            "blanksarounddeflist": 1,
1427            "blanksaroundtable": 1,
1428            "blanksaroundbar": 1,
1429            # 'blanksaroundtitle':1,
1430            # 'blanksaroundnumtitle':1,
1431        },
1432        "tex": {
1433            "stylable": 1,
1434            "escapeurl": 1,
1435            "autonumberlist": 1,
1436            "autonumbertitle": 1,
1437            "spacedlistitem": 1,
1438            "compactlist": 1,
1439            "parainsidelist": 1,
1440            "tableable": 1,
1441            "tablecellstrip": 1,
1442            "tabletitlerowinbold": 0,
1443            "verbblocknotescaped": 1,
1444            "keeplistindent": 1,
1445            "listmaxdepth": 4,  # deflist is 6
1446            "quotemaxdepth": 6,
1447            "barinsidequote": 1,
1448            "finalescapetitle": 1,
1449            "autotocnewpageafter": 1,
1450            "mapbar2pagebreak": 1,
1451            "tablecellaligntype": "column",
1452            "tablecellmulticol": 1,
1453            "blanksaroundpara": 1,
1454            "blanksaroundverb": 1,
1455            # 'blanksaroundquote':1,
1456            "blanksaroundlist": 1,
1457            "blanksaroundnumlist": 1,
1458            "blanksarounddeflist": 1,
1459            "blanksaroundtable": 1,
1460            "blanksaroundbar": 1,
1461            "blanksaroundtitle": 1,
1462            "blanksaroundnumtitle": 1,
1463        },
1464        "lout": {
1465            "keepquoteindent": 1,
1466            "deflisttextstrip": 1,
1467            "escapeurl": 1,
1468            "verbblocknotescaped": 1,
1469            "imgalignable": 1,
1470            "mapbar2pagebreak": 1,
1471            "titleblocks": 1,
1472            "autonumberlist": 1,
1473            "parainsidelist": 1,
1474            "blanksaroundpara": 1,
1475            "blanksaroundverb": 1,
1476            # 'blanksaroundquote':1,
1477            "blanksaroundlist": 1,
1478            "blanksaroundnumlist": 1,
1479            "blanksarounddeflist": 1,
1480            "blanksaroundtable": 1,
1481            "blanksaroundbar": 1,
1482            "blanksaroundtitle": 1,
1483            "blanksaroundnumtitle": 1,
1484        },
1485        "moin": {
1486            "spacedlistitem": 1,
1487            "linkable": 1,
1488            "keeplistindent": 1,
1489            "tableable": 1,
1490            "barinsidequote": 1,
1491            "tabletitlerowinbold": 1,
1492            "tablecellstrip": 1,
1493            "autotocwithbars": 1,
1494            "tablecellaligntype": "cell",
1495            "deflisttextstrip": 1,
1496            "blanksaroundpara": 1,
1497            "blanksaroundverb": 1,
1498            # 'blanksaroundquote':1,
1499            "blanksaroundlist": 1,
1500            "blanksaroundnumlist": 1,
1501            "blanksarounddeflist": 1,
1502            "blanksaroundtable": 1,
1503            # 'blanksaroundbar':1,
1504            "blanksaroundtitle": 1,
1505            "blanksaroundnumtitle": 1,
1506        },
1507        "gwiki": {
1508            "spacedlistitem": 1,
1509            "linkable": 1,
1510            "keeplistindent": 1,
1511            "tableable": 1,
1512            "tabletitlerowinbold": 1,
1513            "tablecellstrip": 1,
1514            "autonumberlist": 1,
1515            "blanksaroundpara": 1,
1516            "blanksaroundverb": 1,
1517            # 'blanksaroundquote':1,
1518            "blanksaroundlist": 1,
1519            "blanksaroundnumlist": 1,
1520            "blanksarounddeflist": 1,
1521            "blanksaroundtable": 1,
1522            # 'blanksaroundbar':1,
1523            "blanksaroundtitle": 1,
1524            "blanksaroundnumtitle": 1,
1525        },
1526        "adoc": {
1527            "spacedlistitem": 1,
1528            "linkable": 1,
1529            "keeplistindent": 1,
1530            "autonumberlist": 1,
1531            "autonumbertitle": 1,
1532            "listnotnested": 1,
1533            "blanksaroundpara": 1,
1534            "blanksaroundverb": 1,
1535            "blanksaroundlist": 1,
1536            "blanksaroundnumlist": 1,
1537            "blanksarounddeflist": 1,
1538            "blanksaroundtable": 1,
1539            "blanksaroundtitle": 1,
1540            "blanksaroundnumtitle": 1,
1541        },
1542        "doku": {
1543            "indentverbblock": 1,  # DokuWiki uses '  ' to mark verb blocks
1544            "spacedlistitem": 1,
1545            "linkable": 1,
1546            "keeplistindent": 1,
1547            "tableable": 1,
1548            "barinsidequote": 1,
1549            "tablecellstrip": 1,
1550            "autotocwithbars": 1,
1551            "autonumberlist": 1,
1552            "imgalignable": 1,
1553            "tablecellaligntype": "cell",
1554            "blanksaroundpara": 1,
1555            "blanksaroundverb": 1,
1556            # 'blanksaroundquote':1,
1557            "blanksaroundlist": 1,
1558            "blanksaroundnumlist": 1,
1559            "blanksarounddeflist": 1,
1560            "blanksaroundtable": 1,
1561            "blanksaroundbar": 1,
1562            "blanksaroundtitle": 1,
1563            "blanksaroundnumtitle": 1,
1564        },
1565        "pmw": {
1566            "indentverbblock": 1,
1567            "spacedlistitem": 1,
1568            "linkable": 1,
1569            "labelbeforelink": 1,
1570            # 'keeplistindent':1,
1571            "tableable": 1,
1572            "barinsidequote": 1,
1573            "tablecellstrip": 1,
1574            "autotocwithbars": 1,
1575            "autonumberlist": 1,
1576            "spacedlistitemopen": 1,
1577            "spacednumlistitemopen": 1,
1578            "imgalignable": 1,
1579            "tabletitlerowinbold": 1,
1580            "tablecellaligntype": "cell",
1581            "blanksaroundpara": 1,
1582            "blanksaroundverb": 1,
1583            "blanksaroundquote": 1,
1584            "blanksaroundlist": 1,
1585            "blanksaroundnumlist": 1,
1586            "blanksarounddeflist": 1,
1587            "blanksaroundtable": 1,
1588            "blanksaroundbar": 1,
1589            "blanksaroundtitle": 1,
1590            "blanksaroundnumtitle": 1,
1591        },
1592        "wiki": {
1593            "linkable": 1,
1594            "tableable": 1,
1595            "tablecellstrip": 1,
1596            "autotocwithbars": 1,
1597            "spacedlistitemopen": 1,
1598            "spacednumlistitemopen": 1,
1599            "deflisttextstrip": 1,
1600            "autonumberlist": 1,
1601            "imgalignable": 1,
1602            "blanksaroundpara": 1,
1603            "blanksaroundverb": 1,
1604            # 'blanksaroundquote':1,
1605            "blanksaroundlist": 1,
1606            "blanksaroundnumlist": 1,
1607            "blanksarounddeflist": 1,
1608            "blanksaroundtable": 1,
1609            "blanksaroundbar": 1,
1610            "blanksaroundtitle": 1,
1611            "blanksaroundnumtitle": 1,
1612        },
1613        "man": {
1614            "spacedlistitem": 1,
1615            "tagnotindentable": 1,
1616            "tableable": 1,
1617            "tablecellaligntype": "column",
1618            "tabletitlerowinbold": 1,
1619            "tablecellstrip": 1,
1620            "barinsidequote": 1,
1621            "parainsidelist": 0,
1622            "blanksaroundpara": 0,
1623            "blanksaroundverb": 1,
1624            # 'blanksaroundquote':1,
1625            "blanksaroundlist": 1,
1626            "blanksaroundnumlist": 1,
1627            "blanksarounddeflist": 1,
1628            "blanksaroundtable": 1,
1629            # 'blanksaroundbar':1,
1630            "blanksaroundtitle": 0,
1631            "blanksaroundnumtitle": 1,
1632        },
1633        "creole": {
1634            "linkable": 1,
1635            "tableable": 1,
1636            "imglinkable": 1,
1637            "tablecellstrip": 1,
1638            "autotocwithbars": 1,
1639            "spacedlistitemopen": 1,
1640            "spacednumlistitemopen": 1,
1641            "deflisttextstrip": 1,
1642            "verbblocknotescaped": 1,
1643            "blanksaroundpara": 1,
1644            "blanksaroundverb": 1,
1645            "blanksaroundquote": 1,
1646            "blanksaroundlist": 1,
1647            "blanksaroundnumlist": 1,
1648            "blanksarounddeflist": 1,
1649            "blanksaroundtable": 1,
1650            "blanksaroundbar": 1,
1651            "blanksaroundtitle": 1,
1652        },
1653        "md": {
1654            # "keeplistindent": 1,
1655            "linkable": 1,
1656            "labelbeforelink": 1,
1657            "tableable": 1,
1658            "imglinkable": 1,
1659            "tablecellstrip": 1,
1660            "autonumberlist": 1,
1661            "spacedlistitemopen": 1,
1662            "spacednumlistitemopen": 1,
1663            "deflisttextstrip": 1,
1664            "blanksaroundpara": 1,
1665            "blanksaroundlist": 1,
1666            "blanksaroundnumlist": 1,
1667            # "blanksarounddeflist": 1,
1668            "blanksaroundtable": 1,
1669            "blanksaroundbar": 1,
1670            "blanksaroundtitle": 1,
1671        },
1672    }
1673    assert set(rules_bank) == set(TARGETS)
1674
1675    for target, rules in rules_bank.items():
1676        for rule in rules:
1677            if rule not in allrules:
1678                raise AssertionError(
1679                    "{} target has invalid rule {}".format(target, rule)
1680                )
1681
1682    ret = collections.defaultdict(int)
1683    ret.update(rules_bank[config["target"]])
1684    return ret
1685
1686
1687##############################################################################
1688
1689
def getRegexes():
    """Return a dict with all the regexes used to find the t2t marks.

    Every value is a compiled regex, except the one stored on the
    "_urlskel" key, which is the dict holding the URL pattern pieces
    as plain strings.
    """
    # Marks compiled with no flags.
    #
    # About mono, raw, tagged, bold, italic, underline and strike:
    # - marks must be glued with the contents, no boundary spaces
    # - they are greedy, so in ****bold****, turns to <b>**bold**</b>
    plain_marks = [
        ("blockVerbOpen", r"^```\s*$"),
        ("blockVerbClose", r"^```\s*$"),
        ("blockRawOpen", r'^"""\s*$'),
        ("blockRawClose", r'^"""\s*$'),
        ("blockTaggedOpen", r"^'''\s*$"),
        ("blockTaggedClose", r"^'''\s*$"),
        ("blockCommentOpen", r"^%%%\s*$"),
        ("blockCommentClose", r"^%%%\s*$"),
        ("quote", r"^\t+"),
        ("1lineVerb", r"^``` (?=.)"),
        ("1lineRaw", r'^""" (?=.)'),
        ("1lineTagged", r"^''' (?=.)"),
        ("fontMono", r"``([^\s](|.*?[^\s])`*)``"),
        ("raw", r'""([^\s](|.*?[^\s])"*)""'),
        ("tagged", r"''([^\s](|.*?[^\s])'*)''"),
        ("fontBold", r"\*\*([^\s](|.*?[^\s])\**)\*\*"),
        ("fontItalic", r"//([^\s](|.*?[^\s])/*)//"),
        ("fontUnderline", r"__([^\s](|.*?[^\s])_*)__"),
        ("fontStrike", r"--([^\s](|.*?[^\s])-*)--"),
        ("list", r"^( *)(-) (?=[^ ])"),
        ("numlist", r"^( *)(\+) (?=[^ ])"),
        ("deflist", r"^( *)(:) (.*)$"),
        ("listclose", r"^( *)([-+:])\s*$"),
        ("bar", r"^(\s*)([_=-]{20,})\s*$"),
        ("table", r"^ *\|([|_/])? "),
        ("blankline", r"^\s*$"),
        ("comment", r"^%"),
    ]

    # Auxiliary tag regexes, compiled as case insensitive
    aux_marks = [
        ("_imgAlign", r"~A~"),
        ("_tableAlign", r"~A~"),
        ("_anchor", r"~A~"),
        ("_tableBorder", r"~B~"),
        ("_tableColAlign", r"~C~"),
        ("_tableCellColSpan", r"~S~"),
        ("_tableCellAlign", r"~A~"),
    ]

    bank = {}
    for name, pattern in plain_marks:
        bank[name] = re.compile(pattern)
    for name, pattern in aux_marks:
        bank[name] = re.compile(pattern, re.I)

    # Special char to place data on TAGs contents  (\a == bell)
    bank["x"] = re.compile("\a")

    # Titles: one skeleton, filled with the mark char of each title kind
    title_skel = r"^ *(?P<id>%s)(?P<txt>%s)\1(\[(?P<label>[\w-]*)\])?\s*$"
    for name, mark, text in (
        ("title", "[=]{1,5}", "[^=](|.*[^=])"),
        ("numtitle", "[+]{1,5}", "[^+](|.*[^+])"),
    ):
        bank[name] = re.compile(title_skel % (mark, text))

    # The complicated regexes begin here.
    # Textual descriptions on --help's style: [...] is optional, | is OR

    # [image.EXT]
    image = r"\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp|svg))\]"

    # Link pieces
    # http://www.gbiv.com/protocols/uri/rfc/rfc3986.html
    # pchar: A-Za-z._~- / %FF / !$&'()*+,;= / :@
    # Recommended order: scheme://user:pass@domain/path?query=foo#anchor
    # Also works       : scheme://user:pass@domain/path#anchor?query=foo
    # TODO form: !'():
    url_parts = {
        "proto": r"(https?|ftp|news|telnet|gopher|wais)://",
        "guess": r"(www[23]?|ftp)\.",  # w/out proto, try to guess
        "login": r"A-Za-z0-9_.-",  # for ftp://login@domain.com
        "pass": r"[^ @]*",  # for ftp://login:pass@dom.com
        "chars": r"A-Za-z0-9%._/~:,=$@&+-",  # %20(space), :80(port), D&D
        "anchor": r"A-Za-z0-9%._-",  # %nn(encoded)
        "form": r"A-Za-z0-9/%&=+:;.,$@*_-",  # .,@*_-(as is)
        "punct": r".,;:!?",
    }
    login_chars = url_parts["login"]
    url_chars = url_parts["chars"]
    anchor_chars = url_parts["anchor"]
    form_chars = url_parts["form"]

    # username [ :password ] @
    login = r"([{}]+(:{})?@)?".format(login_chars, url_parts["pass"])

    # [ http:// ] [ username:password@ ] domain.com [ / ]
    #     [ #anchor | ?form=data ]
    url = r"\b({}{}|{})[{}]+\b/*(\?[{}]+)?(#[{}]*)?".format(
        url_parts["proto"],
        login,
        url_parts["guess"],
        url_chars,
        form_chars,
        anchor_chars,
    )

    # filename | [ filename ] #anchor
    local_url = r"[{}]+|[{}]*(#[{}]*)".format(url_chars, url_chars, anchor_chars)

    # user@domain [ ?form=data ]
    email = r"\b[{}]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{{2,4}}\b(\?[{}]+)?".format(
        login_chars, form_chars
    )

    # The URL pieces are saved for future use
    bank["_urlskel"] = url_parts

    # And now the real regexes, all case insensitive
    bank["email"] = re.compile(email, re.I)

    # email | url
    bank["link"] = re.compile(r"{}|{}".format(url, email), re.I)

    # \[ label | imagetag    url | email | filename \]
    linkmark = r"\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]" % (
        image,
        url,
        email,
        local_url,
    )
    bank["linkmark"] = re.compile(linkmark, re.I)

    # Image
    bank["img"] = re.compile(image, re.I)

    # Special things
    bank["special"] = re.compile(r"^%!\s*")
    return bank
1816
1817
1818# END OF regex nightmares
1819
1820
class error(Exception):
    """The exception type raised by Error()."""
1823
1824
def Quit(msg=""):
    """Print msg (when not empty) and leave the program with exit status 0."""
    if msg:
        print(msg)
    raise SystemExit(0)
1829
1830
def Error(msg):
    """Raise the 'error' exception, prefixing msg with the program name."""
    prefix = "%s: Error: " % my_name
    raise error(prefix + msg)
1834
1835
def getTraceback():
    """Return the traceback of the exception being handled, as a string.

    This is a best-effort helper: if the traceback cannot be composed
    for any reason, None is returned instead.
    """
    try:
        import traceback

        pieces = traceback.format_exception(*sys.exc_info())
        return "".join(pieces)
    except Exception:
        return None
1844
1845
def getUnknownErrorMessage():
    """Compose the message which asks the user to report an unknown error.

    The text includes the author e-mail (my_email) and the traceback of
    the exception being handled, as given by getTraceback().
    """
    apology = "Sorry! Txt2tags aborted by an unknown error."
    request = "Please send the following Error Traceback to the author"
    return "{}\n{} ({}):\n\n{}".format(apology, request, my_email, getTraceback())
1854
1855
def Message(msg, level):
    """Print msg when level is not above VERBOSE and QUIET is off.

    The message is prefixed by five dashes for each level.
    """
    if QUIET or level > VERBOSE:
        return
    dashes = ("-" * 5) * level
    print("{} {}".format(dashes, msg))
1860
1861
def Debug(msg, id_=0, linenr=None):
    """Print a debug message, tagged with the category at index id_.

    Nothing is printed when QUIET is on or DEBUG is off. When linenr is
    informed, the message is also prefixed by that line number.
    """
    if not QUIET and DEBUG:
        categories = ("INI", "CFG", "SRC", "BLK", "HLD", "GUI", "OUT", "DET")
        text = msg if linenr is None else "LINE %04d: %s" % (linenr, msg)
        print("++ {}: {}".format(categories[id_], text))
1870
1871
def Readfile(file_path):
    """Read file_path and return its contents as a list of lines.

    The special "-" path means reading from STDIN. Regular files are
    decoded using the ENCODING charset. The line breaks are removed.
    A failure when reading is reported by Error().
    """
    if file_path != "-":
        try:
            with io.open(file_path, encoding=ENCODING) as source:
                contents = source.read()
        except IOError as exception:
            Error("Cannot read file: {}\n{}".format(file_path, exception))
    else:
        try:
            contents = sys.stdin.read()
        except KeyboardInterrupt:
            Error("You must feed me with data on STDIN!")
    lines = contents.splitlines()
    total = len(lines)
    Message("File read (%d lines): %s" % (total, file_path), 2)
    return lines
1887
1888
def Savefile(file_path, lines):
    """Write lines to file_path, encoded using the ENCODING charset.

    The lines are joined by a line break and a final line break is
    added after the last one. A failure when opening or writing the
    file is reported by Error().
    """
    contents = "\n".join(lines) + "\n"
    try:
        with io.open(file_path, "w", encoding=ENCODING) as f:
            # contents is always a str here, so a text mode write cannot
            # raise TypeError. The old fallback to contents.decode() was
            # a Python 2 leftover (str has no decode() on Python 3).
            f.write(contents)
    except IOError as exception:
        Error("Cannot open file for writing: {}\n{}".format(file_path, exception))
1899
1900
def dotted_spaces(txt=""):
    """Return txt with every space character turned into a dot."""
    return ".".join(txt.split(" "))
1903
1904
1905# TIP: win env vars http://www.winnetmag.com/Article/ArticleID/23873/23873.html
def get_rc_path():
    """Return the full path for the user's RC file.

    The T2TCONFIG environment variable, when set, is returned as is.
    Otherwise the path is composed by joining the user directory (the
    first non-empty of the HOME and HOMEPATH environment variables)
    with the platform RC file name. On Windows, the path is also
    prefixed by the HOMEDRIVE environment variable, when it is set.

    Returns an empty string when the user directory cannot be found.
    Note that the existence of the file is not checked.
    """
    # Try to get the path from an env var. If yes, we're done
    user_defined = os.environ.get("T2TCONFIG")
    if user_defined:
        return user_defined

    # Env var not found, so perform automatic path composing.
    # Set default filename according to the system platform
    rc_names = {"default": ".txt2tagsrc", "win": "_t2trc"}
    rc_file = rc_names.get(sys.platform[:3]) or rc_names["default"]

    # The file must be on the user directory, but where is this dir?
    rc_dir = ""
    for var in ("HOME", "HOMEPATH"):
        rc_dir = os.environ.get(var)
        if rc_dir:
            break
    if not rc_dir:
        # Sorry, not found
        return ""

    # rc dir found, now we must join dir+file to compose the full path
    rc_path = os.path.join(rc_dir, rc_file)
    # On windows, prefix with the drive (%homedrive%: 2k/XP/NT)
    if sys.platform.startswith("win"):
        # HOMEDRIVE may be unset (when the user directory came from HOME,
        # for example). Joining with "" leaves rc_path untouched, instead
        # of raising TypeError as os.path.join(None, ...) would.
        rc_drive = os.environ.get("HOMEDRIVE", "")
        rc_path = os.path.join(rc_drive, rc_path)
    return rc_path
1933
1934
1935##############################################################################
1936
1937
class CommandLine:
    """
    Command Line class - Masters command line

    Checks a command line and extracts its data. The valid --long
    options, flags and actions come from the global OPTIONS, FLAGS and
    ACTIONS dictionaries. The short options, and the long option each
    one stands for, are registered here (self.short_long).

    _compose_short_opts() -> str
    _compose_long_opts() -> list
            Compose the valid short and long options, on the format
            expected by the 'getopt' module.

    parse() -> (opts, args)
            Call getopt to check and parse the command line, which
            must be a list without the program name (sys.argv[1:]).

    get_raw_config() -> [RAW config]
            Parse the command line and convert the data found to the
            RAW config format (see the ConfigMaster class for the
            format description). The optional 'ignore' and 'filter_'
            arguments filter out or in the specified keys.

    Since get_raw_config() calls parse(), the typical use of this
    class is:

        raw = CommandLine().get_raw_config(sys.argv[1:])
    """

    def __init__(self):
        # short:long options equivalence
        self.short_long = {
            "C": "config-file",
            "h": "help",
            "H": "no-headers",
            "i": "infile",
            "n": "enum-title",
            "o": "outfile",
            "q": "quiet",
            "t": "target",
            "v": "verbose",
            "V": "version",
        }

        self.all_options = list(OPTIONS)
        self.all_flags = list(FLAGS)
        self.all_actions = list(ACTIONS)

        # Compose valid short and long options data for getopt
        self.short_opts = self._compose_short_opts()
        self.long_opts = self._compose_long_opts()

    def _compose_short_opts(self):
        "Returns a string like 'hVt:o' with all short options/flags"
        # An option (not a flag) takes a parameter, so it gets a colon
        return "".join(
            short + ":" if long_ in self.all_options else short
            for short, long_ in self.short_long.items()
        )

    def _compose_long_opts(self):
        "Returns a list with all the valid long options/flags"
        return (
            [name + "=" for name in self.all_options]  # options take a value
            + list(self.all_flags)  # flag ON
            + list(self.all_actions)  # actions
            + ["no-" + name for name in self.all_flags]  # flag OFF
            + ["no-style", "no-outfile", "no-infile", "no-targets"]  # turn OFF
        )

    def _tokenize(self, cmd_string=""):
        "Split a command line string into a list, breaking on whitespace"
        # TODO protect quotes contents -- Don't use it, pass cmdline as list
        tokens = cmd_string.split()
        return tokens

    def parse(self, cmdline):
        "Check/Parse a command line list     TIP: no program name!"
        try:
            return getopt.getopt(cmdline, self.short_opts, self.long_opts)
        except getopt.error as errmsg:
            Error("%s (try --help)" % errmsg)

    def get_raw_config(self, cmdline=None, ignore=None, filter_=None, relative=False):
        "Returns the options/arguments found as RAW config"

        if not cmdline:
            return []
        ignore = ignore or []
        filter_ = filter_ or []

        # We need lists, not strings (such as from %!options)
        if not isinstance(cmdline, list):
            cmdline = self._tokenize(cmdline)

        # Extract name/value pair of all configs, check for invalid names
        options, arguments = self.parse(list(cmdline))

        raw = []
        for flag, value in options:
            # Remove leading - and --
            name = re.sub("^--?", "", flag)

            # Translate short option to long
            if len(name) == 1:
                name = self.short_long[name]

            # -C, --config-file inclusion, path relative to PWD
            if name == "config-file":
                raw.extend(ConfigLines().include_config_file(value))
                continue

            # Outfile exception: path relative to PWD
            if name == "outfile" and relative and value not in [STDOUT, MODULEOUT]:
                value = os.path.abspath(value)

            raw.append(["all", name, value])

        # The remaining arguments, if any, are the input files
        raw.extend(["all", "infile", infile] for infile in arguments)

        # Apply 'ignore' and 'filter_' rules (filter_ is stronger)
        if ignore or filter_:
            raw = [
                [target, name, value]
                for target, name, value in raw
                if (filter_ and name in filter_) or (ignore and name not in ignore)
            ]

        return raw
2086
2087
2088##############################################################################
2089
2090
class SourceDocument:
    """
    SourceDocument class - scan document structure, extract data

    It knows about full files. It reads a file (or receives its lines)
    and identifies where each area (Head, Conf, Body) begins. With
    this info it can extract the contents of each area.
    Note: the lines are stored with no line break chars.

    DATA:
      self.arearef - Save Head, Conf, Body init line number
                     (0 means the area was not found)
      self.areas   - Store the area names which are not empty
                     (an empty area has its name replaced by '')
      self.buffer  - The full file contents, one line per item,
                     with an extra empty item at position 0, so the
                     first line of the file is at position 1

    METHODS:
      get()   - Access the contents of an Area. Example:
                config = SourceDocument(file).get('conf')

      split() - Get all the document Areas at once. Example:
                head, conf, body = SourceDocument(file).split()

    RULES:
        * The document parts are sequential: Head, Conf and Body.
        * One ends when the next begins.
        * The Conf Area is optional, so a document can have just
          Head and Body Areas.

        These are the Areas limits:
          - Head Area: the first three lines
          - Body Area: from the first valid text line to the end
          - Conf Area: the comments between Head and Body Areas

        Exception: If the first line is blank, this means no
        header info, so the Head Area is just the first line.
    """

    def __init__(self, filename="", contents=None):
        self.areas = ["head", "conf", "body"]
        self.arearef = []
        self.areas_fancy = ""
        self.filename = filename
        self.buffer = []
        # A file name has precedence over the already loaded contents
        if filename:
            self.scan_file(filename)
        elif contents:
            self.scan(contents)

    def split(self):
        "Returns all document parts, splitted into lists."
        return self.get("head"), self.get("conf"), self.get("body")

    def get(self, areaname):
        "Returns head|conf|body contents from self.buffer"
        # Sanity: unknown (or empty) area, or nothing was scanned
        if areaname not in self.areas or not self.buffer:
            return []
        # Go get it: each area ends where the next existent one begins
        bufend = len(self.buffer)
        if areaname == "head":
            ini = 1  # position 0 is a filler, see scan()
            end = self.arearef[1] or self.arearef[2] or bufend
        elif areaname == "conf":
            ini = self.arearef[1]
            end = self.arearef[2] or bufend
        elif areaname == "body":
            ini = self.arearef[2]
            end = bufend
        else:
            Error("Unknown Area name '%s'" % areaname)
        lines = self.buffer[ini:end]
        # Make sure head will always have 3 lines
        if areaname == "head":
            lines.extend([""] * (3 - len(lines)))
        return lines

    def scan_file(self, filename):
        "Read the file and scan its lines to identify the areas"
        Debug("source file: %s" % filename)
        Message("Loading source document", 1)
        self.scan(Readfile(filename))

    def scan(self, lines):
        """Run through the source lines and identify head/conf/body areas.

        The results are saved on self.arearef, self.areas, self.buffer
        and self.areas_fancy. The 'lines' list itself is not changed.
        """
        if not lines:
            Error("The input file is empty: %s" % self.filename)
        cfg_parser = ConfigLines().parse_line
        # Work on a copy: the filler item must not be inserted on the
        # caller's list, or scanning the same list again would find a
        # blank first line and handle the document as having no headers.
        buf = [""] + list(lines)  # text start at pos 1
        ref = [1, 4, 0]
        if not buf[1].strip():  # no header
            ref[0] = 0
            ref[1] = 2
        rgx = getRegexes()
        on_comment_block = False
        for i in range(ref[1], len(buf)):  # find body init:
            line = buf[i]
            # Handle comment blocks inside config area
            if on_comment_block:
                if rgx["blockCommentClose"].search(line):
                    on_comment_block = False
                continue
            if rgx["blockCommentOpen"].search(line):
                on_comment_block = True
                continue

            # Body starts at the first non-blank line which is not a
            # comment, or which is an %!include command
            if line.strip() and (line[0] != "%" or cfg_parser(line, "include")[1]):
                ref[2] = i
                break
        if ref[1] == ref[2]:
            ref[1] = 0  # no conf area
        for i in 0, 1, 2:  # del !existent
            if ref[i] >= len(buf):
                ref[i] = 0  # title-only
            if not ref[i]:
                self.areas[i] = ""
        Debug("Head,Conf,Body start line: %s" % ref)
        self.arearef = ref  # save results
        self.buffer = buf
        # Fancyness sample: head conf body (1 4 8)
        self.areas_fancy = "{} ({})".format(
            " ".join(self.areas), " ".join(str(x or "") for x in ref)
        )
        Message("Areas found: %s" % self.areas_fancy, 2)

    def get_raw_config(self):
        "Handy method to get the CONF area RAW config (if any)"
        if "conf" not in self.areas:
            return []
        Message("Scanning source document CONF area", 1)
        raw = ConfigLines(
            file_=self.filename, lines=self.get("conf"), first_line=self.arearef[1]
        ).get_raw_config()
        Debug("document raw config: %s" % raw, 1)
        return raw
2230
2231
2232##############################################################################
2233
2234
2235class ConfigMaster:
2236    """
2237    ConfigMaster class - the configuration wizard
2238
2239    This class is the configuration master. It knows how to handle
2240    the RAW and PARSED config format. It also performs the sanity
2241    checking for a given configuration.
2242
2243    DATA:
2244      self.raw         - Stores the config on the RAW format
2245      self.parsed      - Stores the config on the PARSED format
2246      self.defaults    - Stores the default values for all keys
2247      self.off         - Stores the OFF values for all keys
2248      self.multi       - List of keys which can have multiple values
2249      self.incremental - List of keys which are incremental
2250
2251    RAW FORMAT:
2252      The RAW format is a list of lists, being each mother list item
2253      a full configuration entry. Any entry is a 3 item list, on
2254      the following format: [ TARGET, KEY, VALUE ]
2255      Being a list, the order is preserved, so it's easy to use
2256      different kinds of configs, as CONF area and command line,
2257      respecting the precedence.
2258      The special target 'all' is used when no specific target was
2259      defined on the original config.
2260
2261    PARSED FORMAT:
2262      The PARSED format is a dictionary, with all the 'key : value'
2263      found by reading the RAW config. The self.target contents
2264      matters, so this dictionary only contains the target's
2265      config. The configs of other targets are ignored.
2266
2267    The CommandLine and ConfigLines classes have the get_raw_config()
2268    method which convert the configuration found to the RAW format.
2269    Just feed it to parse() and get a brand-new ready-to-use config
2270    dictionary. Example:
2271
2272        >>> raw = CommandLine().get_raw_config(['-n', '-H'])
2273        >>> print raw
2274        [['all', 'enum-title', ''], ['all', 'no-headers', '']]
2275        >>> parsed = ConfigMaster(raw).parse()
2276        >>> print parsed
2277        {'enum-title': 1, 'headers': 0}
2278    """
2279
2280    def __init__(self, raw=None, target=""):
2281        self.raw = raw or []
2282        self.target = target
2283        self.parsed = {}
2284        self.dft_options = OPTIONS.copy()
2285        self.dft_flags = FLAGS.copy()
2286        self.dft_actions = ACTIONS.copy()
2287        self.defaults = self._get_defaults()
2288        self.off = self._get_off()
2289        self.incremental = ["verbose"]
2290        self.multi = ["infile", "preproc", "postproc", "options", "style"]
2291
2292    def _get_defaults(self):
2293        "Get the default values for all config/options/flags"
2294        empty = {}
2295        for kw in CONFIG_KEYWORDS:
2296            empty[kw] = ""
2297        empty.update(self.dft_options)
2298        empty.update(self.dft_flags)
2299        empty.update(self.dft_actions)
2300        empty["sourcefile"] = ""  # internal use only
2301        return empty
2302
2303    def _get_off(self):
2304        "Turns OFF all the config/options/flags"
2305        off = {}
2306        for key in self.defaults.keys():
2307            kind = type(self.defaults[key])
2308            if kind == int:
2309                off[key] = 0
2310            elif kind == str:
2311                off[key] = ""
2312            elif kind == list:
2313                off[key] = []
2314            else:
2315                Error("ConfigMaster: %s: Unknown type" % key)
2316        return off
2317
2318    def _check_target(self):
2319        "Checks if the target is already defined. If not, do it"
2320        if not self.target:
2321            self.target = self.find_value("target")
2322
2323    def get_target_raw(self):
2324        "Returns the raw config for self.target or 'all'"
2325        ret = []
2326        self._check_target()
2327        for entry in self.raw:
2328            if entry[0] == self.target or entry[0] == "all":
2329                ret.append(entry)
2330        return ret
2331
2332    def add(self, key, val):
2333        "Adds the key:value pair to the config dictionary (if needed)"
2334        # %!options
2335        if key == "options":
2336            ignoreme = list(self.dft_actions.keys()) + ["target"]
2337            ignoreme.remove("targets")
2338            raw_opts = CommandLine().get_raw_config(val, ignore=ignoreme)
2339            for _target, key, val in raw_opts:
2340                self.add(key, val)
2341            return
2342        # The no- prefix turns OFF this key
2343        if key.startswith("no-"):
2344            key = key[3:]  # remove prefix
2345            val = self.off.get(key)  # turn key OFF
2346        # Is this key valid?
2347        if key not in self.defaults.keys():
2348            Debug("Bogus Config {}:{}".format(key, val), 1)
2349            return
2350        # Is this value the default one?
2351        if val == self.defaults.get(key):
2352            # If default value, remove previous key:val
2353            if key in self.parsed:
2354                del self.parsed[key]
2355            # Nothing more to do
2356            return
2357        # Flags ON comes empty. we'll add the 1 value now
2358        if val == "" and (
2359            key in self.dft_flags.keys() or key in self.dft_actions.keys()
2360        ):
2361            val = 1
2362        # Multi value or single?
2363        if key in self.multi:
2364            # First one? start new list
2365            if key not in self.parsed:
2366                self.parsed[key] = []
2367            self.parsed[key].append(val)
2368        # Incremental value? so let's add it
2369        elif key in self.incremental:
2370            self.parsed[key] = (self.parsed.get(key) or 0) + val
2371        else:
2372            self.parsed[key] = val
2373        fancykey = dotted_spaces("%12s" % key)
2374        Message("Added config {} : {}".format(fancykey, val), 3)
2375
2376    def get_outfile_name(self, config):
2377        "Dirname is the same for {in,out}file"
2378        infile, outfile = config["sourcefile"], config["outfile"]
2379        if (
2380            outfile
2381            and outfile not in (STDOUT, MODULEOUT)
2382            and not os.path.isabs(outfile)
2383        ):
2384            outfile = os.path.join(os.path.dirname(infile), outfile)
2385        if infile == STDIN and not outfile:
2386            outfile = STDOUT
2387        if infile == MODULEIN and not outfile:
2388            outfile = MODULEOUT
2389        if not outfile and (infile and config.get("target")):
2390            basename = re.sub(r"\.(txt|t2t)$", "", infile)
2391            outfile = "{}.{}".format(basename, config["target"])
2392        Debug(" infile: '%s'" % infile, 1)
2393        Debug("outfile: '%s'" % outfile, 1)
2394        return outfile
2395
2396    def sanity(self, config):
2397        "Basic config sanity checking"
2398        if not config:
2399            return {}
2400        target = config.get("target")
2401        # Some actions don't require target specification
2402        if not target:
2403            for action in NO_TARGET:
2404                if config.get(action):
2405                    target = "txt"
2406                    break
2407
2408        # We *need* a target
2409        if not target:
2410            Error(
2411                "No target specified (try --help)."
2412                + "\n\n"
2413                + "Please select a target using the -t option or the %!target command."
2414                + "\n"
2415                + "Example:"
2416                + " {} -t html {}".format(my_name, "file.t2t")
2417                + "\n\n"
2418                + "Run 'txt2tags --targets' to see all available targets."
2419            )
2420        # And of course, an infile also
2421        if "infile" not in config:
2422            Error("Missing input file (try --help)")
2423        # Is the target valid?
2424        if not TARGETS.count(target):
2425            Error(
2426                "Invalid target '%s'" % target
2427                + "\n\n"
2428                + "Run 'txt2tags --targets' to see all the available targets."
2429            )
2430        # Ensure all keys are present
2431        empty = self.defaults.copy()
2432        empty.update(config)
2433        config = empty.copy()
2434        # Restore target
2435        config["target"] = target
2436        # Set output file name
2437        config["outfile"] = self.get_outfile_name(config)
2438        # Checking suicide
2439        if os.path.abspath(config["sourcefile"]) == os.path.abspath(
2440            config["outfile"]
2441        ) and config["outfile"] not in [STDOUT, MODULEOUT]:
2442            Error("Input and Output files are the same: %s" % config["outfile"])
2443        return config
2444
2445    def parse(self):
2446        "Returns the parsed config for the current target"
2447        raw = self.get_target_raw()
2448        for _target, key, value in raw:
2449            self.add(key, value)
2450        Message("Added the following keys: %s" % ", ".join(sorted(self.parsed)), 2)
2451        return self.parsed.copy()
2452
2453    def find_value(self, key="", target=""):
2454        "Scans ALL raw config to find the desired key"
2455        ret = []
2456        # Scan and save all values found
2457        for targ, k, val in self.raw:
2458            if k == key and (targ == target or targ == "all"):
2459                ret.append(val)
2460        if not ret:
2461            return ""
2462        # If not multi value, return only the last found
2463        if key in self.multi:
2464            return ret
2465        else:
2466            return ret[-1]
2467
2468
2469########################################################################
2470
2471
class ConfigLines:
    """
    ConfigLines class - the config file data extractor

    This class reads and parses the config lines on the %!key:val
    format, converting them to RAW config. It deals with the user
    config file (RC file), the source document CONF area and
    %!includeconf directives.

    Call it passing a file name or feed the desired config lines.
    Then just call the get_raw_config() method to receive the full
    config data on the RAW format: a list of [target, key, value]
    items. This method also follows the possible %!includeconf
    directives found on the config lines. Example:

            raw = ConfigLines(file_=".txt2tagsrc").get_raw_config()

    The parse_line() method is also useful to be used alone,
    to identify and tokenize a single config line. For example,
    to get the %!include command components, on the source
    document BODY:

            target, key, value = ConfigLines().parse_line(body_line)
    """

    # Template for the %!key(target): value matcher. The two %s slots
    # receive the key name and the target name patterns (see parse_line).
    _CFG_TEMPLATE = r"""
                ^%%!\s*               # leading id with opt spaces
                (?P<name>%s)\s*       # config name
                (\((?P<target>%s)\))? # optional target spec inside ()
                \s*:\s*               # key:value delimiter with opt spaces
                (?P<value>\S.*?)      # config value, one char is enough
                \s*$                  # rstrip() spaces and hit EOL
                """

    # Splits the PATTERN and REPLACE parts of %!preproc and %!postproc
    _PREPOST_REGEX = re.compile(
        r"""
                                      # ---[ PATTERN ]---
                ^( "([^"]*)"          # "double quoted" or
                | '([^']*)'           # 'single quoted' or
                | ([^\s]+)            # single_word
                )
                \s+                   # separated by spaces

                                      # ---[ REPLACE ]---
                ( "([^"]*)"           # "double quoted" or
                | '([^']*)'           # 'single quoted' or
                | (.*)                # anything
                )
                \s*$
                """,
        re.VERBOSE,
    )

    def __init__(self, file_="", lines=None, first_line=1):
        # Name of the file the lines belong to ("NOFILE" when unknown)
        self.file = file_ or "NOFILE"
        # The config lines; when empty they are read from self.file
        self.lines = lines or []
        # Number of the first line, only used on the messages
        self.first_line = first_line

    def load_lines(self):
        "Make sure we've loaded the file contents into buffer"
        # NOTE(review): self.file is never empty (it falls back to "NOFILE"
        # on __init__), so this guard cannot fire as written. Kept as is
        # to not change the current behavior.
        if not self.lines and not self.file:
            Error("ConfigLines: No file or lines provided")
        if not self.lines:
            self.lines = self.read_config_file(self.file)

    def read_config_file(self, filename=""):
        "Read a Config File contents, aborting on invalid line"
        if not filename:
            return []
        errormsg = "Invalid CONFIG line on %s" + "\n%03d:%s"
        lines = Readfile(filename)
        # Sanity: only blank lines and lines starting with % are allowed
        for number, raw_line in enumerate(lines, 1):
            line = raw_line.rstrip()
            if line and line[0] != "%":
                Error(errormsg % (filename, number, line))
        return lines

    def include_config_file(self, file_=""):
        "Perform the %!includeconf action, returning RAW config"
        if not file_:
            return []
        # Current dir relative to the current file (self.file)
        current_dir = os.path.dirname(self.file)
        file_ = os.path.join(current_dir, file_)
        # Read and parse included config file contents
        lines = self.read_config_file(file_)
        return ConfigLines(file_=file_, lines=lines).get_raw_config()

    def _includes_itself(self, included):
        "Tell if the %!includeconf value points back to the current file"
        if included == self.file:
            return True
        # The included path is relative to the folder of the current file
        # (see include_config_file), so compare the resolved paths too
        resolved = os.path.join(os.path.dirname(self.file), included)
        return os.path.abspath(resolved) == os.path.abspath(self.file)

    def get_raw_config(self):
        "Scan buffer and extract all config as RAW (including includes)"
        ret = []
        self.load_lines()
        first = self.first_line
        for offset, line in enumerate(self.lines):
            Message("Processing line %03d: %s" % (first + offset, line), 2)
            target, key, val = self.parse_line(line)
            if not key:
                continue  # no config on this line
            if key == "includeconf":
                err = "A file cannot include itself (loop!)"
                if self._includes_itself(val):
                    Error("{}: %!includeconf: {}".format(err, self.file))
                more_raw = self.include_config_file(val)
                ret.extend(more_raw)
                Message("Finished Config file inclusion: %s" % val, 2)
            else:
                ret.append([target, key, val])
                Message("Added %s" % key, 3)
        return ret

    def parse_line(self, line="", keyname="", target=""):
        """Detects %!key:val config lines and extract data from it.

        keyname and target are optional regex patterns to restrict the
        key and the (target) accepted; by default any is fine.

        Returns the [target, key, value] list, or a list of three empty
        strings when the line is not a (valid) config line. The key and
        the target are lowercased, and the target is "all" when not
        informed. For preproc and postproc the value is the
        (pattern, replace) tuple.
        """
        empty = ["", "", ""]
        if not line:
            return empty
        no_target = ["target", "includeconf"]
        re_name = keyname or "[a-z]+"
        re_target = target or "[a-z]*"
        cfgregex = re.compile(
            self._CFG_TEMPLATE % (re_name, re_target), re.I | re.VERBOSE
        )

        # Give me a match or get out
        match = cfgregex.match(line)
        if not match:
            return empty

        # Save information about this config
        name = (match.group("name") or "").lower()
        found_target = (match.group("target") or "all").lower()
        value = match.group("value")

        # %!keyword(target) not allowed for these
        if name in no_target and match.group("target"):
            Error("You can't use (target) with %s" % ("%!" + name) + "\n%s" % line)

        # Force no_target keywords to be valid for all targets
        if name in no_target:
            found_target = "all"

        # Special config with two quoted values (%!preproc: "foo" 'bar')
        if name in ("preproc", "postproc"):
            valmatch = self._PREPOST_REGEX.search(value)
            if not valmatch:
                return empty
            getval = valmatch.group
            patt = getval(2) or getval(3) or getval(4) or ""
            repl = getval(6) or getval(7) or getval(8) or ""
            value = (patt, repl)
        return [found_target, name, value]
2626
2627
2628##############################################################################
2629
2630
class MaskMaster:
    "(Un)Protect important structures from escaping and formatting"

    def __init__(self):
        # Placeholders that take the place of the protected text on the line
        self.linkmask = "vvvLINKvvv"
        self.monomask = "vvvMONOvvv"
        self.rawmask = "vvvRAWvvv"
        self.taggedmask = "vvvTAGGEDvvv"
        self.reset()

    def reset(self):
        "Empty the banks where the protected contents are saved"
        self.linkbank = []
        self.monobank = []
        self.rawbank = []
        self.taggedbank = []

    def mask(self, line=""):
        """Replace the inline marks and links by masks, saving their contents.

        The contents are appended to the banks, in the order they are
        found, and the masked line is returned. Use undo() to put the
        contents back.
        """
        # The verbatim, raw and tagged inline marks are mutually exclusive.
        # This means that one can't appear inside the other.
        # If found, the inner marks must be ignored.
        # Example: ``foo ""bar"" ''baz''``
        # In HTML: <code>foo ""bar"" ''baz''</code>
        #
        # The trick here is to protect the mark who appears first on the line.
        # The three regexes are tried and the one with the lowest index wins.
        # If none is found (else), we get out of the loop.
        #
        while True:
            tagged_match = regex["tagged"].search(line)
            raw_match = regex["raw"].search(line)
            mono_match = regex["fontMono"].search(line)

            # Start position of each mark, -1 when not found
            t = tagged_match.start() if tagged_match else -1
            r = raw_match.start() if raw_match else -1
            v = mono_match.start() if mono_match else -1

            # Protect tagged text
            if t >= 0 and (r == -1 or t < r) and (v == -1 or t < v):
                txt = tagged_match.group(1)
                if TARGET == "tex":
                    txt = txt.replace("_", "vvvUnderscoreInTaggedTextvvv")
                self.taggedbank.append(txt)
                line = regex["tagged"].sub(self.taggedmask, line, 1)

            # Protect raw text
            elif r >= 0 and (t == -1 or r < t) and (v == -1 or r < v):
                txt = raw_match.group(1)
                txt = doEscape(TARGET, txt)
                if TARGET == "tex":
                    txt = txt.replace("_", "vvvUnderscoreInRawTextvvv")
                self.rawbank.append(txt)
                line = regex["raw"].sub(self.rawmask, line, 1)

            # Protect verbatim text
            elif v >= 0 and (t == -1 or v < t) and (r == -1 or v < r):
                txt = mono_match.group(1)
                txt = doEscape(TARGET, txt)
                self.monobank.append(txt)
                line = regex["fontMono"].sub(self.monomask, line, 1)
            else:
                break

        # Protect URLs and emails
        while True:
            # Try to match plain or named links
            match_link = regex["link"].search(line)
            match_named = regex["linkmark"].search(line)
            if match_link is None and match_named is None:
                break

            # Both types found? The named link only wins if it comes first
            named_first = match_named is not None and (
                match_link is None or match_named.start() < match_link.start()
            )

            # Extract link data and apply mask
            if named_first:  # named link
                link = match_named.group("link")
                label = match_named.group("label").rstrip()
                link_re = regex["linkmark"]
            else:  # plain link
                link = match_link.group()
                label = ""
                link_re = regex["link"]
            line = link_re.sub(self.linkmask, line, 1)

            # Save link data to the link bank
            self.linkbank.append((label, link))
        return line

    def undo(self, line):
        "Replace the masks on the line by the contents saved on the banks"
        # url & email
        for label, url in self.linkbank:
            link = get_tagged_link(label, url)
            line = line.replace(self.linkmask, link, 1)

        # Expand verb
        for mono in self.monobank:
            open_, close = TAGS["fontMonoOpen"], TAGS["fontMonoClose"]
            line = line.replace(self.monomask, open_ + mono + close, 1)

        # Expand raw
        for raw in self.rawbank:
            line = line.replace(self.rawmask, raw, 1)

        # Expand tagged
        for tagged in self.taggedbank:
            line = line.replace(self.taggedmask, tagged, 1)

        return line
2752
2753
2754##############################################################################
2755
2756
class TitleMaster:
    "Title things: properties, numbering, anchors, tagging and TOC data"

    def __init__(self):
        # Properties of the current title
        self.level = 0
        self.last_level = 0
        self.kind = ""
        self.txt = ""
        self.label = ""
        self.tag = ""
        self.count_id = ""
        # Counters for the numbered titles, one per level. The index 0 is
        # a filler, the count id is composed from the index 1 on.
        self.count = ["", 0, 0, 0, 0, 0]
        # Tags waiting to be dumped before the next title
        self.tag_hold = []
        # One (level, count_id, txt, label) item for each title added
        self.toc = []
        # Automatic anchors are named as prefix + number
        self.anchor_count = 0
        self.anchor_prefix = "toc"

    def _open_close_blocks(self):
        "Open new title blocks, closing the previous (if any)"
        if not rules["titleblocks"]:
            return
        previous = self.last_level
        current = self.level

        def hold(tag):
            # Empty tags are not saved
            if tag:
                self.tag_hold.append(tag)

        if current == previous:
            # Same level, just close the previous
            hold(TAGS.get("title%dClose" % previous))
        elif current > previous:
            # Section -> subsection, more depth
            for depth in range(previous + 1, current + 1):
                # Open the new block of subsections
                hold(TAGS.get("blockTitle%dOpen" % depth))
                # Jump from title1 to title3 or more
                # Fill the gap with an empty section
                if depth < current:
                    gap = TAGS.get("title%dOpen" % depth)
                    hold(regex["x"].sub("", gap))  # del \a
        else:
            # Section <- subsection, less depth
            for depth in range(previous, current, -1):
                # Close the current opened subsection
                hold(TAGS.get("title%dClose" % depth))
                # Close the current opened block of subsections
                hold(TAGS.get("blockTitle%dClose" % depth))
            # Close the previous section of the same level
            # The subsections were under it
            hold(TAGS.get("title%dClose" % current))

    def add(self, line):
        "Parses a new title line."
        if not line:
            return
        self._set_prop(line)
        self._open_close_blocks()
        self._set_count_id()
        self._set_label()
        self._save_toc_info()

    def close_all(self):
        "Closes all opened title blocks"
        closing = list(self.tag_hold)
        # From the current level up to the first one
        while self.level:
            for name in ("title%dClose", "blockTitle%dClose"):
                tag = TAGS.get(name % self.level)
                if tag:
                    closing.append(tag)
            self.level -= 1
        return closing

    def _save_toc_info(self):
        "Save TOC info, used by self.dump_marked_toc()"
        entry = (self.level, self.count_id, self.txt, self.label)
        self.toc.append(entry)

    def _set_prop(self, line=""):
        "Extract info from original line and set data holders."
        # Detect title type (numbered or not)
        marker = line.lstrip()[0]
        if marker == "=":
            kind = "title"
        elif marker == "+":
            kind = "numtitle"
        else:
            Error("Unknown Title ID '%s'" % marker)
        # Extract line info
        found = regex[kind].search(line)
        level = len(found.group("id"))
        txt = found.group("txt").strip()
        label = found.group("label")
        # Parse info & save
        if CONF["enum-title"]:
            kind = "numtitle"  # force
        # With title blocks, the names of the tags have the Open suffix
        suffix = "Open" if rules["titleblocks"] else ""
        tail = "%d%s" % (level, suffix)
        # The tag of the plain title is the fallback
        self.tag = TAGS.get(kind + tail) or TAGS.get("title" + tail)
        self.last_level = self.level
        self.kind = kind
        self.level = level
        self.txt = txt
        self.label = label

    def _set_count_id(self):
        "Compose and save the title count identifier (if needed)."
        count_id = ""
        if self.kind == "numtitle" and not rules["autonumbertitle"]:
            # Manually increase title count
            self.count[self.level] += 1
            # Reset sublevels count (if any)
            for sublevel in range(self.level + 1, len(self.count)):
                self.count[sublevel] = 0
            # Compose count id from hierarchy
            counters = self.count[1 : self.level + 1]
            count_id = "".join("%d." % number for number in counters)
        self.count_id = count_id

    def _set_label(self):
        "Compose and save title label, used by anchors."
        user_label = self.label or ""
        # Remove invalid chars from label set by user
        self.label = re.sub("[^A-Za-z0-9_-]", "", user_label)

    def _get_tagged_anchor(self):
        "Return anchor if user defined a label, or TOC is on."
        label = self.label
        if CONF["toc"]:
            self.anchor_count += 1
            # Autonumber label (if needed)
            if not label:
                label = "{}{}".format(self.anchor_prefix, self.anchor_count)
        if not (label and TAGS["anchor"]):
            return ""
        return regex["x"].sub(label, TAGS["anchor"])

    def _get_full_title_text(self):
        "Returns the full title contents, already escaped."
        text = self.txt
        # Insert count_id (if any) before text
        if self.count_id:
            text = "{} {}".format(self.count_id, text)
        # Escape specials
        text = doEscape(TARGET, text)
        # Same targets needs final escapes on title lines
        # It's here because there is a 'continue' after title
        if rules["finalescapetitle"]:
            text = doFinalEscape(TARGET, text)
        return text

    def get(self):
        "Returns the tagged title as a list."
        # Maybe some anchoring before?
        anchor = self._get_tagged_anchor()
        self.tag = regex["_anchor"].sub(anchor, self.tag)

        # Compose & escape title text (TOC uses unescaped)
        full_title = self._get_full_title_text()

        # Close previous section area
        out = list(self.tag_hold)
        self.tag_hold = []

        tagged = regex["x"].sub(full_title, self.tag)
        if TARGET != "txt":
            out.append(tagged)
            return out

        # Adds "underline" on TXT target
        if BLOCK.count > 1:
            out.append("")  # blank line before
        out.append(tagged)
        # Get the right letter count for UTF
        if isinstance(full_title, bytes):
            full_title = full_title.decode(ENCODING)
        underline = "=" * len(full_title)
        out.append(regex["x"].sub(underline, self.tag))
        return out

    def dump_marked_toc(self):
        "Dumps all toc itens as a valid t2t-marked list"
        items = []
        for number, (level, count_id, txt, label) in enumerate(self.toc, 1):
            indent = "  " * level
            id_txt = "{} {}".format(count_id, txt).lstrip()
            # Titles with no label use the automatic anchor name
            label = label or self.anchor_prefix + str(number)

            if TAGS["anchor"]:
                # TOC will have crosslinks to anchors
                if CONF["enum-title"] and level == 1:
                    # 1. [Foo #anchor] is more readable than [1. Foo #anchor] in level 1.
                    # This is an idea stolen from Windows .CHM help files.
                    tocitem = '{}+ [""{}"" #{}]'.format(indent, txt, label)
                else:
                    tocitem = '{}- [""{}"" #{}]'.format(indent, id_txt, label)
            elif TARGET in ["txt", "man"]:
                # TOC will be plain text (no links)
                # For these, the list is not necessary, just dump the text
                tocitem = '{}""{}""'.format(indent, id_txt)
            else:
                # Plain text TOC, as a list
                tocitem = '{}- ""{}""'.format(indent, id_txt)
            items.append(tocitem)
        return items
2989
2990
2991##############################################################################
2992
2993# TODO check all this table mess
2994# It uses parse_row properties for table lines
2995# BLOCK.table() replaces the cells by the parsed content
class TableMaster:
    """Holds the properties and the rows of a table, and tags it.

    When a table line is given, it defines the properties of the full
    table: border, table align, cells align and cells span. The rows
    are added with add_row(), as dictionaries on the parse_row()
    format, and dump() returns the tagged table as a list of strings.
    """

    def __init__(self, line=""):
        self.rows = []
        self.border = False
        self.align = "Left"
        self.cellalign = []
        self.colalign = []
        self.cellspan = []
        if line:
            prop = self.parse_row(line)
            self.border = prop["border"]
            self.align = prop["align"]
            self.cellalign = prop["cellalign"]
            self.cellspan = prop["cellspan"]
            self.colalign = self._get_col_align()

    def _get_col_align(self):
        "Returns the align of each column: the cell align, once per spanned column"
        colalign = []
        for align, span in zip(self.cellalign, self.cellspan):
            colalign.extend([align] * span)
        return colalign

    def _get_open_tag(self):
        "Returns the table open tag, with its align, border and columns align set"
        topen = TAGS["tableOpen"]
        tborder = TAGS["_tableBorder"]
        talign = TAGS["_tableAlign" + self.align]
        calignsep = TAGS["tableColAlignSep"]
        calign = ""

        # The first line defines if table has border or not
        if not self.border:
            tborder = ""
        # Set the columns alignment
        if rules["tablecellaligntype"] == "column":
            calign = calignsep.join(
                TAGS["_tableColAlign%s" % x] for x in self.colalign
            )
        # Align full table, set border and Column align (if any)
        topen = regex["_tableAlign"].sub(talign, topen)
        topen = regex["_tableBorder"].sub(tborder, topen)
        topen = regex["_tableColAlign"].sub(calign, topen)
        # Tex table spec, border or not: {|l|c|r|} , {lcr}
        if calignsep and not self.border:
            # Remove cell align separator
            topen = topen.replace(calignsep, "")
        return topen

    def _get_cell_align(self, cells):
        """Returns the align name (Left, Center or Right) of each cell.

        Spaces on both sides means Center, a leading space only means
        Right. Anything else is Left, including the blank cells.
        """
        ret = []
        for cell in cells:
            align = "Left"
            if cell.strip():
                if cell[0] == " " and cell[-1] == " ":
                    align = "Center"
                elif cell[0] == " ":
                    align = "Right"
            ret.append(align)
        return ret

    def _get_cell_span(self, cells):
        "Returns the number of columns of each cell: 1 plus the pipes after the \\a mark"
        ret = []
        for cell in cells:
            m = re.search(r"\a(\|+)$", cell)
            ret.append(len(m.group(1)) + 1 if m else 1)
        return ret

    def _tag_cells(self, rowdata):
        "Returns the row as a string, with all its cells already tagged"
        cells = rowdata["cells"]
        open_ = TAGS["tableCellOpen"]
        close = TAGS["tableCellClose"]
        sep = TAGS["tableCellSep"]
        calign = [TAGS["_tableCellAlign" + x] for x in rowdata["cellalign"]]
        calignsep = TAGS["tableColAlignSep"]
        ncolumns = len(self.colalign)

        # Populate the span and multicol open tags
        cspan = []
        multicol = []
        colindex = 0
        for span, align in zip(rowdata["cellspan"], rowdata["cellalign"]):
            if span > 1:
                cspan.append(regex["x"].sub(str(span), TAGS["_tableCellColSpan"]))
                mcopen = regex["x"].sub(str(span), TAGS["_tableCellMulticolOpen"])
            else:
                cspan.append("")
                # Multicol is also used when the align of a single cell
                # is not the same of its column
                if colindex < ncolumns and align != self.colalign[colindex]:
                    mcopen = regex["x"].sub("1", TAGS["_tableCellMulticolOpen"])
                else:
                    mcopen = ""

            if not self.border:
                mcopen = mcopen.replace(calignsep, "")
            multicol.append(mcopen)

            colindex += span

        # Maybe is it a title row?
        if rowdata["title"]:
            open_ = TAGS["tableTitleCellOpen"] or open_
            close = TAGS["tableTitleCellClose"] or close
            sep = TAGS["tableTitleCellSep"] or sep

        # Should we break the line on *each* table cell?
        if rules["breaktablecell"]:
            close = close + "\n"

        # Cells pre processing
        if rules["tablecellstrip"]:
            cells = [x.strip() for x in cells]
        if rowdata["title"] and rules["tabletitlerowinbold"]:
            cells = [enclose_me("fontBold", x) for x in cells]

        # Add cell BEGIN/END tags
        # The properties may be shorter than the cells list, so the
        # missing ones are empty. Fixes empty line bug '| |'
        align_iter = iter(calign)
        span_iter = iter(cspan)
        multicol_iter = iter(multicol)
        row = []
        for cell in cells:
            copen = open_
            cclose = close
            this_align = next(align_iter, "")
            this_span = next(span_iter, "")
            this_mcopen = next(multicol_iter, "")

            # Insert cell align into open tag (if cell is alignable)
            if rules["tablecellaligntype"] == "cell":
                copen = regex["_tableCellAlign"].sub(this_align, copen)

            # Insert cell span into open tag (if cell is spannable)
            if rules["tablecellspannable"]:
                copen = regex["_tableCellColSpan"].sub(this_span, copen)

            # Use multicol tags instead (if multicol supported, and if
            # cell has a span or is aligned differently to column)
            if rules["tablecellmulticol"] and this_mcopen:
                copen = regex["_tableColAlign"].sub(this_align, this_mcopen)
                cclose = TAGS["_tableCellMulticolClose"]

            row.append(copen + cell + cclose)

        # Maybe there are cell separators?
        return sep.join(row)

    def _add_row_separators(self, tagged_rows, rowsep):
        "Returns the rows with rowsep appended to all of them, except the last"
        # The last row is left untouched: the table is over. Its contents
        # are not searched for the separator, which may be part of a cell.
        if not tagged_rows:
            return []
        return [row + rowsep for row in tagged_rows[:-1]] + [tagged_rows[-1]]

    def add_row(self, cells):
        "Saves a new table row (a dictionary on the parse_row() format)"
        self.rows.append(cells)

    def parse_row(self, line):
        """Extracts the properties and the cells of a table line.

        Returns a dictionary with the keys: border, title, align, cells,
        cellalign and cellspan.
        """
        # Default table properties
        ret = {
            "border": False,
            "title": False,
            "align": "Left",
            "cells": [],
            "cellalign": [],
            "cellspan": [],
        }
        # Detect table align (and remove spaces mark)
        # Slices are used so a too short line does not raise IndexError
        if line[:1] == " ":
            ret["align"] = "Center"
        line = line.lstrip()
        # Detect title mark
        if line[1:2] == "|":
            ret["title"] = True
        # Detect border mark and normalize the EOL
        if re.search(r" (\|+) *$", line):
            line += " "
            ret["border"] = True
        else:
            line += " | "
        # Delete table mark
        line = regex["table"].sub("", line)
        # Detect colspan  | foo | bar baz |||
        line = re.sub(r" (\|+)\| ", "\a\\1 | ", line)
        # Split cells (the last is fake)
        ret["cells"] = line.split(" | ")[:-1]
        # Find cells span
        ret["cellspan"] = self._get_cell_span(ret["cells"])
        # Remove span ID
        ret["cells"] = [re.sub(r"\a\|+$", "", x) for x in ret["cells"]]
        # Find cells align
        ret["cellalign"] = self._get_cell_align(ret["cells"])
        # Hooray!
        Debug("Table Prop: %s" % ret, 7)
        return ret

    def dump(self):
        "Returns the full tagged table, as a list of strings"
        open_ = self._get_open_tag()
        rows = self.rows
        close = TAGS["tableClose"]

        rowopen = TAGS["tableRowOpen"]
        rowclose = TAGS["tableRowClose"]
        rowsep = TAGS["tableRowSep"]
        titrowopen = TAGS["tableTitleRowOpen"] or rowopen
        titrowclose = TAGS["tableTitleRowClose"] or rowclose

        if rules["breaktablelineopen"]:
            rowopen = rowopen + "\n"
            titrowopen = titrowopen + "\n"

        # Tex gotchas
        if TARGET == "tex":
            if not self.border:
                rowopen = titrowopen = ""
            else:
                close = rowopen + close

        # Now we tag all the table cells on each row
        tagged_cells = [self._tag_cells(rowdata) for rowdata in rows]

        if rowsep:
            # Add row separator tags between lines
            tagged_rows = self._add_row_separators(tagged_cells, rowsep)
        else:
            # Add row BEGIN/END tags for each line
            tagged_rows = []
            for rowdata, row in zip(rows, tagged_cells):
                if rowdata["title"]:
                    o, c = titrowopen, titrowclose
                else:
                    o, c = rowopen, rowclose
                tagged_rows.append(o + row + c)

        # Join the pieces together
        fulltable = []
        if open_:
            fulltable.append(open_)
        fulltable.extend(tagged_rows)
        if close:
            fulltable.append(close)

        return fulltable
3247
3248
3249##############################################################################
3250
3251
class BlockMaster:
    """Stack-based holder for the blocks found while parsing.

    TIP: use blockin/out to add/del holders

    Three parallel stacks are kept, with one entry per open block:
    BLK (block names), HLD (held contents) and PRP (properties).
    blockin() pushes a new block. blockout() pops the innermost block
    and calls the method named after it (para, list, table, ...) to get
    its tagged lines.
    """

    def __init__(self):
        self.BLK = []  # names of the open blocks, innermost last
        self.HLD = []  # held contents, one list per open block
        self.PRP = []  # properties, one dict per open block
        self.depth = 0  # number of currently open blocks
        self.count = 0  # number of blocks opened so far
        self.last = ""  # last top level block that produced output
        self.tableparser = None  # TableMaster for the open table, if any

        # Blocks that may appear inside any kind of list.
        # Each list type gets its own copy.
        listcontents = [
            "list",
            "numlist",
            "deflist",
            "para",
            "verb",
            "comment",
            "raw",
            "tagged",
        ]
        # For each block: which blocks can be opened inside it
        self.contains = {
            "para": ["comment", "raw", "tagged"],
            "verb": [],
            "table": ["comment"],
            "raw": [],
            "tagged": [],
            "comment": [],
            "quote": ["quote", "comment", "raw", "tagged"],
            "list": listcontents[:],
            "numlist": listcontents[:],
            "deflist": listcontents[:],
            "bar": [],
            "title": [],
            "numtitle": [],
        }
        self.allblocks = list(self.contains.keys())

        # If one is found inside another, ignore the marks
        self.exclusive = ["comment", "verb", "raw", "tagged"]

        # May we include bars inside quotes?
        if rules["barinsidequote"]:
            self.contains["quote"].append("bar")

    def block(self):
        "Returns the innermost open block name ('' when there is none)"
        if not self.BLK:
            return ""
        return self.BLK[-1]

    def isblock(self, name=""):
        "Tells if the innermost open block is 'name' ('' means no block)"
        return self.block() == name

    def prop(self, key):
        "Returns a property of the innermost block ('' if unset or falsy)"
        if not self.PRP:
            return ""
        return self.PRP[-1].get(key) or ""

    def propset(self, key, val):
        "Sets a property on the innermost block (a block must be open)"
        self.PRP[-1][key] = val
        # Debug('BLOCK prop ++: %s->%s'%(key,repr(val)), 1)
        # Debug('BLOCK props: %s'%(repr(self.PRP)), 1)

    def hold(self):
        "Returns the held contents of the innermost block ([] if none)"
        if not self.HLD:
            return []
        return self.HLD[-1]

    def holdadd(self, line):
        "Adds a line to the current holder; on lists it starts a new item"
        if self.block().endswith("list"):
            # Each list item is itself a list of lines
            line = [line]
        self.HLD[-1].append(line)
        Debug("HOLD add: %s" % repr(line), 4)
        Debug("FULL HOLD: %s" % self.HLD, 4)

    def holdaddsub(self, line):
        "Adds a line to the last entry (list item) of the current holder"
        self.HLD[-1][-1].append(line)
        Debug("HOLD addsub: %s" % repr(line), 4)
        Debug("FULL HOLD: %s" % self.HLD, 4)

    def holdextend(self, lines):
        "Adds several lines to the current holder; on lists, as one item"
        if self.block().endswith("list"):
            lines = [lines]
        self.HLD[-1].extend(lines)
        Debug("HOLD extend: %s" % repr(lines), 4)
        Debug("FULL HOLD: %s" % self.HLD, 4)

    def blockin(self, block):
        """Opens a new block and returns the lines released on the way.

        Open blocks that cannot contain the new one (see self.contains)
        are closed first; whatever blockout() returned for them is the
        return value. Error() is called for an unknown block name.
        """
        ret = []
        if block not in self.allblocks:
            Error("Invalid block '%s'" % block)

        # First, let's close other possible open blocks
        while self.block() and block not in self.contains[self.block()]:
            ret.extend(self.blockout())

        # Now we can gladly add this new one
        self.BLK.append(block)
        self.HLD.append([])
        self.PRP.append({})
        self.count += 1
        if block == "table":
            self.tableparser = TableMaster()
        # Deeper and deeper
        self.depth = len(self.BLK)
        Debug("block ++ ({}): {}".format(block, self.BLK), 3)
        return ret

    def blockout(self):
        """Closes the innermost block and returns its tagged lines.

        When the closed block is nested, its lines are appended to the
        mother block holder (comment blocks are dropped) and an empty
        list is returned instead. Error() is called if no block is open.
        """
        if not self.BLK:
            Error("No block to pop")
        blockname = self.BLK.pop()
        # The block name was popped, but its holder, properties and
        # self.depth are still in place while the handler runs
        result = getattr(self, blockname)()
        parsed = self.HLD.pop()
        self.PRP.pop()
        self.depth = len(self.BLK)
        if blockname == "table":
            # Back to the __init__ state. The attribute is kept (not
            # deleted) so reading it never raises AttributeError.
            self.tableparser = None

        # Inserting a nested block into mother
        if self.block():
            if blockname != "comment":  # ignore comment blocks
                if self.block().endswith("list"):
                    self.HLD[-1][-1].append(result)
                else:
                    self.HLD[-1].append(result)
            # Reset now. Mother block will have it all
            result = []

        Debug("block -- ({}): {}".format(blockname, self.BLK), 3)
        Debug("RELEASED ({}): {}".format(blockname, parsed), 3)

        # Save this top level block name (produced output)
        # The next block will use it
        if result:
            self.last = blockname
            Debug("BLOCK: %s" % result, 6)

        return result

    def _last_escapes(self, line):
        "Applies the final escapes of the current TARGET to the line"
        return doFinalEscape(TARGET, line)

    def _get_escaped_hold(self):
        "Returns the held lines escaped; nested lists are added untouched"
        ret = []
        for line in self.hold():
            if isinstance(line, list):
                # Result of a nested block
                ret.extend(line)
            else:
                ret.append(self._last_escapes(line))
        return ret

    def _remove_twoblanks(self, lastitem):
        "Returns lastitem without its two trailing empty strings, if any"
        if len(lastitem) > 1 and lastitem[-2:] == ["", ""]:
            return lastitem[:-2]
        return lastitem

    def _should_add_blank_line(self, where, blockname):
        "Validates the blanksaround* rules"

        # Nestable blocks: only mother blocks (level 1) are spaced
        if blockname.endswith("list") and self.depth > 1:
            return False

        # The blank line after the block is always added
        if where == "after" and rules["blanksaround" + blockname]:
            return True

        # The blank line before the block is only added if
        # the previous block haven't added a blank line
        # (to avoid consecutive blanks)
        elif (
            where == "before"
            and rules["blanksaround" + blockname]
            and not rules.get("blanksaround" + self.last)
        ):
            return True

        # Nested quotes are handled here,
        # because the mother quote isn't closed yet
        elif (
            where == "before"
            and blockname == "quote"
            and rules["blanksaround" + blockname]
            and self.depth > 1
        ):
            return True

        return False

    def comment(self):
        "Comment blocks produce no output"
        return ""

    def raw(self):
        "Returns the held lines escaped for the TARGET, with no tags"
        lines = self.hold()
        return [doEscape(TARGET, x) for x in lines]

    def tagged(self):
        "Returns the held lines untouched"
        return self.hold()

    def para(self):
        "Returns the tagged paragraph lines"
        result = []
        open_ = TAGS["paragraphOpen"]
        close = TAGS["paragraphClose"]
        lines = self._get_escaped_hold()

        # Blank line before?
        if self._should_add_blank_line("before", "para"):
            result.append("")

        # Open tag
        if open_:
            result.append(open_)

        # Pagemaker likes a paragraph as a single long line
        if rules["onelinepara"]:
            result.append(" ".join(lines))
        # Others are normal :)
        else:
            result.extend(lines)

        # Close tag
        if close:
            result.append(close)

        # Blank line after?
        if self._should_add_blank_line("after", "para"):
            result.append("")

        return result

    def verb(self):
        "Verbatim lines are not masked, so there's no need to unmask"
        result = []
        open_ = TAGS["blockVerbOpen"]
        close = TAGS["blockVerbClose"]

        # Blank line before?
        if self._should_add_blank_line("before", "verb"):
            result.append("")

        # Open tag
        if open_:
            result.append(open_)

        # Get contents
        for line in self.hold():
            if not rules["verbblocknotescaped"]:
                line = doEscape(TARGET, line)
            if TAGS["blockVerbLine"]:
                line = TAGS["blockVerbLine"] + line
            if rules["indentverbblock"]:
                line = "  " + line
            if rules["verbblockfinalescape"]:
                line = doFinalEscape(TARGET, line)
            result.append(line)

        # Close tag
        if close:
            result.append(close)

        # Blank line after?
        if self._should_add_blank_line("after", "verb"):
            result.append("")

        return result

    def numtitle(self):
        "Same as title(), using the numtitle spacing rules"
        return self.title("numtitle")

    def title(self, name="title"):
        "Returns the title lines got from TITLE.get(), plus blank lines"
        result = []

        # Blank line before?
        if self._should_add_blank_line("before", name):
            result.append("")

        # Get contents
        result.extend(TITLE.get())

        # Blank line after?
        if self._should_add_blank_line("after", name):
            result.append("")

        return result

    def table(self):
        "Returns the tagged table lines, as dumped by self.tableparser"
        result = []

        # Blank line before?
        if self._should_add_blank_line("before", "table"):
            result.append("")

        # Rewrite all table cells by the unmasked and escaped data
        for i, line in enumerate(self._get_escaped_hold()):
            self.tableparser.rows[i]["cells"] = line.split(SEPARATOR)
        result.extend(self.tableparser.dump())

        # Blank line after?
        if self._should_add_blank_line("after", "table"):
            result.append("")

        return result

    def quote(self):
        "Returns the tagged quote lines; held subquotes are added as is"
        result = []
        open_ = TAGS["blockQuoteOpen"]  # block based
        close = TAGS["blockQuoteClose"]
        qline = TAGS["blockQuoteLine"]  # line based
        indent = tagindent = "\t" * self.depth

        # Apply rules
        if rules["tagnotindentable"]:
            tagindent = ""
        if not rules["keepquoteindent"]:
            indent = ""

        # Blank line before?
        if self._should_add_blank_line("before", "quote"):
            result.append("")

        # Open tag
        if open_:
            result.append(tagindent + open_)

        # Get contents
        for item in self.hold():
            if isinstance(item, list):
                result.extend(item)  # subquotes
            else:
                item = regex["quote"].sub("", item)  # del TABs
                item = self._last_escapes(item)
                item = qline * self.depth + item
                result.append(indent + item)  # quote line

        # Close tag
        if close:
            result.append(tagindent + close)

        # Blank line after?
        if self._should_add_blank_line("after", "quote"):
            result.append("")

        return result

    def bar(self):
        "Returns the tagged bar line; bar2 is used when it starts with '='"
        result = []
        bar_tag = ""

        # Blank line before?
        if self._should_add_blank_line("before", "bar"):
            result.append("")

        # Get the original bar chars
        bar_chars = self.hold()[0].strip()

        # Set bar type
        if bar_chars.startswith("="):
            bar_tag = TAGS["bar2"]
        else:
            bar_tag = TAGS["bar1"]

        # To avoid comment tag confusion like <!-- ------ --> (sgml)
        if TAGS["comment"].count("--"):
            bar_chars = bar_chars.replace("--", "__")

        # Get the bar tag (may contain \a)
        result.append(regex["x"].sub(bar_chars, bar_tag))

        # Blank line after?
        if self._should_add_blank_line("after", "bar"):
            result.append("")

        return result

    def deflist(self):
        "Same as list(), using the deflist tags and rules"
        return self.list("deflist")

    def numlist(self):
        "Same as list(), using the numlist tags and rules"
        return self.list("numlist")

    def list(self, name="list"):
        """Returns the tagged lines for a list, numlist or deflist.

        Each held item is a list: the first element is the item line
        (with the SEPARATOR marks) and the others are continuation lines
        or the already tagged lines of a nested block.
        """
        result = []
        items = self.hold()
        indent = self.prop("indent")
        tagindent = indent
        listline = TAGS.get(name + "ItemLine")

        if name == "deflist":
            itemopen = TAGS[name + "Item1Open"]
            itemclose = TAGS[name + "Item2Close"]
            itemsep = TAGS[name + "Item1Close"] + TAGS[name + "Item2Open"]
        else:
            itemopen = TAGS[name + "ItemOpen"]
            itemclose = TAGS[name + "ItemClose"]
            itemsep = ""

        # Apply rules
        if rules["tagnotindentable"]:
            tagindent = ""
        if not rules["keeplistindent"]:
            indent = tagindent = ""

        # ItemLine: number of leading chars identifies list depth
        if listline:
            itemopen = listline * self.depth + itemopen

        # Adds trailing space on opening tags
        if (name == "list" and rules["spacedlistitemopen"]) or (
            name == "numlist" and rules["spacednumlistitemopen"]
        ):
            itemopen = itemopen + " "

        # Remove two-blanks from list ending mark, to avoid <p>
        if items:
            items[-1] = self._remove_twoblanks(items[-1])

        # Blank line before?
        if self._should_add_blank_line("before", name):
            result.append("")

        # Tag each list item (multiline items), store in listbody
        itemopenorig = itemopen
        listbody = []
        widelist = 0
        for itemcount, item in enumerate(items, 1):

            # Add "manual" item count for noautonum targets
            if name == "numlist" and not rules["autonumberlist"]:
                itemopen = regex["x"].sub(str(itemcount), itemopenorig)

            # Tag it
            item[0] = self._last_escapes(item[0])
            if name == "deflist":
                _, term, rest = item[0].split(SEPARATOR, 2)
                item[0] = rest
                if not item[0]:
                    del item[0]  # to avoid <p>
                listbody.append(tagindent + itemopen + term + itemsep)
            else:
                fullitem = tagindent + itemopen
                listbody.append(item[0].replace(SEPARATOR, fullitem))
                del item[0]

            # Process next lines for this item (if any)
            for line in item:
                if isinstance(line, list):  # sublist inside
                    listbody.extend(line)
                else:
                    line = self._last_escapes(line)

                    # Blank lines turns to <p>
                    if not line and rules["parainsidelist"]:
                        line = indent + TAGS["paragraphOpen"] + TAGS["paragraphClose"]
                        line = line.rstrip()
                        widelist = 1

                    # Some targets don't like identation here (wiki)
                    if not rules["keeplistindent"] or (
                        name == "deflist" and rules["deflisttextstrip"]
                    ):
                        line = line.lstrip()

                    # Maybe we have a line prefix to add? (wiki)
                    if name == "deflist" and TAGS["deflistItem2LinePrefix"]:
                        line = TAGS["deflistItem2LinePrefix"] + line

                    listbody.append(line)

            # Close item (if needed)
            if itemclose:
                listbody.append(tagindent + itemclose)

        if not widelist and rules["compactlist"]:
            listopen = TAGS.get(name + "OpenCompact")
            listclose = TAGS.get(name + "CloseCompact")
        else:
            listopen = TAGS.get(name + "Open")
            listclose = TAGS.get(name + "Close")

        # Open list (not nestable lists are only opened at mother)
        # The depth is read from this instance, the same way the close
        # tag test below does, not from a global BlockMaster
        if listopen and not (rules["listnotnested"] and self.depth != 1):
            result.append(tagindent + listopen)

        result.extend(listbody)

        # Close list (not nestable lists are only closed at mother)
        if listclose and not (rules["listnotnested"] and self.depth != 1):
            result.append(tagindent + listclose)

        # Blank line after?
        if self._should_add_blank_line("after", name):
            result.append("")

        return result
3769
3770
3771##############################################################################
3772
3773
def listTargets():
    """Print each available target and its description, sorted by target."""
    for target in sorted(TARGET_NAMES):
        description = TARGET_NAMES[target]
        print("{:8}{}".format(target, description))
3778
3779
def get_file_body(file_):
    "Returns all the document BODY lines"
    # The second item returned is the document parts holder,
    # whose third element is the body
    doc_parts = process_source_file(file_, noconf=1)[1]
    return doc_parts[2]
3783
3784
def finish_him(outlist, config):
    """Writing output to screen or file

    The escape char is unmasked and embedded line breaks are expanded.
    Then the PostProc filters (if any) are applied to each line, in order.
    The resulting lines are returned when the outfile is MODULEOUT,
    printed when it is STDOUT, or saved to the outfile otherwise.
    """
    outfile = config["outfile"]
    outlist = expandLineBreaks(unmaskEscapeChar(outlist))

    # Apply PostProc filters
    if config["postproc"]:
        filters = compile_filters(config["postproc"], "Invalid PostProc filter regex")
        errmsg = "Invalid PostProc filter replacement"
        filtered = []
        for line in outlist:
            for rgx, repl in filters:
                try:
                    line = rgx.sub(repl, line)
                except Exception:
                    Error("{}: '{}'".format(errmsg, repl))
            filtered.append(line)
        outlist = filtered

    # Used as a module: just hand the lines back
    if outfile == MODULEOUT:
        return outlist

    to_screen = outfile == STDOUT
    Message("Saving results to the output file", 1)
    if to_screen:
        for line in outlist:
            print(line)
        return

    Savefile(outfile, outlist)
    if not QUIET:
        print("{} wrote {}".format(my_name, outfile))
3816
3817
def toc_tagger(toc, config):
    "Returns the tagged TOC, as a single tag or a tagged list"
    if not config["toc"]:
        return []

    # Our TOC list is not needed, the target already knows how to do a TOC
    native_toc = TAGS["TOC"]
    if native_toc:
        return [native_toc]

    # Convert the TOC list (t2t-marked) to the target's list format,
    # using a config copy with no headers and no filters
    fakeconf = config.copy()
    for key, blank in (("headers", 0), ("preproc", []), ("postproc", [])):
        fakeconf[key] = blank
    tagged, _ = convert(toc, fakeconf)
    set_global_config(config)  # restore config
    return tagged
3834
3835
def toc_formatter(toc, config):
    """Formats TOC for automatic placement between headers and body

    Returns a new list with the TOC lines wrapped by the TOC open/close
    tags, bars, blank line and page breaks, as set by the target tags and
    rules. Returns [] when the TOC is disabled in config. The list
    received in 'toc' is never changed.
    """

    if not config["toc"]:
        return []  # TOC disabled

    # Work on a copy, so the caller's list is left untouched on every path
    ret = list(toc)

    # TOC open/close tags (if any)
    if TAGS["tocOpen"]:
        ret.insert(0, TAGS["tocOpen"])
    if TAGS["tocClose"]:
        ret.append(TAGS["tocClose"])

    # Autotoc specific formatting
    if rules["autotocwithbars"]:  # TOC between bars
        para = TAGS["paragraphOpen"] + TAGS["paragraphClose"]
        bar = regex["x"].sub("-" * DFT_TEXT_WIDTH, TAGS["bar1"])
        tocbar = [para, bar, para]
        ret = tocbar + ret + tocbar
    if rules["blankendautotoc"]:  # blank line after TOC
        ret.append("")
    if rules["autotocnewpagebefore"]:  # page break before TOC
        ret.insert(0, TAGS["pageBreak"])
    if rules["autotocnewpageafter"]:  # page break after TOC
        ret.append(TAGS["pageBreak"])
    return ret
3862
3863
def doHeader(headers, config):
    """Returns the document header lines for the configured target.

    The target header template is filled with the three header lines,
    the style(s) and the encoding string. Template lines that reference
    only empty data are removed. When more than one style is set, the
    template line holding the style reference is repeated once per style.

    headers: the three header lines (empty strings are used if not set)
    config: uses the 'headers', 'target' and 'style' keys

    Returns [] when headers are disabled in config.
    """
    if not config["headers"]:
        return []
    if not headers:
        headers = ["", "", ""]
    target = config["target"]

    template = HEADER_TEMPLATE[target].split("\n")

    # Use a private copy of the style list, so config["style"] is never
    # changed here. A missing style is handled as an empty list.
    style = list(config.get("style") or [])
    # Tex: strip .sty extension from each style filename.
    if target == "tex":
        style = [os.path.splitext(x)[0] for x in style]

    head_data = {"STYLE": style, "ENCODING": get_encoding_string(target)}

    # Parse header contents
    for i in 0, 1, 2:
        contents = headers[i]
        # Escapes - on tex, just do it if any \tag{} present
        if target != "tex" or (target == "tex" and re.search(r"\\\w+{", contents)):
            contents = doEscape(target, contents)
        if target in ["lout", "tex"]:
            contents = doFinalEscape(target, contents)

        head_data["HEADER%d" % (i + 1)] = contents

    Debug("Header Data: %s" % head_data, 1)

    # For each empty key, drop the template lines where it is the only
    # key referenced. The template is rebuilt instead of being changed
    # while iterated, so consecutive matching lines are not skipped.
    key_ref = re.compile(r"%\([A-Z0-9]+\)s")
    for key, value in head_data.items():
        if value:
            continue
        mark = "%%(%s)s" % key
        template = [
            line
            for line in template
            if mark not in line or key_ref.search(line.replace(mark, ""))
        ]

    # Style is a multiple tag.
    # - If none, any remaining reference expands to an empty string
    # - If just one, use default template
    # - If two or more, insert one line per style (and remove original)
    if not style:
        head_data["STYLE"] = ""
    elif len(style) == 1:
        head_data["STYLE"] = style[0]
    else:
        style_mark = "%(STYLE)s"
        for i, line in enumerate(template):
            if style_mark in line:
                # Styles are kept in their original order
                template[i : i + 1] = [line.replace(style_mark, x) for x in style]
                break
        # All styles were placed; nothing left for any other reference
        head_data["STYLE"] = ""

    # Populate template with data (dict expansion)
    template = "\n".join(template) % head_data

    return template.split("\n")
3926
3927
def doFooter(config):
    "Returns the document footer lines ([] when headers are disabled)"

    # No footer. The --no-headers option hides header AND footer
    if not config["headers"]:
        return []

    footer = []

    # Add a blank line before the footer, unless the last block
    # has already added one by itself
    last_block_rule = "blanksaround" + BLOCK.last
    if not rules.get(last_block_rule):
        footer.append("")

    # Maybe we have a specific tag to close the document?
    eod = TAGS["EOD"]
    if eod:
        footer.append(eod)

    return footer
3944
3945
def doEscape(target, txt):
    "Target-specific special escapes. Apply *before* insert any tag."
    tmpmask = "vvvvThisEscapingSuxvvvv"

    if target in ("html", "sgml", "dbk"):
        # The & must be the first one, it is part of the other entities
        for char, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
            txt = txt.replace(char, entity)
        if target == "sgml":
            txt = txt.replace("\xff", "&yuml;")  # "+y

    elif target == "mgp":
        # add leading blank to avoid parse
        if txt.startswith("%"):
            txt = " " + txt

    elif target == "man":
        txt = re.sub("^([.'])", "\\&\\1", txt)  # command ID
        txt = txt.replace(ESCCHAR, ESCCHAR + "e")  # \e

    elif target == "lout":
        # TIP: / moved to FinalEscape to avoid //italic//
        # TIP: these are also converted by lout:  ...  ---  --
        txt = txt.replace(ESCCHAR, tmpmask)  # \
        txt = txt.replace('"', '"%s""' % ESCCHAR)  # "\""
        txt = re.sub("([|&{}@#^~])", '"\\1"', txt)  # "@"
        txt = txt.replace(tmpmask, '"%s"' % (ESCCHAR * 2))  # "\\"

    elif target == "tex":
        # Mark literal \ to be changed to $\backslash$ later
        txt = txt.replace(ESCCHAR, tmpmask)
        for pattern, repl in (
            ("([#$&%{}])", ESCCHAR + r"\1"),  # \%
            ("([~^])", ESCCHAR + r"\1{}"),  # \~{}
            ("([<|>])", r"$\1$"),  # $>$
        ):
            txt = re.sub(pattern, repl, txt)
        txt = txt.replace(tmpmask, maskEscapeChar(r"$\backslash$"))
        # TIP the _ is escaped at the end

    return txt
3976
3977
# TODO man: where does - really need to be escaped?
def doFinalEscape(target, txt):
    "Last escapes of each line"
    # For each target, the replacements to apply, in this order
    final_escapes = {
        "man": (("-", r"\-"),),
        "sgml": (("[", "&lsqb;"),),
        "lout": (("/", '"/"'),),
        "tex": (
            ("_", r"\_"),
            ("vvvvTexUndervvvv", "_"),  # shame!
            ("vvvUnderscoreInRawTextvvv", "_"),
            ("vvvUnderscoreInTaggedTextvvv", "_"),
        ),
    }
    for old, new in final_escapes.get(target, ()):
        txt = txt.replace(old, new)
    return txt
3993
3994
def EscapeCharHandler(action, data):
    "Mask/Unmask the Escape Char on the given string"
    # Blank data is returned as is, whatever the action is
    if not data.strip():
        return data
    if action == "mask":
        return data.replace("\\", ESCCHAR)
    if action != "unmask":
        Error("EscapeCharHandler: Invalid action '%s'" % action)
    return data.replace(ESCCHAR, "\\")
4005
4006
def maskEscapeChar(data):
    "Replace any escape char with a text mask (Input: str or list)"
    if not isinstance(data, list):
        return EscapeCharHandler("mask", data)
    return [EscapeCharHandler("mask", item) for item in data]
4012
4013
def unmaskEscapeChar(data):
    "Undo the escape char masking (Input: str or list)"
    if not isinstance(data, list):
        return EscapeCharHandler("unmask", data)
    return [EscapeCharHandler("unmask", item) for item in data]
4019
4020
# Convert ['foo\nbar'] to ['foo', 'bar']
def expandLineBreaks(mylist):
    "Returns a new list where each line break starts a new element"
    return [piece for line in mylist for piece in line.split("\n")]
4027
4028
def compile_filters(filters, errmsg="Filter"):
    """Compiles the pattern of each (pattern, replacement) filter.

    The given list is changed in place (each pattern is replaced by its
    compiled regex) and is also returned. Error() is called with errmsg
    when a pattern cannot be compiled. Empty filters are returned as is.
    """
    if not filters:
        return filters
    for pos, (patt, repl) in enumerate(filters):
        try:
            compiled = re.compile(patt)
        except Exception:
            Error("{}: '{}'".format(errmsg, patt))
        filters[pos] = (compiled, repl)
    return filters
4039
4040
def enclose_me(tagname, txt):
    "Returns txt between the <tagname>Open and <tagname>Close tags"
    open_ = TAGS.get(tagname + "Open")
    close = TAGS.get(tagname + "Close")
    return open_ + txt + close
4043
4044
def beautify_me(name, font, line):
    """where name is: bold, italic, underline or strike

    Returns the line with every regex[font] match replaced by its first
    group placed between the <font>Open and <font>Close tags.
    """

    # Exception: Doesn't parse an horizontal bar as strike
    if name == "strike" and regex["bar"].search(line):
        return line

    replacement = "%s\\1%s" % (TAGS["%sOpen" % font], TAGS["%sClose" % font])
    return regex[font].sub(replacement, line)
4057
4058
def get_tagged_link(label, url):
    """Returns the tagged link for the given label and URL.

    An empty label means a simple link. The e-mail tags are used when the
    URL matches the e-mail regex, the URL tags otherwise.
    """
    target = CONF["target"]
    image_re = regex["img"]

    # Set link type
    linktype = "email" if regex["email"].match(url) else "url"

    # Escape specials from TEXT parts
    label = doEscape(target, label)

    # Escape specials from link URL
    if not rules["linkable"] or rules["escapeurl"]:
        url = doEscape(target, url)

    # Adding protocol to guessed link
    guessurl = ""
    if linktype == "url" and re.match("(?i)" + regex["_urlskel"]["guess"], url):
        protocol = "http://" if url[0] in "Ww" else "ftp://"
        # Not link aware targets -> protocol is useless
        guessurl = protocol + url if rules["linkable"] else ""

    # Simple link (not guessed): just add link data to tag
    if not (label or guessurl):
        return regex["x"].sub(url, TAGS[linktype])

    # Named link or guessed simple link
    # Adjusts for guessed link
    if not label:
        label = url  # no protocol
    if guessurl:
        url = guessurl  # with protocol

    # Image inside link!
    if image_re.match(label):
        if rules["imglinkable"]:  # get image tag
            label = parse_images(label)
        else:
            # img@link !supported
            label = "(%s)" % image_re.match(label).group(1)

    # Putting data on the right appearance order
    if rules["labelbeforelink"] or not rules["linkable"]:
        pieces = (label, url)  # label before link
    else:
        pieces = (url, label)  # link before label

    # Add link data to tag (replace \a's)
    tagged = TAGS["%sMark" % linktype]
    for piece in pieces:
        tagged = regex["x"].sub(piece, tagged, 1)

    return tagged
4123
4124
def parse_deflist_term(line):
    "Extract and parse definition list term contents"
    img_re = regex["img"]
    term = regex["deflist"].search(line).group(3)

    # Mask image inside term as (image.jpg), where not supported
    if not rules["imgasdefterm"]:
        found = img_re.search(term)
        while found:
            # One image at a time, always the first one found
            term = img_re.sub("(%s)" % found.group(1), term, 1)
            found = img_re.search(term)

    # TODO tex: escape ] on term. \], \rbrack{} and \verb!]! don't work :(
    return term
4138
4139
def get_image_align(line):
    """
    Return the align ("left", "right" or "center") of the first image
    found on the given line.

    The line must contain an image mark. Inside a table block the
    result is always "center".
    """
    # Strip the marks that can mess the align detection, in this order:
    # deflist separator (line end), list separator (line start) and
    # the quote mark (leading TABs)
    for mark in (SEPARATOR + "$", "^" + SEPARATOR, "^[\t]+"):
        line = re.sub(mark, "", line)

    # Where does the image sit on the cleaned line?
    found = regex["img"].search(line)
    at_line_start = found.start() == 0
    at_line_end = found.end() == len(line)

    if at_line_start and not at_line_end:
        align = "left"  # ^img + text$
    elif at_line_end and not at_line_start:
        align = "right"  # ^text + img$
    else:
        align = "center"  # image alone or surrounded by text

    # Special case: the detected align is ignored when inside a table
    if BLOCK.isblock("table"):
        return "center"
    return align
4168
4169
def get_encoding_string(target):
    "Return the encoding name for the target: 'utf8' for tex, 'utf-8' for the others"
    if target == "tex":
        return "utf8"
    return "utf-8"
4172
4173
def process_source_file(file_="", noconf=0, contents=None):
    """
    Read a source document and gather all the configuration that
    applies to it. No sanity checking is done on this step.

    The document is built from 'contents' when it is given, otherwise
    it is read from the file named by 'file_'. Its parts are extracted
    into separate holders.

    Unless 'noconf' is set, the raw config is joined in this order
    and then parsed:
            1. The user configuration file (i.e. $HOME/.txt2tagsrc)
            2. The source document's CONF area
            3. The command line options
    When 'noconf' is set, the config dictionary is empty.

    The return data is a tuple of two items:
            1. The parsed config dictionary
            2. The document's parts, as a (head, conf, body) tuple

    All the conversion process will be based on the data and
    configuration returned by this function.
    The source file is read in this step only.
    """
    source = SourceDocument(contents=contents) if contents else SourceDocument(file_)
    doc_head, doc_conf, doc_body = source.split()
    Message("Source document contents stored", 2)

    # Config not wanted: only the document parts matter
    if noconf:
        return {}, (doc_head, doc_conf, doc_body)

    # Read document config
    document_raw = source.get_raw_config()

    # Join all the config directives found, then parse it
    all_raw = RC_RAW + document_raw + CMDLINE_RAW
    Message("Parsing and saving all config found (%03d items)" % (len(all_raw)), 1)
    parsed = ConfigMaster(all_raw).parse()

    # Add manually the filename to the conf dic
    if contents:
        parsed["sourcefile"] = MODULEIN
        parsed["infile"] = MODULEIN
        parsed["outfile"] = MODULEOUT
    else:
        parsed["sourcefile"] = file_

    Debug("Complete config: %s" % parsed, 1)
    return parsed, (doc_head, doc_conf, doc_body)
4217
4218
def convert_file(headers, body, config, first_body_lineno=1):
    """
    Compose the full target document and hand it to finish_him().

    The config goes through ConfigMaster().sanity() first. Then the
    headers, body, footer and TOC are composed (in this order) and
    joined as head + toc + body + foot.

    Returns whatever finish_him() returns.
    """
    config = ConfigMaster().sanity(config)

    # Headers
    # TODO escape line before?
    # TODO see exceptions by tex and mgp
    Message("Composing target Headers", 1)
    head_lines = doHeader(headers, config)

    # Body: parse the full marked body into tagged target
    Message("Composing target Body", 1)
    body_lines, marked_toc = convert(body, config, firstlinenr=first_body_lineno)

    # Footer
    Message("Composing target Footer", 1)
    foot_lines = doFooter(config)

    # TOC (if needed): tag the marked TOC, then format it
    Message("Composing target TOC", 1)
    toc_lines = toc_formatter(toc_tagger(marked_toc, config), config)

    # Finally, we have our document
    return finish_him(head_lines + toc_lines + body_lines + foot_lines, config)
4243
4244
def parse_images(line):
    "Replace every image mark found on the line by the target's image tag"
    bracket_mask = "vvvvEscapeSquareBracketvvvv"
    image_mark = regex["img"]

    while True:
        found = image_mark.search(line)

        # Done when no image is left, or when the target's image tag
        # is the very same [\a] mark (nothing to convert)
        if not found or TAGS["img"] == "[\a]":
            break

        filename = found.group(1)
        tag = TAGS["img"]

        # If target supports image alignment, here we go
        if rules["imgalignable"]:
            align_name = get_image_align(line).capitalize()  # right -> Right

            # The align is a full tag, or part of the image tag (~A~)
            if TAGS["imgAlign" + align_name]:
                tag = TAGS["imgAlign" + align_name]
            else:
                partial_tag = TAGS["_imgAlign" + align_name]
                tag = regex["_imgAlign"].sub(partial_tag, tag, 1)

        if TARGET == "tex":
            tag = re.sub(r"\\b", r"\\\\b", tag)
            filename = filename.replace("_", "vvvvTexUndervvvv")

        # Ugly hack to avoid infinite loop when target's image tag contains []
        tag = tag.replace("[", bracket_mask)

        # First place the tag, then fill its first \a with the filename
        line = image_mark.sub(tag, line, 1)
        line = regex["x"].sub(filename, line, 1)

    return line.replace(bracket_mask, "[")
4274
4275
def add_inline_tags(line):
    "Apply the beautifiers found on the line, then tag its images"
    # Beautifier name and the regex/tag key of its font, in apply order
    beautifiers = (
        ("bold", "fontBold"),
        ("italic", "fontItalic"),
        ("underline", "fontUnderline"),
        ("strike", "fontStrike"),
    )
    for name, font_key in beautifiers:
        # Only call the beautifier when its mark is really on the line
        if regex[font_key].search(line):
            line = beautify_me(name, font_key, line)

    return parse_images(line)
4289
4290
def get_include_contents(file_, path=""):
    """
    Parse the %!include: value and extract the file contents.

    A value wrapped in doubled ` " or ' marks selects the "verb", "raw"
    or "tagged" include type; anything else is a "t2t" include.

    Returns an (include type, lines) tuple.
    """
    include_types = {"`": "verb", '"': "raw", "'": "tagged"}

    # Set include type and remove identifier marks
    marker = file_[0]
    if marker in include_types and file_[:2] == file_[-2:] == marker * 2:
        id_ = include_types[marker]
        file_ = file_[2:-2]
    else:
        id_ = "t2t"

    # Handle remote dir execution
    filepath = os.path.join(path, file_)

    # Read included file contents
    lines = Readfile(filepath)

    # Default txt2tags marked text, just BODY matters
    # (the lines read above are replaced by the file body)
    if id_ == "t2t":
        lines = get_file_body(filepath)
        # TODO fix images relative path if file has a path, ie.:
        # chapter1/index.t2t (wait until tree parsing)
        # TODO for the images path fix, also respect outfile path,
        # if different from infile (wait until tree parsing)
        lines.insert(0, "%INCLUDED({}) starts here: {}".format(id_, file_))
        # This appears when included hit EOF with verbatim area open
        # lines.append('%%INCLUDED(%s) ends here: %s'%(id_,file_))

    return id_, lines
4316
4317
def set_global_config(config):
    """
    Install the given config as the module-wide state.

    Sets the CONF, rules, TAGS, regex and TARGET globals, in this order.
    """
    global CONF, TAGS, regex, rules, TARGET

    CONF = config

    # Derived from the config just stored
    rules = getRules(config)
    TAGS = getTags(config)

    regex = getRegexes()

    # save for buggy functions that need global
    TARGET = config["target"]
4325
4326
def convert(bodylines, config, firstlinenr=1):
    """
    Convert the marked body lines into the target's tagged lines.

    Each line goes through the PreProc filters and the escape char
    masking, and is then tested against the marks in a fixed order:
    comment, tagged, raw and verbatim areas, blank lines, special
    lines (%!include), comments, bars, titles, quotes, lists and
    tables. A line not consumed by those steps is escaped, gets the
    inline tags and is then held on the current block or appended to
    the results.

    bodylines   -- list of source lines (t2t included files are spliced
                   into a new list while parsing, the given list is not
                   changed)
    config      -- config dictionary, installed as the global config by
                   set_global_config()
    firstlinenr -- number given to the first body line on Debug messages

    Returns a (ret, marked_toc) tuple: the list of converted lines and
    the value of TITLE.dump_marked_toc().
    """
    global BLOCK, TITLE

    set_global_config(config)

    target = config["target"]
    BLOCK = BlockMaster()
    MASK = MaskMaster()
    TITLE = TitleMaster()

    ret = []
    f_lastwasblank = 0

    # Compiling all PreProc regexes
    pre_filter = compile_filters(CONF["preproc"], "Invalid PreProc filter regex")

    # Let's mark it up!
    # linenr is the line number used on Debug messages, lineref is the
    # index of the next line to be read from bodylines
    linenr = firstlinenr - 1
    lineref = 0
    while lineref < len(bodylines):
        # Defaults
        MASK.reset()
        # Callable that will receive the parsed line (chosen later on)
        results_box = ""

        untouchedline = bodylines[lineref]

        line = re.sub("[\n\r]+$", "", untouchedline)  # del line break

        # Apply PreProc filters
        if pre_filter:
            errmsg = "Invalid PreProc filter replacement"
            for rgx, repl in pre_filter:
                try:
                    line = rgx.sub(repl, line)
                except Exception:
                    Error("{}: '{}'".format(errmsg, repl))

        line = maskEscapeChar(line)  # protect \ char
        linenr += 1
        lineref += 1

        Debug(repr(line), 2, linenr)  # heavy debug: show each line

        # ------------------[ Comment Block ]------------------------

        # We're already on a comment block
        if BLOCK.block() == "comment":

            # Closing comment
            if regex["blockCommentClose"].search(line):
                ret.extend(BLOCK.blockout() or [])
                continue

            # Normal comment-inside line. Ignore it.
            continue

        # Detecting comment block init
        if (
            regex["blockCommentOpen"].search(line)
            and BLOCK.block() not in BLOCK.exclusive
        ):
            ret.extend(BLOCK.blockin("comment"))
            continue

        # -------------------------[ Tagged Text ]----------------------

        # We're already on a tagged block
        if BLOCK.block() == "tagged":

            # Closing tagged
            if regex["blockTaggedClose"].search(line):
                ret.extend(BLOCK.blockout())
                continue

            # Normal tagged-inside line
            BLOCK.holdadd(line)
            continue

        # Detecting tagged block init
        if (
            regex["blockTaggedOpen"].search(line)
            and BLOCK.block() not in BLOCK.exclusive
        ):
            ret.extend(BLOCK.blockin("tagged"))
            continue

        # One line tagged text
        if regex["1lineTagged"].search(line) and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin("tagged"))
            line = regex["1lineTagged"].sub("", line)
            BLOCK.holdadd(line)
            ret.extend(BLOCK.blockout())
            continue

        # -------------------------[ Raw Text ]----------------------

        # We're already on a raw block
        if BLOCK.block() == "raw":

            # Closing raw
            if regex["blockRawClose"].search(line):
                ret.extend(BLOCK.blockout())
                continue

            # Normal raw-inside line
            BLOCK.holdadd(line)
            continue

        # Detecting raw block init
        if regex["blockRawOpen"].search(line) and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin("raw"))
            continue

        # One line raw text
        if regex["1lineRaw"].search(line) and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin("raw"))
            line = regex["1lineRaw"].sub("", line)
            BLOCK.holdadd(line)
            ret.extend(BLOCK.blockout())
            continue

        # ------------------------[ Verbatim  ]----------------------

        # TIP We'll never support beautifiers inside verbatim

        # Closing table mapped to verb
        # (no continue here: the current line is still parsed below)
        if (
            BLOCK.block() == "verb"
            and BLOCK.prop("mapped") == "table"
            and not regex["table"].search(line)
        ):
            ret.extend(BLOCK.blockout())

        # We're already on a verb block
        if BLOCK.block() == "verb":

            # Closing verb
            if regex["blockVerbClose"].search(line):
                ret.extend(BLOCK.blockout())
                continue

            # Normal verb-inside line
            BLOCK.holdadd(line)
            continue

        # Detecting verb block init
        if regex["blockVerbOpen"].search(line) and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin("verb"))
            f_lastwasblank = 0
            continue

        # One line verb-formatted text
        if regex["1lineVerb"].search(line) and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin("verb"))
            line = regex["1lineVerb"].sub("", line)
            BLOCK.holdadd(line)
            ret.extend(BLOCK.blockout())
            f_lastwasblank = 0
            continue

        # Tables are mapped to verb when target is not table-aware
        if not rules["tableable"] and regex["table"].search(line):
            if not BLOCK.isblock("verb"):
                ret.extend(BLOCK.blockin("verb"))
                BLOCK.propset("mapped", "table")
                BLOCK.holdadd(line)
                continue

        # ---------------------[ blank lines ]-----------------------

        if regex["blankline"].search(line):

            # Close open paragraph
            if BLOCK.isblock("para"):
                ret.extend(BLOCK.blockout())
                f_lastwasblank = 1
                continue

            # Close all open tables
            if BLOCK.isblock("table"):
                ret.extend(BLOCK.blockout())
                f_lastwasblank = 1
                continue

            # Close all open quotes
            while BLOCK.isblock("quote"):
                ret.extend(BLOCK.blockout())

            # Closing all open lists
            if f_lastwasblank:  # 2nd consecutive blank
                if BLOCK.block().endswith("list"):
                    BLOCK.holdaddsub("")  # helps parser
                while BLOCK.depth:  # closes list (if any)
                    ret.extend(BLOCK.blockout())
                continue  # ignore consecutive blanks

            # Paragraph (if any) is wanted inside lists also
            if BLOCK.block().endswith("list"):
                BLOCK.holdaddsub("")

            f_lastwasblank = 1
            continue

        # ---------------------[ special ]---------------------------

        if regex["special"].search(line):

            # Only key and val are used below
            targ, key, val = ConfigLines().parse_line(line, None, target)

            if key:
                Debug("Found config '{}', value '{}'".format(key, val), 1, linenr)
            else:
                Debug("Bogus Special Line", 1, linenr)

            # %!include command
            if key == "include":
                # Included files are looked up on the source file's folder
                incpath = os.path.dirname(CONF["sourcefile"])
                incfile = val
                err = "A file cannot include itself (loop!)"
                if CONF["sourcefile"] == incfile:
                    Error("{}: {}".format(err, incfile))
                inctype, inclines = get_include_contents(incfile, incpath)

                # Verb, raw and tagged are easy
                if inctype != "t2t":
                    ret.extend(BLOCK.blockin(inctype))
                    BLOCK.holdextend(inclines)
                    ret.extend(BLOCK.blockout())
                else:
                    # Insert include lines into body
                    # (right at the next line to be read, so they are
                    # parsed on the following iterations)
                    # TODO include maxdepth limit
                    bodylines = bodylines[:lineref] + inclines + bodylines[lineref:]

                # This line is done, go to next
                continue

        # ---------------------[ Comments ]--------------------------

        # Just skip them
        if regex["comment"].search(line):
            continue

        # ---------------------[ Triggers ]--------------------------

        # Valid line, reset blank status
        f_lastwasblank = 0

        # Any NOT quote line closes all open quotes
        if BLOCK.isblock("quote") and not regex["quote"].search(line):
            while BLOCK.isblock("quote"):
                ret.extend(BLOCK.blockout())

        # Any NOT table line closes an open table
        if BLOCK.isblock("table") and not regex["table"].search(line):
            ret.extend(BLOCK.blockout())

        # ---------------------[ Horizontal Bar ]--------------------

        if regex["bar"].search(line):

            # Bars inside quotes are handled on the Quote processing
            # Otherwise we parse the bars right here
            #
            if not (BLOCK.isblock("quote") or regex["quote"].search(line)) or (
                BLOCK.isblock("quote") and not rules["barinsidequote"]
            ):

                # Close all the opened blocks
                ret.extend(BLOCK.blockin("bar"))

                # Extract the bar chars (- or =)
                m = regex["bar"].search(line)
                bar_chars = m.group(2)

                # Process and dump the tagged bar
                BLOCK.holdadd(bar_chars)
                ret.extend(BLOCK.blockout())
                Debug("BAR: %s" % line, 6)

                # We're done, nothing more to process
                continue

        # ---------------------[ Title ]-----------------------------

        if (
            regex["title"].search(line) or regex["numtitle"].search(line)
        ) and not BLOCK.block().endswith("list"):

            if regex["title"].search(line):
                name = "title"
            else:
                name = "numtitle"

            # Close all the opened blocks
            ret.extend(BLOCK.blockin(name))

            # Process title
            TITLE.add(line)
            ret.extend(BLOCK.blockout())

            # We're done, nothing more to process
            continue

        # ---------------------[ apply masks ]-----------------------

        # The masks are undone at the Final Parses step (MASK.undo)
        line = MASK.mask(line)

        # XXX from here, only block-inside lines will pass

        # ---------------------[ Quote ]-----------------------------

        if regex["quote"].search(line):

            # Store number of leading TABS
            quotedepth = len(regex["quote"].search(line).group(0))

            # SGML doesn't support nested quotes
            if rules["quotenotnested"]:
                quotedepth = 1

            # Don't cross depth limit
            maxdepth = rules["quotemaxdepth"]
            if maxdepth and quotedepth > maxdepth:
                quotedepth = maxdepth

            # New quote
            if not BLOCK.isblock("quote"):
                ret.extend(BLOCK.blockin("quote"))

            # New subquotes
            # NOTE(review): the blockin() return is not added to ret
            # here, unlike the other blockin() calls -- confirm intended
            while BLOCK.depth < quotedepth:
                BLOCK.blockin("quote")

            # Closing quotes
            while quotedepth < BLOCK.depth:
                ret.extend(BLOCK.blockout())

            # Bar inside quote
            # The bar is tagged by a temporary BlockMaster and its
            # result is held on the current quote block
            if regex["bar"].search(line) and rules["barinsidequote"]:
                tempBlock = BlockMaster()
                tagged_bar = []
                tagged_bar.extend(tempBlock.blockin("bar"))
                tempBlock.holdadd(line)
                tagged_bar.extend(tempBlock.blockout())
                BLOCK.holdextend(tagged_bar)
                continue

        # ---------------------[ Lists ]-----------------------------

        # An empty item also closes the current list
        if BLOCK.block().endswith("list"):
            m = regex["listclose"].match(line)
            if m:
                listindent = m.group(1)
                listtype = m.group(2)
                currlisttype = BLOCK.prop("type")
                currlistindent = BLOCK.prop("indent")
                if listindent == currlistindent and listtype == currlisttype:
                    ret.extend(BLOCK.blockout())
                    continue

        if (
            regex["list"].search(line)
            or regex["numlist"].search(line)
            or regex["deflist"].search(line)
        ):

            listindent = BLOCK.prop("indent")
            listids = "".join(LISTNAMES.keys())
            m = re.match("^( *)([%s]) " % re.escape(listids), line)
            listitemindent = m.group(1)
            listtype = m.group(2)
            listname = LISTNAMES[listtype]
            # A list item line is always stored with holdadd
            results_box = BLOCK.holdadd

            # Del list ID (and separate term from definition)
            if listname == "deflist":
                term = parse_deflist_term(line)
                line = regex["deflist"].sub(SEPARATOR + term + SEPARATOR, line)
            else:
                line = regex[listname].sub(SEPARATOR, line)

            # Don't cross depth limit
            maxdepth = rules["listmaxdepth"]
            if maxdepth and BLOCK.depth == maxdepth:
                if len(listitemindent) > len(listindent):
                    listitemindent = listindent

            # List bumping (same indent, diff mark)
            # Close the currently open list to clear the mess
            if (
                BLOCK.block().endswith("list")
                and listname != BLOCK.block()
                and len(listitemindent) == len(listindent)
            ):
                ret.extend(BLOCK.blockout())
                listindent = BLOCK.prop("indent")

            # Open mother list or sublist
            if not BLOCK.block().endswith("list") or len(listitemindent) > len(
                listindent
            ):
                ret.extend(BLOCK.blockin(listname))
                BLOCK.propset("indent", listitemindent)
                BLOCK.propset("type", listtype)

            # Closing sublists
            while len(listitemindent) < len(BLOCK.prop("indent")):
                ret.extend(BLOCK.blockout())

            # O-oh, sublist before list ("\n\n  - foo\n- foo")
            # Fix: close sublist (as mother), open another list
            if not BLOCK.block().endswith("list"):
                ret.extend(BLOCK.blockin(listname))
                BLOCK.propset("indent", listitemindent)
                BLOCK.propset("type", listtype)

        # ---------------------[ Table ]-----------------------------

        # TODO escape undesired format inside table
        if regex["table"].search(line):

            if not BLOCK.isblock("table"):  # first table line!
                ret.extend(BLOCK.blockin("table"))
                # Re-run the table parser initializer with the first line
                BLOCK.tableparser.__init__(line)

            tablerow = TableMaster().parse_row(line)
            BLOCK.tableparser.add_row(tablerow)  # save config

            # Maintain line to unmask and inlines
            # XXX Bug: | **bo | ld** | turns **bo\x01ld** and gets converted :(
            # TODO isolate unmask+inlines parsing to use here
            line = SEPARATOR.join(tablerow["cells"])

        # ---------------------[ Paragraph ]-------------------------

        if not BLOCK.block():  # new para!
            ret.extend(BLOCK.blockin("para"))

        ############################################################
        ############################################################
        ############################################################

        # ---------------------[ Final Parses ]----------------------

        # The target-specific special char escapes for body lines
        line = doEscape(target, line)

        line = add_inline_tags(line)
        line = MASK.undo(line)

        # ---------------------[ Hold or Return? ]-------------------

        # Now we must choose where to put the parsed line
        # (already chosen when the line is a list item)
        #
        if not results_box:
            # List item extra lines
            if BLOCK.block().endswith("list"):
                results_box = BLOCK.holdaddsub
            # Other blocks
            elif BLOCK.block():
                results_box = BLOCK.holdadd
            # No blocks
            else:
                line = doFinalEscape(target, line)
                results_box = ret.append

        results_box(line)

    # EOF: close any open para/verb/lists/table/quotes
    Debug("EOF", 7)
    while BLOCK.block():
        ret.extend(BLOCK.blockout())

    # Maybe close some opened title area?
    if rules["titleblocks"]:
        ret.extend(TITLE.close_all())

    # Maybe a major tag to enclose body? (like DIV for CSS)
    if TAGS["bodyOpen"]:
        ret.insert(0, TAGS["bodyOpen"])
    if TAGS["bodyClose"]:
        ret.append(TAGS["bodyClose"])

    marked_toc = TITLE.dump_marked_toc()

    return ret, marked_toc
4814
4815
def exec_command_line(user_cmdline=None):
    """
    Run the program for the given command line options.

    Uses 'user_cmdline' when it is given (and not empty), otherwise
    sys.argv[1:]. Sets the CMDLINE_RAW, DEBUG, VERBOSE and QUIET
    globals, and RC_RAW when the user configuration file is loaded.
    Exactly one input file is expected; it is then processed and
    converted.
    """
    global CMDLINE_RAW, RC_RAW, DEBUG, VERBOSE, QUIET, Error

    # Extract command line data
    arguments = user_cmdline or sys.argv[1:]
    CMDLINE_RAW = CommandLine().get_raw_config(arguments, relative=True)
    options = ConfigMaster(CMDLINE_RAW).parse()
    DEBUG, VERBOSE, QUIET = [
        options.get(name) or 0 for name in ("debug", "verbose", "quiet")
    ]
    infiles = options.get("infile") or []

    Message("Processing begins", 1)

    # The easy ones
    if options.get("help"):
        Quit(USAGE)
    if options.get("version"):
        Quit(VERSIONSTR)
    if options.get("targets"):
        listTargets()
        Quit()

    Debug("system platform: %s" % sys.platform)
    Debug("python version: %s" % (sys.version.split("(")[0]))
    Debug("command line: %s" % sys.argv)
    Debug("command line raw config: %s" % CMDLINE_RAW, 1)

    # Extract RC file config (unless the 'rc' option is set to zero)
    if options.get("rc") != 0:
        rc_file = get_rc_path()
        if os.path.isfile(rc_file):
            Message("Loading user configuration file", 1)
            RC_RAW = ConfigLines(file_=rc_file).get_raw_config()

        Debug("rc file: %s" % rc_file)
        Debug("rc file raw config: %s" % RC_RAW, 1)
    else:
        Message("Ignoring user configuration file", 1)

    # TODO#1: this checking should be only in ConfigMaster.sanity()
    if len(infiles) != 1:
        Error(
            "Pass exactly one input file (see --help). "
            "Example: {} -t html file.t2t".format(my_name)
        )
    else:
        infile = infiles[0]

    config, (headers, config_source, body) = process_source_file(infile)

    # Body starts after the headers (counted as at least one line)
    # and the config area
    first_body_lineno = (len(headers) or 1) + len(config_source) + 1
    convert_file(headers, body, config, first_body_lineno=first_body_lineno)

    Message("Txt2tags finished successfully", 1)
4872
4873
if __name__ == "__main__":
    # Script entry point: run the command line, then quit
    try:
        exec_command_line()
    except error as known_error:
        # The program's own error: exit showing its message
        sys.exit(known_error)
    except Exception:
        # Anything else: exit showing the unknown error message
        sys.exit(getUnknownErrorMessage())

    # Only reached when no exception was raised (both handlers exit)
    Quit()
4883