1#!/usr/bin/env python
2# txt2tags - generic text conversion tool
3# http://txt2tags.sf.net
4#
5# Copyright 2001, 2002, 2003, 2004, 2005 Aurelio Marinho Jargas
6#
7#   This program is free software; you can redistribute it and/or modify
8#   it under the terms of the GNU General Public License as published by
9#   the Free Software Foundation, version 2.
10#
11#   This program is distributed in the hope that it will be useful,
12#   but WITHOUT ANY WARRANTY; without even the implied warranty of
13#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14#   GNU General Public License for more details.
15#
16#   You have received a copy of the GNU General Public License along
17#   with this program, on the COPYING file.
18#
19#
20#
21#   +-------------------------------------------------------------+
22#   |               IMPORTANT MESSAGES, PLEASE READ               |
23#   +-------------------------------------------------------------+
24#   |                                                             |
25#   |                                                             |
26#   |                     v1.x COMPATIBILITY                      |
27#   |                     ------------------                      |
28#   |                                                             |
#   |      Due to the major syntax changes, the new 2.x series    |
#   |      BREAKS backwards compatibility.                        |
#   |                                                             |
#   |      Use the 't2tconv' script to upgrade your existing      |
#   |      v1.x files to conform to the new v2.x syntax.          |
#   |                                                             |
#   |      Do a visual inspection of the newly converted file.    |
#   |      Especially Pre & Post proc filters can break.          |
#   |      Check them!                                            |
38#   |                                                             |
39#   |                                                             |
40#   +-------------------------------------------------------------+
41#
42#
43########################################################################
44#
45#   BORING CODE EXPLANATION AHEAD
46#
47# Just read if you wish to understand how the txt2tags code works
48#
49########################################################################
50#
51# Version 2.0 was a complete rewrite for the program 'core'.
52#
53# Now the code that [1] parses the marked text is separated from the
54# code that [2] insert the target tags.
55#
56#   [1] made by: def convert()
57#   [2] made by: class BlockMaster
58#
# The structures of the marked text are identified and their contents are
# extracted into a data holder (Python lists and dictionaries).
61#
# When parsing the source file, the blocks (para, lists, quote, table)
# are opened with BlockMaster, right when found. Then their contents,
# which span several lines, are fed into a special holder on the
# BlockMaster instance. Only when the block is closed are the target tags
# inserted for the full block as a whole, in one pass. This way, we
# have better control over blocks. Much better than the previous line by
# line approach.
69#
# In other words, whenever inside a block, the parser *holds* the tag
# insertion process, waiting until the full block is read. That was
# needed primarily to close paragraphs for the new XHTML target, but
# proved to be a very good addition, improving many other processing steps.
74#
75# -------------------------------------------------------------------
76#
77# There is also a brand new code for the Configuration schema, 100%
78# rewritten. There are new classes, all self documented: CommandLine,
79# SourceDocument, ConfigMaster and ConfigLines. In short, a new RAW
80# Config format was created, and all kind of configuration is first
81# converted to this format, and then a generic method parses it.
82#
# The init processing was also changed, and now the functions which
# get information about the input files are: get_infiles_config(),
#  process_source_file() and convert_this_files()
86#
87# Other parts are untouched, and remains the same as in v1.7, as the
88# marks regexes, target Headers and target Tags&Rules.
89#
90########################################################################
91
92# Now I think the code is nice, easier to read and understand
93
94#XXX Python coding warning
# Avoid common mistakes:
# - do NOT use newlist=list instead of newlist=list[:]
# - do NOT use newdic=dic   instead of newdic=dic.copy()
# - do NOT use dic[key]     instead of dic.get(key)
# - do NOT use del dic[key] without a has_key() check before it
100
#XXX Smart Image Align doesn't work if the image is a link
# Can't fix that because the image is expanded together with the
# link, at the linkbank filling moment. Only the image is passed
# to parse_images(), not the full line, so it is always 'middle'.
105
#XXX Paragraph separation is not valid inside Quote
# Quote will not have <p></p> inside; instead it will close and open
# the <blockquote> again. This is really awkward in CSS, when defining a
# different background color. Still don't know how to fix it.
110
111#XXX TODO (maybe)
112# New mark or macro which expands to an anchor full title.
113# It is necessary to parse the full document in this order:
114#  DONE  1st scan: HEAD: get all settings, including %!includeconf
115#  DONE  2nd scan: BODY: expand includes & apply %!preproc
116#        3rd scan: BODY: read titles and compose TOC info
117#        4th scan: BODY: full parsing, expanding [#anchor] 1st
# Steps 2 and 3 can be done together, with no tag adding.
# Two complete body scans will be *slow*; not sure if it is worth it.
# One solution may be to add the titles as postproc rules
121
122
123##############################################################################
124
# User-tunable switches: set to 1 to turn ON, 0 to turn OFF

# use gettext for i18ned messages? (default is 1)
USE_I18N = 1
# show debug messages in colors? (default is 1)
COLOR_DEBUG = 1
# is your terminal background color light? (default is 0)
BG_LIGHT = 0
# use lowercased HTML tags instead of uppercased ones? (default is 0)
HTML_LOWER = 0
131
132##############################################################################
133
134
135# these are all the core Python modules used by txt2tags (KISS!)
136import re, string, os, sys, time, getopt
137
# Program identity: name, version, home page and contact e-mail
my_name    = 'txt2tags'
my_version = '2.3'
my_url     = 'http://txt2tags.sf.net'
my_email   = 'verde@aurelio.net'
143
# i18n - translate the messages with gettext, but just if available.
# The identity translator is installed first, so '_' is always defined;
# any failure while loading the catalog simply keeps the original text.
def _(text):
	"Fallback translator: returns the message unchanged."
	return text

if USE_I18N:
	try:
		import gettext
		# if your locale dir is different, change it here
		cat = gettext.Catalog('txt2tags',localedir='/usr/share/locale/')
		_ = cat.gettext
	except Exception:
		# No gettext module or no message catalog: keep the fallback.
		# Catching Exception instead of using a bare 'except:' avoids
		# swallowing KeyboardInterrupt/SystemExit (on Python 2.5+,
		# where they no longer derive from Exception).
		pass
155
156# FLAGS   : the conversion related flags  , may be used in %!options
157# OPTIONS : the conversion related options, may be used in %!options
158# ACTIONS : the other behaviour modifiers, valid on command line only
159# MACROS  : the valid macros with their default values for formatting
# SETTINGS: global miscellaneous settings, valid on RC file only
161# NO_TARGET: actions that don't require a target specification
162# NO_MULTI_INPUT: actions that don't accept more than one input file
163# CONFIG_KEYWORDS: the valid %!key:val keywords
164#
165# FLAGS and OPTIONS are configs that affect the converted document.
166# They usually have also a --no-<option> to turn them OFF.
167# ACTIONS are needed because when doing multiple input files, strange
168# behaviour would be found, as use command line interface for the
169# first file and gui for the second. There is no --no-<action>.
170# --version and --help inside %!options are also odd
171#
# The known target names
TARGETS = [
    'html', 'xhtml', 'sgml', 'tex', 'lout',
    'man', 'mgp', 'moin', 'pm6', 'txt',
]

# Conversion flags with their default state (1=ON, 0=OFF)
# NOTE(review): 'css-suggar' looks like a legacy misspelling of
# 'css-sugar' kept on purpose -- confirm before removing it
FLAGS = {
    'headers'    : 1,
    'enum-title' : 0,
    'mask-email' : 0,
    'toc-only'   : 0,
    'toc'        : 0,
    'rc'         : 1,
    'css-sugar'  : 0,
    'css-suggar' : 0,
    'css-inside' : 0,
    'quiet'      : 0,
}

# Conversion options with their default values
OPTIONS = {
    'target'     : '',
    'toc-level'  : 3,
    'style'      : '',
    'infile'     : '',
    'outfile'    : '',
    'encoding'   : '',
    'config-file': '',
    'split'      : 0,
    'lang'       : '',
}

# Command line only behaviour modifiers, all OFF by default
ACTIONS = {
    'help'       : 0,
    'version'    : 0,
    'gui'        : 0,
    'verbose'    : 0,
    'debug'      : 0,
    'dump-config': 0,
    'dump-source': 0,
}

# Macro names with their default format strings
MACROS = {
    'date'   : '%Y%m%d',
    'infile' : '%f',
    'mtime'  : '%Y%m%d',
    'outfile': '%f',
}

# for future use
SETTINGS = {}

# Actions that don't require a target specification
NO_TARGET = [
    'help', 'version', 'gui',
    'toc-only', 'dump-config', 'dump-source',
]

# Actions that don't accept more than one input file
NO_MULTI_INPUT = ['gui', 'dump-config', 'dump-source']

# The valid %!key:val keywords
CONFIG_KEYWORDS = [
    'target', 'encoding', 'style', 'options',
    'preproc', 'postproc', 'guicolors',
]
# Translatable description of each target, keyed by the target name
TARGET_NAMES = {}
TARGET_NAMES['html']  = _('HTML page')
TARGET_NAMES['xhtml'] = _('XHTML page')
TARGET_NAMES['sgml']  = _('SGML document')
TARGET_NAMES['tex']   = _('LaTeX document')
TARGET_NAMES['lout']  = _('Lout document')
TARGET_NAMES['man']   = _('UNIX Manual page')
TARGET_NAMES['mgp']   = _('Magic Point presentation')
TARGET_NAMES['moin']  = _('MoinMoin page')
TARGET_NAMES['pm6']   = _('PageMaker 6.0 document')
TARGET_NAMES['txt']   = _('Plain Text')
204
# Run-time switches. Do not edit them here, please use the command
# line option (or macro) shown at the right of each one instead.
DEBUG   = 0   # --debug
VERBOSE = 0   # -v, -vv or -vvv
QUIET   = 0   # --quiet
GUI     = 0   # --gui
AUTOTOC = 1   # --no-toc or %%toc

# Module-level holders, all of them created empty
RC_RAW      = []
CMDLINE_RAW = []
CONF        = {}
BLOCK       = None
regex       = {}
TAGS        = {}
rules       = {}

lang   = 'english'
TARGET = ''

# Special file names ('-' is the STDIN/STDOUT mark on the command line)
STDIN     = '-'
STDOUT    = '-'
MODULEIN  = '-module-'
MODULEOUT = '-module-'

ESCCHAR   = '\x00'
SEPARATOR = '\x01'

# presumably maps each list mark char to its block name -- confirm on usage
LISTNAMES = {'-': 'list', '+': 'numlist', ':': 'deflist'}

# Line break flavors, keyed by the first three chars of sys.platform
LINEBREAK = {'default': '\n', 'win': '\r\n', 'mac': '\r'}

# platform specific settings
# Take the line break of the running platform, or else the default one
LB = LINEBREAK.get(sys.platform[:3])
if not LB:
	LB = LINEBREAK['default']
231
232# identify a development version
233#dev_suffix = '-dev'+time.strftime('%m%d',time.localtime(time.time()))
234#my_version = my_version + dev_suffix
235
# Program identification string: name, version and home page
VERSIONSTR = _("%s version %s <%s>") % (my_name, my_version, my_url)

# Help text. The lines are first collected in a list (an empty string
# makes a blank line) and then glued together with line breaks.
USAGE = [
'',
_("Usage: %s [OPTIONS] [infile.t2t ...]") % my_name,
'',
_("  -t, --target=TYPE   set target document type. currently supported:"),
# the TARGETS list repr, stripped of its brackets and quotes
'                      %s' % re.sub(r"[]'[]",'',repr(TARGETS)),
_("  -i, --infile=FILE   set FILE as the input file name ('-' for STDIN)"),
_("  -o, --outfile=FILE  set FILE as the output file name ('-' for STDOUT)"),
_("  -n, --enum-title    enumerate all title lines as 1, 1.1, 1.1.1, etc"),
_("  -H, --no-headers    suppress header, title and footer contents"),
_("      --headers       show header, title and footer contents (default ON)"),
_("      --encoding=ENC  set target file encoding (utf-8, iso-8859-1, etc)"),
_("      --style=FILE    use FILE as the document style (like HTML CSS)"),
_("      --css-sugar     insert CSS-friendly tags for HTML and XHTML targets"),
_("      --css-inside    insert CSS file contents inside HTML/XHTML headers"),
_("      --mask-email    hide email from spam robots. x@y.z turns <x (a) y z>"),
_("      --toc           add TOC (Table of Contents) to target document"),
_("      --toc-only      print document TOC and exit"),
_("      --toc-level=N   set maximum TOC level (depth) to N"),
_("  -C, --config-file=F read config from file F"),
_("      --rc            read user config file ~/.txt2tagsrc (default ON)"),
_("      --gui           invoke Graphical Tk Interface"),
_("  -q, --quiet         quiet mode, suppress all output (except errors)"),
_("  -v, --verbose       print informative messages during conversion"),
_("  -h, --help          print this help information and exit"),
_("  -V, --version       print program version and exit"),
_("      --dump-config   print all the config found and exit"),
_("      --dump-source   print the document source, with includes expanded"),
'',
_("Turn OFF options:"),
"     --no-outfile, --no-infile, --no-style, --no-encoding, --no-headers",
"     --no-toc, --no-toc-only, --no-mask-email, --no-enum-title, --no-rc",
"     --no-css-sugar, --no-css-inside, --no-quiet, --no-dump-config",
"     --no-dump-source",
'',
_("Example:\n     %s -t html --toc myfile.t2t") % my_name,
'',
_("By default, converted output is saved to 'infile.<target>'."),
_("Use --outfile to force an output file name."),
_("If  input file is '-', reads from STDIN."),
_("If output file is '-', dumps output to STDOUT."),
''
]
USAGE = string.join(USAGE, '\n')
281
282
283##############################################################################
284
285
286# here is all the target's templates
287# you may edit them to fit your needs
288#  - the %(HEADERn)s strings represent the Header lines
289#  - the %(STYLE)s string is changed by --style contents
290#  - the %(ENCODING)s string is changed by --encoding contents
291#  - if any of the above is empty, the full line is removed
292#  - use %% to represent a literal %
293#
# The templates are registered one by one, keyed by the template name.
# Besides the target names, there are the 'htmlcss' and 'xhtmlcss'
# variants, which wrap the headers in a "header" DIV.
HEADER_TEMPLATE = {}

# Plain text: just the three header lines
HEADER_TEMPLATE['txt'] = """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
"""

# SGML (linuxdoc): title, author and date
HEADER_TEMPLATE['sgml'] = """\
<!doctype linuxdoc system>
<article>
<title>%(HEADER1)s
<author>%(HEADER2)s
<date>%(HEADER3)s
"""

# HTML with presentational attributes
HEADER_TEMPLATE['html'] = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
<LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
<TITLE>%(HEADER1)s</TITLE>
</HEAD><BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
<FONT SIZE="4">
<I>%(HEADER2)s</I><BR>
%(HEADER3)s
</FONT></CENTER>
"""

# HTML with the headers inside a "header" DIV
HEADER_TEMPLATE['htmlcss'] = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
<LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
<TITLE>%(HEADER1)s</TITLE>
</HEAD>
<BODY>

<DIV CLASS="header" ID="header">
<H1>%(HEADER1)s</H1>
<H2>%(HEADER2)s</H2>
<H3>%(HEADER3)s</H3>
</DIV>
"""

# XHTML: the encoding sits alone on its line of the XML declaration
HEADER_TEMPLATE['xhtml'] = """\
<?xml version="1.0"
      encoding="%(ENCODING)s"
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>%(HEADER1)s</title>
<meta name="generator" content="http://txt2tags.sf.net" />
<link rel="stylesheet" type="text/css" href="%(STYLE)s" />
</head>
<body bgcolor="white" text="black">
<div align="center">
<h1>%(HEADER1)s</h1>
<h2>%(HEADER2)s</h2>
<h3>%(HEADER3)s</h3>
</div>
"""

# XHTML with the headers inside a "header" div
HEADER_TEMPLATE['xhtmlcss'] = """\
<?xml version="1.0"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>%(HEADER1)s</title>
<meta name="generator" content="http://txt2tags.sf.net" />
<meta http-equiv="Content-Type" content="text/html; charset=%(ENCODING)s" />
<link rel="stylesheet" type="text/css" href="%(STYLE)s" />
</head>
<body>

<div class="header" id="header">
<h1>%(HEADER1)s</h1>
<h2>%(HEADER2)s</h2>
<h3>%(HEADER3)s</h3>
</div>
"""

# UNIX man page: a single .TH line
HEADER_TEMPLATE['man'] = """\
.TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s"
"""

# PageMaker 6.0 tags
# TODO style to <HR>
HEADER_TEMPLATE['pm6'] = """\
<PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
><@Normal=
  <FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
  <HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
  <C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
  <GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
  <GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
  <GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
  <GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
  <GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
  <GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
  <GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
><@Title4=<@-PARENT "Title3">
><@Title5=<@-PARENT "Title3">
><@Quote=<@-PARENT "Normal"><SIZE 10><I>>

%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
"""

# Magic Point: every literal % of its directives is doubled
HEADER_TEMPLATE['mgp'] = """\
#!/usr/X11R6/bin/mgp -t 90
%%deffont "normal"    xfont  "utopia-medium-r", charset "iso8859-1"
%%deffont "normal-i"  xfont  "utopia-medium-i", charset "iso8859-1"
%%deffont "normal-b"  xfont  "utopia-bold-r"  , charset "iso8859-1"
%%deffont "normal-bi" xfont  "utopia-bold-i"  , charset "iso8859-1"
%%deffont "mono"      xfont "courier-medium-r", charset "iso8859-1"
%%default 1 size 5
%%default 2 size 8, fore "yellow", font "normal-b", center
%%default 3 size 5, fore "white",  font "normal", left, prefix "  "
%%tab 1 size 4, vgap 30, prefix "     ", icon arc "red" 40, leftfill
%%tab 2 prefix "            ", icon arc "orange" 40, leftfill
%%tab 3 prefix "                   ", icon arc "brown" 40, leftfill
%%tab 4 prefix "                          ", icon arc "darkmagenta" 40, leftfill
%%tab 5 prefix "                                ", icon arc "magenta" 40, leftfill
%%%%------------------------- end of headers -----------------------------
%%page





%%size 10, center, fore "yellow"
%(HEADER1)s

%%font "normal-i", size 6, fore "white", center
%(HEADER2)s

%%font "mono", size 7, center
%(HEADER3)s
"""

# MoinMoin wiki
HEADER_TEMPLATE['moin'] = """\
'''%(HEADER1)s'''

''%(HEADER2)s''

%(HEADER3)s
"""

# LaTeX: a raw string, so the backslashes are kept as they are
HEADER_TEMPLATE['tex'] = \
r"""\documentclass[11pt,a4paper]{scrbook}
\usepackage{amsfonts,graphicx}
\usepackage[pdfstartview=FitH,urlcolor=blue,colorlinks=true,bookmarks=true]{hyperref}
\usepackage[%(ENCODING)s]{inputenc}  %% char encoding
\usepackage{%(STYLE)s}  %% user defined package
\tolerance=10000
\usepackage{scrpage2}
\pagestyle{scrheadings}
\refoot{%(HEADER1)s}

\title{%(HEADER1)s}
\author{%(HEADER2)s}
\begin{document}
\date{%(HEADER3)s}
\maketitle
\clearpage
"""

# Lout: the '#' comments below are part of the template text
HEADER_TEMPLATE['lout'] = """\
@SysInclude { doc }
@Document
  @InitialFont { Times Base 12p }	# Times, Courier, Helvetica, ...
  @PageOrientation { Portrait }		# Portrait, Landscape
  @ColumnNumber { 1 }			# Number of columns (2, 3, ...)
  @PageHeaders { Simple }		# None, Simple, Titles, NoTitles
  @InitialLanguage { English }		# German, French, Portuguese, ...
  @OptimizePages { Yes }                # Yes/No smart page break feature
//
@Text @Begin
@Display @Heading { %(HEADER1)s }
@Display @I { %(HEADER2)s }
@Display { %(HEADER3)s }
#@NP                                    # Break page after Headers
"""
# @SysInclude { tbl }			# Tables support
# setup: @MakeContents { Yes }          # show TOC
# setup: @SectionGap                    # break page at each section
494
495
496##############################################################################
497
498
499def getTags(config):
500	"Returns all the known tags for the specified target"
501
502	keys = [
503	'paragraphOpen','paragraphClose',
504	'title1','title2','title3','title4','title5',
505	'title1Open','title1Close','title2Open','title2Close',
506	'blocktitle1Open','title1Close','title2Open','title2Close',
507	'title3Open','title3Close','title4Open','title4Close',
508	'title5Open','title5Close',
509	'numtitle1','numtitle2','numtitle3','numtitle4','numtitle5',
510	'blockVerbOpen','blockVerbClose',
511	'blockQuoteOpen','blockQuoteClose','blockQuoteLine',
512	'fontMonoOpen','fontMonoClose',
513	'fontBoldOpen','fontBoldClose',
514	'fontItalicOpen','fontItalicClose',
515	'fontUnderlineOpen','fontUnderlineClose',
516	'listOpen','listClose',
517	'listItemOpen','listItemClose','listItemLine',
518	'numlistOpen','numlistClose',
519	'numlistItemOpen','numlistItemClose','numlistItemLine',
520	'deflistOpen','deflistClose',
521	'deflistItem1Open','deflistItem1Close',
522	'deflistItem2Open','deflistItem2Close',
523	'bar1','bar2',
524	'url','urlMark','email','emailMark',
525	'img','imgAlignLeft','imgAlignRight','imgAlignCenter',
526	'tableOpen','tableClose',
527	'tableRowOpen','tableRowClose','tableRowSep',
528	'tableCellOpen','tableCellClose','tableCellSep',
529	'tableTitleCellOpen','tableTitleCellClose','tableTitleCellSep',
530	'tableTitleRowOpen','tableTitleRowClose',
531	'tableBorder', 'tableAlignLeft', 'tableAlignCenter',
532	'tableCellAlignLeft','tableCellAlignRight','tableCellAlignCenter',
533	'tableColAlignLeft','tableColAlignRight','tableColAlignCenter',
534	'tableColAlignSep', 'tableCellColSpan',
535	'anchor','comment','pageBreak',
536	'TOC','tocOpen','tocClose',
537	'cssOpen', 'cssClose',
538	'bodyOpen','bodyClose',
539	'EOD'
540	]
541
542	# TIP: \a represents the current text on the mark
543	# TIP: ~A~, ~B~ and ~C~ are expanded to other tags parts
544
545	alltags = {
546
547	'txt': {
548	   'title1'              : '  \a'      ,
549	   'title2'              : '\t\a'      ,
550	   'title3'              : '\t\t\a'    ,
551	   'title4'              : '\t\t\t\a'  ,
552	   'title5'              : '\t\t\t\t\a',
553	   'blockQuoteLine'      : '\t'        ,
554	   'listItemOpen'        : '- '        ,
555	   'numlistItemOpen'     : '\a. '      ,
556	   'bar1'                : '\a'        ,
557	   'url'                 : '\a'        ,
558	   'urlMark'             : '\a (\a)'   ,
559	   'email'               : '\a'        ,
560	   'emailMark'           : '\a (\a)'   ,
561	   'img'                 : '[\a]'      ,
562	},
563
564	'html': {
565	   'paragraphOpen'       : '<P>'            ,
566	   'paragraphClose'      : '</P>'           ,
567	   'title1'              : '<H1>\a~A~</H1>' ,
568	   'title2'              : '<H2>\a~A~</H2>' ,
569	   'title3'              : '<H3>\a~A~</H3>' ,
570	   'title4'              : '<H4>\a~A~</H4>' ,
571	   'title5'              : '<H5>\a~A~</H5>' ,
572	   'blockVerbOpen'       : '<PRE>'          ,
573	   'blockVerbClose'      : '</PRE>'         ,
574	   'blockQuoteOpen'      : '<BLOCKQUOTE>'   ,
575	   'blockQuoteClose'     : '</BLOCKQUOTE>'  ,
576	   'fontMonoOpen'        : '<CODE>'         ,
577	   'fontMonoClose'       : '</CODE>'        ,
578	   'fontBoldOpen'        : '<B>'            ,
579	   'fontBoldClose'       : '</B>'           ,
580	   'fontItalicOpen'      : '<EM>'           ,
581	   'fontItalicClose'     : '</EM>'          ,
582	   'fontUnderlineOpen'   : '<U>'            ,
583	   'fontUnderlineClose'  : '</U>'           ,
584	   'listOpen'            : '<UL>'           ,
585	   'listClose'           : '</UL>'          ,
586	   'listItemOpen'        : '<LI>'           ,
587	   'numlistOpen'         : '<OL>'           ,
588	   'numlistClose'        : '</OL>'          ,
589	   'numlistItemOpen'     : '<LI>'           ,
590	   'deflistOpen'         : '<DL>'           ,
591	   'deflistClose'        : '</DL>'          ,
592	   'deflistItem1Open'    : '<DT>'           ,
593	   'deflistItem1Close'   : '</DT>'          ,
594	   'deflistItem2Open'    : '<DD>'           ,
595	   'bar1'                : '<HR NOSHADE SIZE=1>'        ,
596	   'bar2'                : '<HR NOSHADE SIZE=5>'        ,
597	   'url'                 : '<A HREF="#\a">\a</A>'        ,
598	   'urlMark'             : '<A HREF="#\a">\a</A>'        ,
599	   'email'               : '<A HREF="mailto:\a">\a</A>' ,
600	   'emailMark'           : '<A HREF="mailto:\a">\a</A>' ,
601	   'img'                 : '<IMG~A~ SRC="\a" BORDER="0" ALT="">',
602	   'imgAlignLeft'        : ' ALIGN="left"'  ,
603	   'imgAlignCenter'      : ' ALIGN="middle"',
604	   'imgAlignRight'       : ' ALIGN="right"' ,
605	   'tableOpen'           : '<TABLE~A~ CELLPADDING="4"~B~>',
606	   'tableClose'          : '</TABLE>'       ,
607	   'tableRowOpen'        : '<TR>'           ,
608	   'tableRowClose'       : '</TR>'          ,
609	   'tableCellOpen'       : '<TD~A~~S~>'      ,
610	   'tableCellClose'      : '</TD>'          ,
611	   'tableTitleCellOpen'  : '<TH~S~>'        ,
612	   'tableTitleCellClose' : '</TH>'          ,
613	   'tableBorder'         : ' BORDER="1"'    ,
614	   'tableAlignCenter'    : ' ALIGN="center"',
615	   'tableCellAlignRight' : ' ALIGN="right"' ,
616	   'tableCellAlignCenter': ' ALIGN="center"',
617	   'tableCellColSpan'    : ' COLSPAN="\a"'  ,
618	   'anchor'              : '<A NAME="\a"></A>\n',
619	   'cssOpen'             : '<STYLE TYPE="text/css">',
620	   'cssClose'            : '</STYLE>'       ,
621	   'comment'             : '<!-- \a -->'    ,
622	   'EOD'                 : '</BODY></HTML>'
623	},
624
625	#TIP xhtml inherits all HTML definitions (lowercased)
626	#TIP http://www.w3.org/TR/xhtml1/#guidelines
627	#TIP http://www.htmlref.com/samples/Chapt17/17_08.htm
628	'xhtml': {
629	   'listItemClose'       : '</li>'          ,
630	   'numlistItemClose'    : '</li>'          ,
631	   'deflistItem2Close'   : '</dd>'          ,
632	   'bar1'                : '<hr class="light" />',
633	   'bar2'                : '<hr class="heavy" />',
634	   'anchor'              : '<a id="\a" name="\a"></a>\n',
635	   'img'                 : '<img~A~ src="\a" border="0" alt=""/>',
636	},
637
638	'sgml': {
639	   'paragraphOpen'       : '<p>'                ,
640	   'title1'              : '<sect>\a~A~<p>'     ,
641	   'title2'              : '<sect1>\a~A~<p>'    ,
642	   'title3'              : '<sect2>\a~A~<p>'    ,
643	   'title4'              : '<sect3>\a~A~<p>'    ,
644	   'title5'              : '<sect4>\a~A~<p>'    ,
645	   'blockVerbOpen'       : '<tscreen><verb>'    ,
646	   'blockVerbClose'      : '</verb></tscreen>'  ,
647	   'blockQuoteOpen'      : '<quote>'            ,
648	   'blockQuoteClose'     : '</quote>'           ,
649	   'fontMonoOpen'        : '<tt>'               ,
650	   'fontMonoClose'       : '</tt>'              ,
651	   'fontBoldOpen'        : '<bf>'               ,
652	   'fontBoldClose'       : '</bf>'              ,
653	   'fontItalicOpen'      : '<em>'               ,
654	   'fontItalicClose'     : '</em>'              ,
655	   'fontUnderlineOpen'   : '<bf><em>'           ,
656	   'fontUnderlineClose'  : '</em></bf>'         ,
657	   'listOpen'            : '<itemize>'          ,
658	   'listClose'           : '</itemize>'         ,
659	   'listItemOpen'        : '<item>'             ,
660	   'numlistOpen'         : '<enum>'             ,
661	   'numlistClose'        : '</enum>'            ,
662	   'numlistItemOpen'     : '<item>'             ,
663	   'deflistOpen'         : '<descrip>'          ,
664	   'deflistClose'        : '</descrip>'         ,
665	   'deflistItem1Open'    : '<tag>'              ,
666	   'deflistItem1Close'   : '</tag>'             ,
667	   'bar1'                : '<!-- \a -->'        ,
668	   'url'                 : '<htmlurl url="\a" name="\a">'        ,
669	   'urlMark'             : '<htmlurl url="\a" name="\a">'        ,
670	   'email'               : '<htmlurl url="mailto:\a" name="\a">' ,
671	   'emailMark'           : '<htmlurl url="mailto:\a" name="\a">' ,
672	   'img'                 : '<figure><ph vspace=""><img src="\a">'+\
673	                           '</figure>'                           ,
674	   'tableOpen'           : '<table><tabular ca="~C~">'           ,
675	   'tableClose'          : '</tabular></table>' ,
676	   'tableRowSep'         : '<rowsep>'           ,
677	   'tableCellSep'        : '<colsep>'           ,
678	   'tableColAlignLeft'   : 'l'                  ,
679	   'tableColAlignRight'  : 'r'                  ,
680	   'tableColAlignCenter' : 'c'                  ,
681	   'comment'             : '<!-- \a -->'        ,
682	   'anchor'              : '<label id="\a">'    ,
683	   'TOC'                 : '<toc>'              ,
684	   'EOD'                 : '</article>'
685	},
686
687	'tex': {
688	   'title1'              : '\n\chapter*{\a}~A~'     ,
689	   'title2'              : '\n\section*{\a}~A~'     ,
690	   'title3'              : '\\subsection*{\a}~A~'   ,
691	   'title4'              : '\\subsubsection*{\a}~A~',
692	   # title 4/5: DIRTY: para+BF+\\+\n
693	   'title5'              : '\\paragraph{}\\textbf{\a}~A~\\\\\n',
694	   'title6'              : '\\paragraph{}\\textbf{\a}~A~\\\\\n',
695	   'numtitle1'           : '\n\chapter{\a}~A~'      ,
696	   'numtitle2'           : '\n\section{\a}~A~'      ,
697	   'numtitle3'           : '\\subsection{\a}~A~'    ,
698	   'numtitle4'           : '\\subsubsection{\a}~A~' ,
699	   'blockVerbOpen'       : '\\begin{verbatim}'   ,
700	   'blockVerbClose'      : '\\end{verbatim}'     ,
701	   'blockQuoteOpen'      : '\\begin{quotation}'  ,
702	   'blockQuoteClose'     : '\\end{quotation}'    ,
703	   'fontMonoOpen'        : '\\texttt{'           ,
704	   'fontMonoClose'       : '}'                   ,
705	   'fontBoldOpen'        : '\\textbf{'           ,
706	   'fontBoldClose'       : '}'                   ,
707	   'fontItalicOpen'      : '\\textit{'           ,
708	   'fontItalicClose'     : '}'                   ,
709	   'fontUnderlineOpen'   : '\\underline{'        ,
710	   'fontUnderlineClose'  : '}'                   ,
711	   'listOpen'            : '\\begin{itemize}'    ,
712	   'listClose'           : '\\end{itemize}'      ,
713	   'listItemOpen'        : '\\item '             ,
714	   'numlistOpen'         : '\\begin{enumerate}'  ,
715	   'numlistClose'        : '\\end{enumerate}'    ,
716	   'numlistItemOpen'     : '\\item '             ,
717	   'deflistOpen'         : '\\begin{description}',
718	   'deflistClose'        : '\\end{description}'  ,
719	   'deflistItem1Open'    : '\\item['             ,
720	   'deflistItem1Close'   : ']'                   ,
721	   'bar1'                : '\n\\hrulefill{}\n'   ,
722	   'bar2'                : '\n\\rule{\linewidth}{1mm}\n',
723	   'url'                 : '\\htmladdnormallink{\a}{\a}',
724	   'urlMark'             : '\a (\\ref{\a})',
725	   'email'               : '\\htmladdnormallink{\a}{mailto:\a}',
726	   'emailMark'           : '\\htmladdnormallink{\a}{mailto:\a}',
727	   'img'                 : '\\includegraphics{\a}',
728	   'tableOpen'           : '\\begin{center}\\begin{tabular}{|~C~|}',
729	   'tableClose'          : '\\end{tabular}\\end{center}',
730	   'tableRowOpen'        : '\\hline ' ,
731	   'tableRowClose'       : ' \\\\'    ,
732	   'tableCellSep'        : ' & '      ,
733	   'tableColAlignLeft'   : 'l'        ,
734	   'tableColAlignRight'  : 'r'        ,
735	   'tableColAlignCenter' : 'c'        ,
736	   'tableColAlignSep'    : '|'        ,
737	   'comment'             : '% \a'     ,
738	   'anchor'              : '\\label{\a}',
739	   'TOC'                 : '\\tableofcontents',
740	   'pageBreak'           : '\\clearpage',
741	   'EOD'                 : '\\end{document}'
742	},
743
744	'lout': {
745	   'paragraphOpen'       : '@LP'                     ,
746	   'blockTitle1Open'     : '@BeginSections'          ,
747	   'blockTitle1Close'    : '@EndSections'            ,
748	   'blockTitle2Open'     : ' @BeginSubSections'      ,
749	   'blockTitle2Close'    : ' @EndSubSections'        ,
750	   'blockTitle3Open'     : '  @BeginSubSubSections'  ,
751	   'blockTitle3Close'    : '  @EndSubSubSections'    ,
752	   'title1Open'          : '\n@Section @Title { \a } @Begin',
753	   'title1Close'         : '@End @Section'           ,
754	   'title2Open'          : '\n @SubSection @Title { \a } @Begin',
755	   'title2Close'         : ' @End @SubSection'       ,
756	   'title3Open'          : '\n  @SubSubSection @Title { \a } @Begin',
757	   'title3Close'         : '  @End @SubSubSection'   ,
758	   'title4Open'          : '\n@LP @LeftDisplay @B { \a }',
759	   'title5Open'          : '\n@LP @LeftDisplay @B { \a }',
760	   'anchor'              : '@Tag { \a }'             ,
761	   'blockVerbOpen'       : '@LP @ID @F @RawVerbatim @Begin',
762	   'blockVerbClose'      : '@End @RawVerbatim'   ,
763	   'blockQuoteOpen'      : '@QD {'               ,
764	   'blockQuoteClose'     : '}'                   ,
765	   # enclosed inside {} to deal with joined**words**
766	   'fontMonoOpen'        : '{@F {'               ,
767	   'fontMonoClose'       : '}}'                  ,
768	   'fontBoldOpen'        : '{@B {'               ,
769	   'fontBoldClose'       : '}}'                  ,
770	   'fontItalicOpen'      : '{@II {'              ,
771	   'fontItalicClose'     : '}}'                  ,
772	   'fontUnderlineOpen'   : '{@Underline{'        ,
773	   'fontUnderlineClose'  : '}}'                  ,
774	   # the full form is more readable, but could be BL EL LI NL TL DTI
775	   'listOpen'            : '@BulletList'         ,
776	   'listClose'           : '@EndList'            ,
777	   'listItemOpen'        : '@ListItem{'          ,
778	   'listItemClose'       : '}'                   ,
779	   'numlistOpen'         : '@NumberedList'       ,
780	   'numlistClose'        : '@EndList'            ,
781	   'numlistItemOpen'     : '@ListItem{'          ,
782	   'numlistItemClose'    : '}'                   ,
783	   'deflistOpen'         : '@TaggedList'         ,
784	   'deflistClose'        : '@EndList'            ,
785	   'deflistItem1Open'    : '@DropTagItem {'      ,
786	   'deflistItem1Close'   : '}'                   ,
787	   'deflistItem2Open'    : '{'                   ,
788	   'deflistItem2Close'   : '}'                   ,
789	   'bar1'                : '\n@DP @FullWidthRule\n'     ,
790	   'url'                 : '{blue @Colour { \a }}'      ,
791	   'urlMark'             : '\a ({blue @Colour { \a }})' ,
792	   'email'               : '{blue @Colour { \a }}'      ,
793	   'emailMark'           : '\a ({blue Colour{ \a }})'   ,
794	   'img'                 : '~A~@IncludeGraphic { \a }'  ,  # eps only!
795	   'imgAlignLeft'        : '@LeftDisplay '              ,
796	   'imgAlignRight'       : '@RightDisplay '             ,
797	   'imgAlignCenter'      : '@CentredDisplay '           ,
798	   # lout tables are *way* complicated, no support for now
799	   #'tableOpen'          : '~A~@Tbl~B~\naformat{ @Cell A | @Cell B } {',
800	   #'tableClose'         : '}'     ,
801	   #'tableRowOpen'       : '@Rowa\n'       ,
802	   #'tableTitleRowOpen'  : '@HeaderRowa'       ,
803	   #'tableCenterAlign'   : '@CentredDisplay '         ,
804	   #'tableCellOpen'      : '\a {'                     ,  # A, B, ...
805	   #'tableCellClose'     : '}'                        ,
806	   #'tableBorder'        : '\nrule {yes}'             ,
807	   'comment'             : '# \a'                     ,
808	   # @MakeContents must be on the config file
809	   'TOC'                 : '@DP @ContentsGoesHere @DP',
810	   'pageBreak'           : '\n@NP\n'                  ,
811	   'EOD'                 : '@End @Text'
812	},
813
814	'moin': {
815	   'title1'              : '= \a ='        ,
816	   'title2'              : '== \a =='      ,
817	   'title3'              : '=== \a ==='    ,
818	   'title4'              : '==== \a ===='  ,
819	   'title5'              : '===== \a =====',
820	   'blockVerbOpen'       : '{{{'           ,
821	   'blockVerbClose'      : '}}}'           ,
822	   'blockQuoteLine'      : '  '            ,
823	   'fontMonoOpen'        : '{{{'           ,
824	   'fontMonoClose'       : '}}}'           ,
825	   'fontBoldOpen'        : "'''"           ,
826	   'fontBoldClose'       : "'''"           ,
827	   'fontItalicOpen'      : "''"            ,
828	   'fontItalicClose'     : "''"            ,
829	   'fontUnderlineOpen'   : "__"            ,
830	   'fontUnderlineClose'  : "__"            ,
831	   'listItemOpen'        : ' * '           ,
832	   'numlistItemOpen'     : ' \a. '         ,
833	   'bar1'                : '----'          ,
834	   'url'                 : '[\a]'          ,
835	   'urlMark'             : '[\a \a]'       ,
836	   'email'               : '[\a]'          ,
837	   'emailMark'           : '[\a \a]'       ,
838	   'img'                 : '[\a]'          ,
839	   'tableRowOpen'        : '||'            ,
840	   'tableCellOpen'       : '~A~'           ,
841	   'tableCellClose'      : '||'            ,
842	   'tableTitleCellClose' : '||'            ,
843	   'tableCellAlignRight' : '<)>'           ,
844	   'tableCellAlignCenter': '<:>'           ,
845	   'comment'             : '## \a'         ,
846	   'TOC'                 : '[[TableOfContents]]'
847	},
848
849	'mgp': {
850	   'paragraphOpen'       : '%font "normal", size 5'     ,
851	   'title1'              : '%page\n\n\a\n'              ,
852	   'title2'              : '%page\n\n\a\n'              ,
853	   'title3'              : '%page\n\n\a\n'              ,
854	   'title4'              : '%page\n\n\a\n'              ,
855	   'title5'              : '%page\n\n\a\n'              ,
856	   'blockVerbOpen'       : '%font "mono"'               ,
857	   'blockVerbClose'      : '%font "normal"'             ,
858	   'blockQuoteOpen'      : '%prefix "       "'          ,
859	   'blockQuoteClose'     : '%prefix "  "'               ,
860	   'fontMonoOpen'        : '\n%cont, font "mono"\n'     ,
861	   'fontMonoClose'       : '\n%cont, font "normal"\n'   ,
862	   'fontBoldOpen'        : '\n%cont, font "normal-b"\n' ,
863	   'fontBoldClose'       : '\n%cont, font "normal"\n'   ,
864	   'fontItalicOpen'      : '\n%cont, font "normal-i"\n' ,
865	   'fontItalicClose'     : '\n%cont, font "normal"\n'   ,
866	   'fontUnderlineOpen'   : '\n%cont, fore "cyan"\n'     ,
867	   'fontUnderlineClose'  : '\n%cont, fore "white"\n'    ,
868	   'listItemLine'        : '\t'                         ,
869	   'numlistItemLine'     : '\t'                         ,
870	   'deflistItem1Open'    : '\t\n%cont, font "normal-b"\n',
871	   'deflistItem1Close'   : '\n%cont, font "normal"\n'   ,
872	   'bar1'                : '%bar "white" 5'             ,
873	   'bar2'                : '%pause'                     ,
874	   'url'                 : '\n%cont, fore "cyan"\n\a'   +\
875	                           '\n%cont, fore "white"\n'    ,
876	   'urlMark'             : '\a \n%cont, fore "cyan"\n\a'+\
877	                           '\n%cont, fore "white"\n'    ,
878	   'email'               : '\n%cont, fore "cyan"\n\a'   +\
879	                           '\n%cont, fore "white"\n'    ,
880	   'emailMark'           : '\a \n%cont, fore "cyan"\n\a'+\
881	                           '\n%cont, fore "white"\n'    ,
882	   'img'                 : '~A~\n%newimage "\a"\n%left\n',
883	   'imgAlignLeft'        : '\n%left'                    ,
884	   'imgAlignRight'       : '\n%right'                   ,
885	   'imgAlignCenter'      : '\n%center'                  ,
886	   'comment'             : '%% \a'                      ,
887	   'pageBreak'           : '%page\n\n\n'                ,
888	   'EOD'                 : '%%EOD'
889	},
890
891	# man groff_man ; man 7 groff
892	'man': {
893	   'paragraphOpen'       : '.P'     ,
894	   'title1'              : '.SH \a' ,
895	   'title2'              : '.SS \a' ,
896	   'title3'              : '.SS \a' ,
897	   'title4'              : '.SS \a' ,
898	   'title5'              : '.SS \a' ,
899	   'blockVerbOpen'       : '.nf'    ,
900	   'blockVerbClose'      : '.fi\n'  ,
901	   'blockQuoteOpen'      : '.RS'    ,
902	   'blockQuoteClose'     : '.RE'    ,
903	   'fontBoldOpen'        : '\\fB'   ,
904	   'fontBoldClose'       : '\\fR'   ,
905	   'fontItalicOpen'      : '\\fI'   ,
906	   'fontItalicClose'     : '\\fR'   ,
907	   'listOpen'            : '.RS'    ,
908	   'listItemOpen'        : '.IP \(bu 3\n',
909	   'listClose'           : '.RE'    ,
910	   'numlistOpen'         : '.RS'    ,
911	   'numlistItemOpen'     : '.IP \a. 3\n',
912	   'numlistClose'        : '.RE'    ,
913	   'deflistItem1Open'    : '.TP\n'  ,
914	   'bar1'                : '\n\n'   ,
915	   'url'                 : '\a'     ,
916	   'urlMark'             : '\a (\a)',
917	   'email'               : '\a'     ,
918	   'emailMark'           : '\a (\a)',
919	   'img'                 : '\a'     ,
920	   'tableOpen'           : '.TS\n~A~~B~tab(^); ~C~.',
921	   'tableClose'          : '.TE'     ,
922	   'tableRowOpen'        : ' '       ,
923	   'tableCellSep'        : '^'       ,
924	   'tableAlignCenter'    : 'center, ',
925	   'tableBorder'         : 'allbox, ',
926	   'tableColAlignLeft'   : 'l'       ,
927	   'tableColAlignRight'  : 'r'       ,
928	   'tableColAlignCenter' : 'c'       ,
929	   'comment'             : '.\\" \a'
930	},
931
932	'pm6': {
933	   'paragraphOpen'       : '<@Normal:>'    ,
934	   'title1'              : '\n<@Title1:>\a',
935	   'title2'              : '\n<@Title2:>\a',
936	   'title3'              : '\n<@Title3:>\a',
937	   'title4'              : '\n<@Title4:>\a',
938	   'title5'              : '\n<@Title5:>\a',
939	   'blockVerbOpen'       : '<@PreFormat:>' ,
940	   'blockQuoteLine'      : '<@Quote:>'     ,
941	   'fontMonoOpen'        : '<FONT "Lucida Console"><SIZE 9>' ,
942	   'fontMonoClose'       : '<SIZE$><FONT$>',
943	   'fontBoldOpen'        : '<B>'           ,
944	   'fontBoldClose'       : '<P>'           ,
945	   'fontItalicOpen'      : '<I>'           ,
946	   'fontItalicClose'     : '<P>'           ,
947	   'fontUnderlineOpen'   : '<U>'           ,
948	   'fontUnderlineClose'  : '<P>'           ,
949	   'listOpen'            : '<@Bullet:>'    ,
950	   'listItemOpen'        : '\x95\t'        ,  # \x95 == ~U
951	   'numlistOpen'         : '<@Bullet:>'    ,
952	   'numlistItemOpen'     : '\x95\t'        ,
953	   'bar1'                : '\a'            ,
954	   'url'                 : '<U>\a<P>'      ,  # underline
955	   'urlMark'             : '\a <U>\a<P>'   ,
956	   'email'               : '\a'            ,
957	   'emailMark'           : '\a \a'         ,
958	   'img'                 : '\a'
959	}
960	}
961
962	# exceptions for --css-sugar
963	if config['css-sugar'] and config['target'] in ('html','xhtml'):
964		# change just HTML because XHTML inherits it
965		htmltags = alltags['html']
966		# table with no cellpadding
967		htmltags['tableOpen'] = string.replace(
968			htmltags['tableOpen'], ' CELLPADDING="4"', '')
969		# DIVs
970		htmltags['tocOpen' ] = '<DIV CLASS="toc" ID="toc">'
971		htmltags['tocClose'] = '</DIV>'
972		htmltags['bodyOpen'] = '<DIV CLASS="body" ID="body">'
973		htmltags['bodyClose']= '</DIV>'
974
975	# make the HTML -> XHTML inheritance
976	xhtml = alltags['html'].copy()
977	for key in xhtml.keys(): xhtml[key] = string.lower(xhtml[key])
978	# some like HTML tags as lowercase, some don't... (headers out)
979	if HTML_LOWER: alltags['html'] = xhtml.copy()
980	xhtml.update(alltags['xhtml'])
981	alltags['xhtml'] = xhtml.copy()
982
983	# compose the target tags dictionary
984	tags = {}
985	target_tags = alltags[config['target']].copy()
986
987	for key in keys: tags[key] = ''     # create empty keys
988	for key in target_tags.keys():
989		tags[key] = maskEscapeChar(target_tags[key]) # populate
990
991	# map strong line to separator if not defined
992	if not tags['bar2'] and tags['bar1']:
993		tags['bar2'] = tags['bar1']
994
995	return tags
996
997
998##############################################################################
999
1000
def getRules(config):
	"""Returns all the target-specific syntax rules

	CONFIG is read for two keys: 'target' (selects the rules set) and
	'css-sugar' (tweaks the html/xhtml rules when ON).

	The returned dictionary has one entry for every known rule name:
	0 when the target does not set the rule, otherwise the value the
	target defines for it.
	"""
	css_sugar = config['css-sugar']
	target    = config['target']

	# ON/OFF rules describing what the target supports
	target_rules = [
	  'linkable',             # target supports external links
	  'tableable',            # target supports tables
	  'imglinkable',          # target supports images as links
	  'imgalignable',         # target supports image alignment
	  'imgasdefterm',         # target supports image as definition term
	  'autonumberlist',       # target supports numbered lists natively
	  'autonumbertitle',      # target supports numbered titles natively
	  'parainsidelist',       # lists items supports paragraph
	  'spacedlistitem',       # lists support blank lines between items
	  'listnotnested',        # lists cannot be nested
	  'quotenotnested',       # quotes cannot be nested
	  'verbblocknotescaped',  # don't escape specials in verb block
	  'verbblockfinalescape', # do final escapes in verb block
	  'escapeurl',            # escape special in link URL
	  'onelinepara',          # dump paragraph as a single long line
	  'tabletitlerowinbold',  # manually bold any cell on table titles
	  'tablecellstrip',       # strip extra spaces from each table cell
	  'tablecellspannable',   # the table cells can have span attribute
	  'barinsidequote',       # bars are allowed inside quote blocks
	  'finalescapetitle',     # perform final escapes on title lines
	  'autotocnewpagebefore', # break page before automatic TOC
	  'autotocnewpageafter',  # break page after automatic TOC
	  'autotocwithbars',      # automatic TOC surrounded by bars
	  'mapbar2pagebreak',     # map the strong bar to a page break
	  'titleblocks'           # titles must be on open/close section blocks
	]

	# ON/OFF rules that only beautify the generated target code
	beautify_rules = [
	  'indentverbblock',      # add leading spaces to verb block lines
	  'breaktablecell',       # break lines after any table cell
	  'breaktablelineopen',   # break line after opening table line
	  'notbreaklistopen',     # don't break line after opening a new list
	  'notbreakparaopen',     # don't break line after opening a new para
	  'keepquoteindent',      # don't remove the leading TABs on quotes
	  'keeplistindent',       # don't remove the leading spaces on lists
	  'blankendmotherlist',   # append a blank line at the mother list end
	  'blankendtable',        # append a blank line at the table end
	  'blankendautotoc',      # append a blank line at the auto TOC end
	  'tagnotindentable'      # tags must be placed at the line beginning
	]

	# rules that carry a value instead of an ON/OFF flag
	value_rules = [
	  'listmaxdepth',         # maximum depth for lists
	  'tablecellaligntype'    # type of table cell align: cell, column
	]

	allrules = target_rules + beautify_rules + value_rules

	# what each target turns ON (or sets); anything not listed is 0
	rules_bank = {
	  'txt': {
	    'indentverbblock'    : 1,
	    'spacedlistitem'     : 1,
	    'parainsidelist'     : 1,
	    'keeplistindent'     : 1,
	    'barinsidequote'     : 1,
	    'autotocwithbars'    : 1,
	    'blankendmotherlist' : 1
	  },
	  'html': {
	    'indentverbblock'    : 1,
	    'linkable'           : 1,
	    'escapeurl'          : 1,
	    'imglinkable'        : 1,
	    'imgalignable'       : 1,
	    'imgasdefterm'       : 1,
	    'autonumberlist'     : 1,
	    'spacedlistitem'     : 1,
	    'parainsidelist'     : 1,
	    'blankendmotherlist' : 1,
	    'tableable'          : 1,
	    'tablecellstrip'     : 1,
	    'blankendtable'      : 1,
	    'breaktablecell'     : 1,
	    'breaktablelineopen' : 1,
	    'keeplistindent'     : 1,
	    'keepquoteindent'    : 1,
	    'barinsidequote'     : 1,
	    'autotocwithbars'    : 1,
	    'tablecellspannable' : 1,
	    'tablecellaligntype' : 'cell'
	  },
	  # TIP: xhtml starts from a copy of the HTML rules (see below),
	  #      so only the XHTML-specific ones would be listed here
	  'xhtml': {
	  },
	  'sgml': {
	    'linkable'           : 1,
	    'escapeurl'          : 1,
	    'autonumberlist'     : 1,
	    'spacedlistitem'     : 1,
	    'blankendmotherlist' : 1,
	    'tableable'          : 1,
	    'tablecellstrip'     : 1,
	    'blankendtable'      : 1,
	    'blankendautotoc'    : 1,
	    'quotenotnested'     : 1,
	    'keeplistindent'     : 1,
	    'keepquoteindent'    : 1,
	    'barinsidequote'     : 1,
	    'finalescapetitle'   : 1,
	    'tablecellaligntype' : 'column'
	  },
	  'mgp': {
	    'blankendmotherlist'   : 1,
	    'tagnotindentable'     : 1,
	    'spacedlistitem'       : 1,
	    'imgalignable'         : 1,
	    'autotocnewpagebefore' : 1
	  },
	  'tex': {
	    'imgasdefterm'        : 1,
	    'autonumberlist'      : 1,
	    'autonumbertitle'     : 1,
	    'spacedlistitem'      : 1,
	    'blankendmotherlist'  : 1,
	    'tableable'           : 1,
	    'tablecellstrip'      : 1,
	    'tabletitlerowinbold' : 1,
	    'blankendtable'       : 1,
	    'verbblocknotescaped' : 1,
	    'keeplistindent'      : 1,
	    'listmaxdepth'        : 4,
	    'barinsidequote'      : 1,
	    'finalescapetitle'    : 1,
	    'autotocnewpageafter' : 1,
	    'mapbar2pagebreak'    : 1,
	    'tablecellaligntype'  : 'column'
	  },
	  'lout': {
	    'keepquoteindent'     : 1,
	    'escapeurl'           : 1,
	    'verbblocknotescaped' : 1,
	    'tableable'           : 0,
	    'imgalignable'        : 1,
	    'mapbar2pagebreak'    : 1,
	    'titleblocks'         : 1,
	    'notbreakparaopen'    : 1
	  },
	  'moin': {
	    'spacedlistitem'      : 1,
	    'linkable'            : 1,
	    'blankendmotherlist'  : 1,
	    'keeplistindent'      : 1,
	    'tableable'           : 1,
	    'barinsidequote'      : 1,
	    'blankendtable'       : 1,
	    'tabletitlerowinbold' : 1,
	    'tablecellstrip'      : 1,
	    'autotocwithbars'     : 1,
	    'tablecellaligntype'  : 'cell'
	  },
	  'man': {
	    'spacedlistitem'      : 1,
	    'indentverbblock'     : 1,
	    'blankendmotherlist'  : 1,
	    'tagnotindentable'    : 1,
	    'tableable'           : 1,
	    'tablecellaligntype'  : 'column',
	    'tabletitlerowinbold' : 1,
	    'tablecellstrip'      : 1,
	    'blankendtable'       : 1,
	    'keeplistindent'      : 0,
	    'barinsidequote'      : 1,
	    'parainsidelist'      : 0
	  },
	  'pm6': {
	    'keeplistindent'       : 1,
	    'verbblockfinalescape' : 1,
	    #TODO add support for these - maybe set a JOINNEXT char and
	    #     do it on addLineBreaks()
	    'notbreaklistopen'     : 1,
	    'notbreakparaopen'     : 1,
	    'barinsidequote'       : 1,
	    'autotocwithbars'      : 1,
	    'onelinepara'          : 1
	  }
	}

	# pick the rules of the requested target (xhtml inherits from html)
	if target == 'xhtml':
		myrules = rules_bank['html'].copy()
		myrules.update(rules_bank['xhtml'])
	else:
		myrules = rules_bank[target].copy()

	# exceptions for --css-sugar
	if css_sugar and target in ('html','xhtml'):
		myrules['indentverbblock'] = 0
		myrules['autotocwithbars'] = 0

	# every known rule starts OFF, then the target ones are applied
	rules = {}
	for name in allrules:
		rules[name] = 0
	rules.update(myrules)

	return rules
1198
1199
1200##############################################################################
1201
1202
def getRegexes():
	"""Returns all the regexes used to find the t2t marks

	The result is a dictionary (the 'bank') indexed by the mark name.
	All values are compiled regexes, except '_urlskel', which holds
	the raw dictionary of URL pattern pieces for later reuse.
	The 'macros' regex is composed from the keys of the MACROS
	dictionary, which is not defined inside this function.
	"""

	bank = {
	# block delimiters: the mark must be alone on the line
	'blockVerbOpen':
		re.compile(r'^```\s*$'),
	'blockVerbClose':
		re.compile(r'^```\s*$'),
	'blockRawOpen':
		re.compile(r'^"""\s*$'),
	'blockRawClose':
		re.compile(r'^"""\s*$'),
	# quote: one or more TABs at the line start
	'quote':
		re.compile(r'^\t+'),
	# one-line verb/raw: mark, one space and at least one more char
	'1lineVerb':
		re.compile(r'^``` (?=.)'),
	'1lineRaw':
		re.compile(r'^""" (?=.)'),
	# mono, raw, bold, italic, underline:
	# - marks must be glued with the contents, no boundary spaces
	# - they are greedy, so in ****bold****, turns to <b>**bold**</b>
	'fontMono':
		re.compile(  r'``([^\s](|.*?[^\s])`*)``'),
	'raw':
		re.compile(  r'""([^\s](|.*?[^\s])"*)""'),
	'fontBold':
		re.compile(r'\*\*([^\s](|.*?[^\s])\**)\*\*'),
	'fontItalic':
		re.compile(  r'//([^\s](|.*?[^\s])/*)//'),
	'fontUnderline':
		re.compile(  r'__([^\s](|.*?[^\s])_*)__'),
	# lists: group 1 is the leading spaces, group 2 is the mark char
	'list':
		re.compile(r'^( *)(-) (?=[^ ])'),
	'numlist':
		re.compile(r'^( *)(\+) (?=[^ ])'),
	'deflist':
		re.compile(r'^( *)(:) (.*)$'),
	# a list mark alone on the line
	'listclose':
		re.compile(r'^( *)([-+:])\s*$'),
	# bar: 20 or more chars, any of _ = -
	'bar':
		re.compile(r'^(\s*)([_=-]{20,})\s*$'),
	# table line: optional spaces, | or ||, then a space
	'table':
		re.compile(r'^ *\|\|? '),
	'blankline':
		re.compile(r'^\s*$'),
	# comment: % at the very first column
	'comment':
		re.compile(r'^%'),

	# auxiliary tag regexes: the ~X~ placeholders (matched ignoring case)
	'_imgAlign'        : re.compile(r'~A~', re.I),
	'_tableAlign'      : re.compile(r'~A~', re.I),
	'_anchor'          : re.compile(r'~A~', re.I),
	'_tableBorder'     : re.compile(r'~B~', re.I),
	'_tableColAlign'   : re.compile(r'~C~', re.I),
	'_tableCellColSpan': re.compile(r'~S~', re.I),
	'_tableCellAlign'  : re.compile(r'~A~', re.I),
	}

	# special char to place data on TAGs contents  (\a == bell)
	bank['x'] = re.compile('\a')

	# %%macroname [ (formatting) ]
	# TIP: the %%%% turns to a literal %% after the % formatting
	bank['macros'] = re.compile(r'%%%%(?P<name>%s)\b(\((?P<fmt>.*?)\))?'%(
	                            string.join(MACROS.keys(), '|')), re.I)

	# %%TOC special macro for TOC positioning
	bank['toc'] = re.compile(r'^ *%%toc\s*$', re.I)

	# almost complicated title regexes ;)
	# skeleton: id mark, title text, the same id mark again (\1) and
	# an optional [label]. The two %s are the id and the text patterns
	titskel = r'^ *(?P<id>%s)(?P<txt>%s)\1(\[(?P<label>[\w-]*)\])?\s*$'
	bank[   'title'] = re.compile(titskel%('[=]{1,5}','[^=](|.*[^=])'))
	bank['numtitle'] = re.compile(titskel%('[+]{1,5}','[^+](|.*[^+])'))

	### complicated regexes begin here ;)
	#
	# textual descriptions on --help's style: [...] is optional, | is OR


	### first, some auxiliary variables
	#

	# [image.EXT]
	patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'

	# link things
	# TIP: login, chars, anchor, form and punct are just the contents
	#      of a char class, the [] are added when composing the regexes
	urlskel = {
	  'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
	  'guess' : r'(www[23]?|ftp)\.',         # w/out proto, try to guess
	  'login' : r'A-Za-z0-9_.-',             # for ftp://login@domain.com
	  'pass'  : r'[^ @]*',                   # for ftp://login:pass@dom.com
	  'chars' : r'A-Za-z0-9%._/~:,=$@&+-',   # %20(space), :80(port), D&D
	  'anchor': r'A-Za-z0-9%._-',            # %nn(encoded)
	  'form'  : r'A-Za-z0-9/%&=+;.,$@*_-',   # .,@*_-(as is)
	  'punct' : r'.,;:!?'
	}

	# username [ :password ] @
	patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])

	# [ http:// ] [ username:password@ ] domain.com [ / ]
	#     [ #anchor | ?form=data ]
	retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]+)?'%(
	             urlskel['proto'],patt_url_login, urlskel['guess'],
	             urlskel['chars'],urlskel['form'],urlskel['anchor'])

	# filename | [ filename ] #anchor
	retxt_url_local = r'[%s]+|[%s]*(#[%s]+)'%(
	             urlskel['chars'],urlskel['chars'],urlskel['anchor'])

	# user@domain [ ?form=data ]
	patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
	             urlskel['login'],urlskel['form'])

	# saving for future use
	bank['_urlskel'] = urlskel

	### and now the real regexes
	#

	bank['email'] = re.compile(patt_email,re.I)

	# email | url
	# TIP: the URL alternative comes first on the pattern
	bank['link'] = re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)

	# \[ label | imagetag    url | email | filename \]
	bank['linkmark'] = re.compile(
		r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
		  patt_img, retxt_url, patt_email, retxt_url_local),
		re.L+re.I)

	# image
	bank['img'] = re.compile(patt_img, re.L+re.I)

	# special things: lines starting with %!
	bank['special'] = re.compile(r'^%!\s*')
	return bank
1339### END OF regex nightmares
1340
1341
1342##############################################################################
1343
class error(Exception):
	"The program's own exception, raised by Error()"
def echo(msg):
	"Quick debug helper: print MSG between ANSI color escapes"
	colored = '\033[32;1m%s\033[m'%msg
	print(colored)
def Quit(msg=''):
	"Print MSG (when not empty) and leave the program with exit code 0"
	if msg:
		print(msg)
	sys.exit(0)
def Error(msg):
	"Raise the 'error' exception with MSG, prefixed by 'program: Error: '"
	prefix = _("%s: Error: ")%my_name
	raise error(prefix + msg)
def getTraceback():
	"""Return the traceback of the exception being handled, as a string

	If anything goes wrong while composing it, None is returned.
	"""
	try:
		import traceback
		etype, value, tb = sys.exc_info()
		lines = traceback.format_exception(etype, value, tb)
		return string.join(lines, '')
	except:
		# best effort only: no traceback text is better than a new error
		return None
def getUnknownErrorMessage():
	"Compose the 'unknown error' report: apology, author e-mail, traceback"
	apology = _('Sorry! Txt2tags aborted by an unknow error.')
	request = _('Please send the following Error Traceback to the author')
	return '%s\n%s (%s):\n\n%s'%(apology, request, my_email, getTraceback())
def Message(msg,level):
	"""Print MSG when LEVEL is not greater than the VERBOSE level

	Nothing is printed when QUIET is ON. The message is prefixed by
	five dashes for each level.
	"""
	if QUIET or not level <= VERBOSE: return
	dashes = '-----'*level
	print("%s %s"%(dashes, msg))
def Debug(msg,id=0,linenr=None):
	"""Show debug messages, categorized (colored or not)

	msg    - the message text
	id     - the category number, from 0 to 7 (see 'ids' below).
	         Anything int() can convert is accepted ('3', 3.0);
	         invalid or out of range values fall back to 0
	linenr - when informed, the message is prefixed by 'LINE nnnn: '

	Nothing is shown when QUIET is ON or DEBUG is OFF.
	"""
	if QUIET or not DEBUG: return
	# the id is used as a list index, so it must be a real integer
	try: id = int(id)
	except (TypeError, ValueError): id = 0
	if id not in range(8): id = 0
	# 0:black 1:red 2:green 3:yellow 4:blue 5:pink 6:cyan 7:white ;1:light
	ids            = ['INI','CFG','SRC','BLK','HLD','GUI','OUT','DET']
	colors_bgdark  = ['7;1','1;1','3;1','6;1','4;1','5;1','2;1','7;1']
	colors_bglight = ['0'  ,'1'  ,'3'  ,'6'  ,'4'  ,'5'  ,'2'  ,'0'  ]
	if linenr is not None: msg = "LINE %04d: %s"%(linenr,msg)
	if COLOR_DEBUG:
		# the color table depends on the terminal background
		if BG_LIGHT: color = colors_bglight[id]
		else       : color = colors_bgdark[id]
		msg = '\033[3%sm%s\033[m'%(color,msg)
	print("++ %s: %s"%(ids[id],msg))
def Readfile(file, remove_linebreaks=0, ignore_error=0):
	"""Read FILE and return its contents as a list of lines

	file              - the file path, or '-' to read from STDIN
	remove_linebreaks - when ON, the trailing \\n and \\r chars are
	                    removed from each line
	ignore_error      - when ON, a reading failure is not reported
	                    by Error() and an empty list is returned
	"""
	from_stdin = (file == '-')
	data = []
	try:
		if from_stdin:
			data = sys.stdin.readlines()
		else:
			f = open(file)
			data = f.readlines()
			f.close()
	except:
		# any failure is reported by Error(), unless muted by the caller
		if not ignore_error:
			if from_stdin:
				Error(_('You must feed me with data on STDIN!'))
			else:
				Error(_("Cannot read file:")+" %s"%file)
	if remove_linebreaks:
		chomped = []
		for line in data:
			chomped.append(re.sub('[\n\r]+$','',line))
		data = chomped
	Message(_("Readed file (%d lines): %s")%(len(data),file),2)
	return data
def Savefile(file, contents):
	"""Write CONTENTS to FILE, which is opened on binary mode

	file     - path of the file to be (over)written
	contents - a list of lines (saved by writelines) or a single
	           string (saved by write)

	If the file cannot be opened, Error() is called. The file is
	always closed, even when the writing fails.
	"""
	try: f = open(file, 'wb')
	except: Error(_("Cannot open file for writing:")+" %s"%file)
	try:
		if isinstance(contents, type([])): doit = f.writelines
		else: doit = f.write
		doit(contents)
	finally:
		# don't leak the file handle if the writing raises
		f.close()
1406
def showdic(dic):
	"Print the DIC contents, one 'key : value' pair per line"
	for key in dic.keys():
		value = dic[key]
		print("%15s : %s" % (key,value))
def dotted_spaces(txt=''):
	"Return TXT with every space char changed to a dot"
	pieces = string.split(txt, ' ')
	return string.join(pieces, '.')
1411
1412# TIP: win env vars http://www.winnetmag.com/Article/ArticleID/23873/23873.html
def get_rc_path():
	"""Return the full path for the users' RC file

	The T2TCONFIG environment variable, when set, is returned as is.
	Otherwise the path is composed by the user directory (HOME or
	HOMEPATH environment variables) plus the platform's default RC
	file name. On Windows the HOMEDRIVE contents, if any, is used as
	the path prefix.

	An empty string is returned if the user directory is unknown.
	No check is made to see if the file really exists.
	"""
	# try to get the path from an env var. if yes, we're done
	user_defined = os.environ.get('T2TCONFIG')
	if user_defined: return user_defined
	# env var not found, so perform automatic path composing
	# set default filename according system platform
	rc_names = {'default':'.txt2tagsrc', 'win':'_t2trc'}
	rc_file = rc_names.get(sys.platform[:3]) or rc_names['default']
	# the file must be on the user directory, but where is this dir?
	rc_dir = None
	for var in ['HOME', 'HOMEPATH']:
		rc_dir = os.environ.get(var)
		if rc_dir: break
	# sorry, not found
	if not rc_dir: return ''
	# rc dir found, now we must join dir+file to compose the full path
	rc_path = os.path.join(rc_dir, rc_file)
	# on windows, prefix with the drive (%homedrive%: 2k/XP/NT)
	if sys.platform[:3] == 'win':
		rc_drive = os.environ.get('HOMEDRIVE')
		# the drive var may be unset, then the path is used alone
		if rc_drive:
			rc_path = os.path.join(rc_drive,rc_path)
	return rc_path
1438
1439
1440
1441##############################################################################
1442
1443class CommandLine:
1444	"""
1445	Command Line class - Masters command line
1446
	This class checks and extracts data from the provided command line.
1448	The --long options and flags are taken from the global OPTIONS,
1449	FLAGS and ACTIONS dictionaries. The short options are registered
1450	here, and also their equivalence to the long ones.
1451
1452	METHODS:
1453	  _compose_short_opts() -> str
1454	  _compose_long_opts() -> list
1455	      Compose the valid short and long options list, on the
1456	      'getopt' format.
1457
1458	  parse() -> (opts, args)
1459	      Call getopt to check and parse the command line.
1460	      It expects to receive the command line as a list, and
1461	      without the program name (sys.argv[1:]).
1462
1463	  get_raw_config() -> [RAW config]
1464	      Scans command line and convert the data to the RAW config
1465	      format. See ConfigMaster class to the RAW format description.
1466	      Optional 'ignore' and 'filter' arguments are used to filter
1467	      in or out specified keys.
1468
1469	  compose_cmdline(dict) -> [Command line]
1470	      Compose a command line list from an already parsed config
1471	      dictionary, generated from RAW by ConfigMaster(). Use
1472	      this to compose an optimal command line for a group of
1473	      options.
1474
	The get_raw_config() calls parse(), so the typical use of this
	class is:
1477
1478            raw = CommandLine().get_raw_config(sys.argv[1:])
1479	"""
1480	def __init__(self):
1481		self.all_options = OPTIONS.keys()
1482		self.all_flags   = FLAGS.keys()
1483		self.all_actions = ACTIONS.keys()
1484
1485		# short:long options equivalence
1486		self.short_long = {
1487		  'h':'help'     ,   'V':'version',
1488		  'n':'enum-title',  'i':'infile' ,
1489		  'H':'no-headers',  'o':'outfile',
1490		  'v':'verbose'   ,  't':'target' ,
1491		  'q':'quiet'     ,  'C':'config-file'
1492		}
1493
1494		# compose valid short and long options data for getopt
1495		self.short_opts = self._compose_short_opts()
1496		self.long_opts  = self._compose_long_opts()
1497
1498	def _compose_short_opts(self):
1499		"Returns a string like 'hVt:o' with all short options/flags"
1500		ret = []
1501		for opt in self.short_long.keys():
1502			long = self.short_long[opt]
1503			if long in self.all_options: # is flag or option?
1504				opt = opt+':'        # option: have param
1505			ret.append(opt)
1506		#Debug('Valid SHORT options: %s'%ret)
1507		return string.join(ret, '')
1508
1509	def _compose_long_opts(self):
1510		"Returns a list with all the valid long options/flags"
1511		ret = map(lambda x:x+'=', self.all_options)          # add =
1512		ret.extend(self.all_flags)                           # flag ON
1513		ret.extend(self.all_actions)                         # acts
1514		ret.extend(map(lambda x:'no-'+x, self.all_flags))    # add no-*
1515		ret.extend(['no-style','no-encoding'])               # turn OFF
1516		ret.extend(['no-outfile','no-infile'])               # turn OFF
1517		ret.extend(['no-dump-config', 'no-dump-source'])     # turn OFF
1518		#Debug('Valid LONG options: %s'%ret)
1519		return ret
1520
1521	def _tokenize(self, cmd_string=''):
1522		"Convert a command line string to a list"
1523		#TODO protect quotes contents
1524		return string.split(cmd_string)
1525
1526	def parse(self, cmdline=[]):
1527		"Check/Parse a command line list     TIP: no program name!"
1528		# get the valid options
1529		short, long = self.short_opts, self.long_opts
1530		# parse it!
1531		try:
1532			opts, args = getopt.getopt(cmdline, short, long)
1533		except getopt.error, errmsg:
1534			Error(_("%s (try --help)")%errmsg)
1535		return (opts, args)
1536
1537	def get_raw_config(self, cmdline=[], ignore=[], filter=[], relative=0):
1538		"Returns the options/arguments found as RAW config"
1539		if not cmdline: return []
1540		ret = []
1541		# we need lists, not strings
1542		if type(cmdline) == type(''): cmdline = self._tokenize(cmdline)
1543		opts, args = self.parse(cmdline[:])
1544		# parse all options
1545		for name,value in opts:
1546			# remove leading - and --
1547			name = re.sub('^--?', '', name)
1548			# alias to old mispelled 'suGGar'
1549			if   name ==    'css-suggar': name =    'css-sugar'
1550			elif name == 'no-css-suggar': name = 'no-css-sugar'
1551			# translate short opt to long
1552			if len(name) == 1: name = self.short_long.get(name)
1553			# outfile exception: path relative to PWD
1554			if name == 'outfile' and relative \
1555			   and value not in [STDOUT, MODULEOUT]:
1556				value = os.path.abspath(value)
1557			# config-file inclusion, path relative to PWD
1558			if name == 'config-file':
1559				configs = ConfigLines().include_config_file(
1560				          value)
1561				# remove the 'target' item of all configs
1562				configs = map(lambda c: [c[1],c[2]], configs)
1563				ret.extend(configs)
1564				continue
1565			# save it
1566			ret.append([name, value])
1567		# get infile, if any
1568		while args:
1569			infile = args.pop(0)
1570			ret.append(['infile', infile])
1571		# apply 'ignore' and 'filter' rules (filter is stronger)
1572		temp = ret[:] ; ret = []
1573		for name,value in temp:
1574			if (not filter and not ignore) or \
1575			   (filter and name in filter) or \
1576			   (ignore and name not in ignore):
1577				ret.append( ['all', name, value] )
1578		# add the original command line string as 'realcmdline'
1579		ret.append( ['all', 'realcmdline', cmdline] )
1580		return ret
1581
1582	def compose_cmdline(self, conf={}, no_check=0):
1583		"compose a full (and diet) command line from CONF dict"
1584		if not conf: return []
1585		args = []
1586		dft_options = OPTIONS.copy()
1587		cfg = conf.copy()
1588		valid_opts = self.all_options + self.all_flags
1589		use_short = {'no-headers':'H', 'enum-title':'n'}
1590		# remove useless options
1591		if not no_check and cfg.get('toc-only'):
1592			if cfg.has_key('no-headers'):
1593				del cfg['no-headers']
1594			if cfg.has_key('outfile'):
1595				del cfg['outfile']      # defaults to STDOUT
1596			if cfg.get('target') == 'txt':
1597				del cfg['target']       # already default
1598			args.append('--toc-only')  # must be the first
1599			del cfg['toc-only']
1600		# add target type
1601		if cfg.has_key('target'):
1602			args.append('-t '+cfg['target'])
1603			del cfg['target']
1604		# add other options
1605		for key in cfg.keys():
1606			if key not in valid_opts: continue  # may be a %!setting
1607			if key in ['outfile','infile']: continue   # later
1608			val = cfg[key]
1609			if not val: continue
1610			# default values are useless on cmdline
1611			if val == dft_options.get(key): continue
1612			# -short format
1613			if key in use_short.keys():
1614				args.append('-'+use_short[key])
1615				continue
1616			# --long format
1617			if key in self.all_flags: # add --option
1618				args.append('--'+key)
1619			else:                     # add --option=value
1620				args.append('--%s=%s'%(key,val))
1621		# the outfile using -o
1622		if cfg.has_key('outfile') and \
1623		   cfg['outfile'] != dft_options.get('outfile'):
1624			args.append('-o '+cfg['outfile'])
1625		# place input file(s) always at the end
1626		if cfg.has_key('infile'):
1627			args.append(string.join(cfg['infile'],' '))
1628		# return as a nice list
1629		Debug("Diet command line: %s"%string.join(args,' '), 1)
1630		return args
1631
1632##############################################################################
1633
class SourceDocument:
	"""
	SourceDocument class - scan document structure, extract data

	It knows about full files. It reads a file and identifies the
	beginning of all the areas (Head,Conf,Body). With this info it
	can extract each area contents.
	Note: the original line break is removed.

	DATA:
	  self.arearef - Save Head, Conf, Body init line number
	  self.areas   - Store the area names which are not empty
	  self.buffer  - The full file contents (with NO \\r, \\n)

	METHODS:
	  get()   - Access the contents of an Area. Example:
	            config = SourceDocument(file).get('conf')

	  split() - Get all the document Areas at once. Example:
	            head, conf, body = SourceDocument(file).split()

	RULES:
	    * The document parts are sequential: Head, Conf and Body.
	    * One ends when the next begins.
	    * The Conf Area is optional, so a document can have just
	      Head and Body Areas.

	    These are the Areas limits:
	      - Head Area: the first three lines
	      - Body Area: from the first valid text line to the end
	      - Conf Area: the comments between Head and Body Areas

	    Exception: If the first line is blank, this means no
	    header info, so the Head Area is just the first line.
	"""
	def __init__(self, filename='', contents=[]):
		"Scans the file (if any), or else the 'contents' lines list"
		self.areas = ['head','conf','body']
		self.arearef = []
		self.areas_fancy = ''
		self.filename = filename
		self.buffer = []
		if filename:
			self.scan_file(filename)
		elif contents:
			self.scan(contents)

	def split(self):
		"Returns all document parts, split into lists."
		return self.get('head'), self.get('conf'), self.get('body')

	def get(self, areaname):
		"""
		Returns head|conf|body contents from self.buffer.

		An empty list is returned when the area is not listed on
		self.areas or when the buffer is empty. The head, when
		returned, is always filled up to 3 lines.
		"""
		# sanity
		if areaname not in self.areas: return []
		if not self.buffer           : return []
		# go get it
		bufini = 1
		bufend = len(self.buffer)
		if   areaname == 'head':
			ini = bufini
			end = self.arearef[1] or self.arearef[2] or bufend
		elif areaname == 'conf':
			ini = self.arearef[1]
			end = self.arearef[2] or bufend
		elif areaname == 'body':
			ini = self.arearef[2]
			end = bufend
		else:
			Error("Unknown Area name '%s'"%areaname)
		lines = self.buffer[ini:end]
		# make sure head will always have 3 lines
		while areaname == 'head' and len(lines) < 3:
			lines.append('')
		return lines

	def scan_file(self, filename):
		"Reads the file (line breaks removed) and scans its lines"
		Debug("source file: %s"%filename)
		Message(_("Loading source document"),1)
		buf = Readfile(filename, remove_linebreaks=1)
		self.scan(buf)

	def scan(self, lines):
		"""
		Run through source lines and identify head/conf/body areas.

		The results are saved on self.arearef (the init line of each
		area, zero if not found), self.areas (the name of a missing
		area is blanked) and self.buffer.
		The 'lines' list itself is not changed.
		"""
		if len(lines) == 0:
			Error(_('The input file is empty: %s')%self.filename)
		cfg_parser = ConfigLines().parse_line
		# work on a copy, so the caller's list is left untouched.
		# the empty first item makes the text start at pos 1
		buf = [''] + list(lines)
		ref = [1,4,0]
		if not string.strip(buf[1]):              # no header
			ref[0] = 0 ; ref[1] = 2
		rgx = getRegexes()
		for i in range(ref[1],len(buf)):          # find body init:
			if string.strip(buf[i]) and (     # ... not blank and
			   buf[i][0] != '%' or            # ... not comment or
			   rgx['macros'].match(buf[i]) or # ... %%macro
			   rgx['toc'].match(buf[i])    or # ... %%toc
			   cfg_parser(buf[i],'include')[1]): # ... %!include
				ref[2] = i ; break
		if ref[1] == ref[2]: ref[1] = 0           # no conf area
		for i in 0,1,2:                           # del !existent
			if ref[i] >= len(buf): ref[i] = 0 # title-only
			if not ref[i]: self.areas[i] = ''
		Debug('Head,Conf,Body start line: %s'%ref)
		self.arearef = ref                        # save results
		self.buffer  = buf
		# fancyness sample: head conf body (1 4 8)
		self.areas_fancy = "%s (%s)"%(
		     string.join(self.areas),
		     string.join(map(str, map(lambda x:x or '', ref))))
		Message(_("Areas found: %s")%self.areas_fancy, 2)

	def get_raw_config(self):
		"Handy method to get the CONF area RAW config (if any)"
		if not self.areas.count('conf'): return []
		Message(_("Scanning source document CONF area"),1)
		raw = ConfigLines(
		      file=self.filename, lines=self.get('conf'),
		      first_line=self.arearef[1]).get_raw_config()
		Debug("document raw config: %s"%raw, 1)
		return raw
1755
1756##############################################################################
1757
class ConfigMaster:
	"""
	ConfigMaster class - the configuration wizard

	This class is the configuration master. It knows how to handle
	the RAW and PARSED config format. It also performs the sanity
	checkings for a given configuration.

	DATA:
	  self.raw         - Stores the config on the RAW format
	  self.parsed      - Stores the config on the PARSED format
	  self.defaults    - Stores the default values for all keys
	  self.off         - Stores the OFF values for all keys
	  self.multi       - List of keys which can have multiple values
	  self.numeric     - List of keys which value must be a number
	  self.incremental - List of keys which are incremental

	RAW FORMAT:
	  The RAW format is a list of lists, being each mother list item
	  a full configuration entry. Any entry is a 3 item list, on
	  the following format: [ TARGET, KEY, VALUE ]
	  Being a list, the order is preserved, so it's easy to use
	  different kinds of configs, as CONF area and command line,
	  respecting the precedence.
	  The special target 'all' is used when no specific target was
	  defined on the original config.

	PARSED FORMAT:
	  The PARSED format is a dictionary, with all the 'key : value'
	  found by reading the RAW config. The self.target contents
	  matters, so this dictionary only contains the target's
	  config. The configs of other targets are ignored.

	The CommandLine and ConfigLines classes have the get_raw_config()
	method which convert the configuration found to the RAW format.
	Just feed it to parse() and get a brand-new ready-to-use config
	dictionary. Example:

	    >>> raw = CommandLine().get_raw_config(['-n', '-H'])
	    >>> print raw
	    [['all', 'enum-title', ''], ['all', 'no-headers', '']]
	    >>> parsed = ConfigMaster(raw).parse()
	    >>> print parsed
	    {'enum-title': 1, 'headers': 0}
	"""
	def __init__(self, raw=[], target=''):
		"Saves the RAW config and target, and loads the default values"
		self.raw          = raw
		self.target       = target
		self.parsed       = {}
		self.dft_options  = OPTIONS.copy()
		self.dft_flags    = FLAGS.copy()
		self.dft_actions  = ACTIONS.copy()
		self.dft_settings = SETTINGS.copy()
		self.defaults     = self._get_defaults()
		self.off          = self._get_off()
		self.multi        = ['infile', 'options','preproc','postproc']
		self.incremental  = ['verbose']
		self.numeric      = ['toc-level','split']

	def _get_defaults(self):
		"Get the default values for all config/options/flags"
		empty = {}
		for kw in CONFIG_KEYWORDS: empty[kw] = ''
		empty.update(self.dft_options)
		empty.update(self.dft_flags)
		empty.update(self.dft_actions)
		empty.update(self.dft_settings)
		empty['realcmdline'] = ''  # internal use only
		empty['sourcefile']  = ''  # internal use only
		return empty

	def _get_off(self):
		"""
		Turns OFF all the config/options/flags.

		Returns a dictionary with the same keys of self.defaults,
		each one set to the empty value of the default value type:
		0 for numbers, '' for strings and [] for lists.
		"""
		off = {}
		for key in self.defaults.keys():
			kind = type(self.defaults[key])
			if kind == type(9):
				off[key] = 0
			elif kind == type(''):
				off[key] = ''
			elif kind == type([]):
				off[key] = []
			else:
				# the key name must be formatted into the message
				Error('ConfigMaster: %s: Unknown type'%key)
		return off

	def _check_target(self):
		"Checks if the target is already defined. If not, do it"
		if not self.target:
			self.target = self.find_value('target')

	def get_target_raw(self):
		"Returns the raw config for self.target or 'all'"
		ret = []
		self._check_target()
		for entry in self.raw:
			if entry[0] in [self.target, 'all']:
				ret.append(entry)
		return ret

	def add(self, key, val):
		"""
		Adds the key:value pair to the config dictionary (if needed).

		  - The 'options' value is parsed as a command line, and
		    each option found on it is added in turn.
		  - A 'no-' prefixed key sets the key to its OFF value.
		  - Keys not found on self.defaults are ignored.
		  - A default value removes any previously saved value.
		  - The 'multi' keys values are appended to a list and
		    the 'incremental' keys values are summed.
		"""
		# %!options
		if key == 'options':
			ignoreme = self.dft_actions.keys() + ['target']
			ignoreme.remove('dump-config')
			ignoreme.remove('dump-source')
			raw_opts = CommandLine().get_raw_config(
			             val, ignore=ignoreme)
			for target, key, val in raw_opts:
				self.add(key, val)
			return
		# the no- prefix turns OFF this key
		if key[:3] == 'no-':
			key = key[3:]              # remove prefix
			val = self.off.get(key)    # turn key OFF
		# is this key valid?
		if key not in self.defaults.keys():
			Debug('Bogus Config %s:%s'%(key,val),1)
			return
		# is this value the default one?
		if val == self.defaults.get(key):
			# if default value, remove previous key:val
			if self.parsed.has_key(key):
				del self.parsed[key]
			# nothing more to do
			return
		# flags ON comes empty. we'll add the 1 value now
		if val == '' and \
		   key in self.dft_flags.keys()+self.dft_actions.keys():
			val = 1
		# multi value or single?
		if key in self.multi:
			# first one? start new list
			if not self.parsed.has_key(key):
				self.parsed[key] = []
			self.parsed[key].append(val)
		# incremental value? so let's add it
		elif key in self.incremental:
			self.parsed[key] = (self.parsed.get(key) or 0) + val
		else:
			self.parsed[key] = val
		fancykey = dotted_spaces("%12s"%key)
		Message(_("Added config %s : %s")%(fancykey,val),3)

	def get_outfile_name(self, config={}):
		"""
		Returns the output file name for the given config.

		Dirname is the same for {in,out}file: a relative outfile is
		placed on the source file directory. With no outfile set,
		STDIN goes to STDOUT, MODULEIN goes to MODULEOUT, and a
		source file name has its .txt or .t2t extension changed
		to the target name.
		"""
		infile, outfile = config['sourcefile'], config['outfile']
		if outfile and outfile not in [STDOUT, MODULEOUT] \
		   and not os.path.isabs(outfile):
			outfile = os.path.join(os.path.dirname(infile), outfile)
		if infile == STDIN    and not outfile: outfile = STDOUT
		if infile == MODULEIN and not outfile: outfile = MODULEOUT
		if not outfile and (infile and config.get('target')):
			basename = re.sub(r'\.(txt|t2t)$','',infile)
			outfile = "%s.%s"%(basename, config['target'])
		Debug(" infile: '%s'"%infile , 1)
		Debug("outfile: '%s'"%outfile, 1)
		return outfile

	def sanity(self, config, gui=0):
		"""
		Basic config sanity checkings.

		Returns a new config dictionary, with all the keys present
		(the missing ones get the default value), the numeric keys
		converted to integers and the target and outfile set.
		The problems found are reported using Error().
		When 'gui' is true, the target and infile checkings and the
		same in/out file checking are skipped.
		An empty config returns an empty dictionary.
		"""
		if not config: return {}
		target = config.get('target')
		# some actions don't require target specification
		if not target:
			for action in NO_TARGET:
				if config.get(action):
					target = 'txt'
					break
		# on GUI, some checkings are skipped
		if not gui:
			# we *need* a target
			if not target:
				Error(_('No target specified (try --help)')+\
				'\n\n'+\
				_('Maybe trying to convert an old v1.x file?'))
			# and of course, an infile also
			if not config.get('infile'):
				Error(_('Missing input file (try --help)'))
			# is the target valid?
			if not TARGETS.count(target):
				Error(_("Invalid target '%s' (try --help)")%\
				     target)
		# ensure all keys are present
		empty = self.defaults.copy() ; empty.update(config)
		config = empty.copy()
		# check integers options
		for key in config.keys():
			if key in self.numeric:
				# only the conversion errors mean a bad number,
				# anything else must not be hidden
				try: config[key] = int(config[key])
				except (ValueError, TypeError, OverflowError):
					Error(_('--%s value must be a number'
					        )%key)
		# check split level value
		if config['split'] not in [0,1,2]:
			Error(_('Option --split must be 0, 1 or 2'))
		# --toc-only is stronger than others
		if config['toc-only']:
			config['headers'] = 0
			config['toc']     = 0
			config['split']   = 0
			config['gui']     = 0
			config['outfile'] = config['outfile'] or STDOUT
		# splitting is disable for now (future: HTML only, no STDOUT)
		config['split'] = 0
		# restore target
		config['target'] = target
		# set output file name
		config['outfile'] = self.get_outfile_name(config)
		# checking suicide
		if config['sourcefile'] == config['outfile'] and \
		   config['outfile'] not in [STDOUT,MODULEOUT] and not gui:
			Error(_("Input and Output files are the same: %s")%(
			config['outfile']))
		return config

	def parse(self):
		"Returns the parsed config for the current target"
		raw = self.get_target_raw()
		for target, key, value in raw:
			self.add(key, value)
		Message(_("Added the following keys: %s")%string.join(
		         self.parsed.keys(),', '),2)
		return self.parsed.copy()

	def find_value(self, key='', target=''):
		"""
		Scans ALL raw config to find the desired key.

		Only the entries for the given target or for 'all' are
		checked. Returns the list of all the values found for a
		multi value key, the last value found for the other keys,
		or an empty string if the key was not found.
		"""
		ret = []
		# scan and save all values found
		for targ, k, val in self.raw:
			if targ in [target, 'all'] and k == key:
				ret.append(val)
		if not ret: return ''
		# if not multi value, return only the last found
		if key in self.multi: return ret
		else                : return ret[-1]
1994
1995########################################################################
1996
class ConfigLines:
	"""
	ConfigLines class - the config file data extractor

	This class reads and parse the config lines on the %!key:val
	format, converting it to RAW config. It deals with user
	config file (RC file), source document CONF area and
	%!includeconf directives.

	Call it passing a file name or feed the desired config lines.
	Then just call the get_raw_config() method and wait to
	receive the full config data on the RAW format. This method
	also follows the possible %!includeconf directives found on
	the config lines. Example:

	    raw = ConfigLines(file=".txt2tagsrc").get_raw_config()

	The parse_line() method is also useful to be used alone,
	to identify and tokenize a single config line. For example,
	to get the %!include command components, on the source
	document BODY:

	    target, key, value = ConfigLines().parse_line(body_line)
	"""
	def __init__(self, file='', lines=[], first_line=1):
		"""
		file       - the config file name ('NOFILE' if not informed)
		lines      - the config lines, if already available
		first_line - the number of the first line, used on messages
		"""
		self.file = file or 'NOFILE'
		self.lines = lines
		self.first_line = first_line

	def load_lines(self):
		"Make sure we've loaded the file contents into buffer"
		# no file informed means self.file == 'NOFILE' (see __init__),
		# so checking just for an empty name would never be true
		if not self.lines and self.file in ['', 'NOFILE']:
			Error("ConfigLines: No file or lines provided")
		if not self.lines:
			self.lines = self.read_config_file(self.file)

	def read_config_file(self, filename=''):
		"""
		Read a Config File contents, aborting on invalid line.

		Returns the file lines. Any line which is not empty and
		does not begin with % is reported using Error().
		An empty file name returns an empty list.
		"""
		if not filename: return []
		errormsg = _("Invalid CONFIG line on %s")+"\n%03d:%s"
		lines = Readfile(filename, remove_linebreaks=1)
		# sanity: try to find invalid config lines
		for i in range(len(lines)):
			line = string.rstrip(lines[i])
			if not line: continue  # empty
			if line[0] != '%': Error(errormsg%(filename,i+1,line))
		return lines

	def include_config_file(self, file=''):
		"Perform the %!includeconf action, returning RAW config"
		if not file: return []
		# current dir relative to the current file (self.file)
		current_dir = os.path.dirname(self.file)
		file = os.path.join(current_dir, file)
		# read and parse included config file contents
		lines = self.read_config_file(file)
		return ConfigLines(file=file, lines=lines).get_raw_config()

	def get_raw_config(self):
		"""
		Scan buffer and extract all config as RAW (including includes).

		Returns a list of [target, key, value] entries. The config
		found on a %!includeconf file is placed where the directive
		was. A file including itself is reported using Error().
		"""
		ret = []
		self.load_lines()
		first = self.first_line
		for i in range(len(self.lines)):
			line = self.lines[i]
			Message(_("Processing line %03d: %s")%(first+i,line),2)
			target, key, val = self.parse_line(line)
			if not key: continue    # no config on this line
			if key == 'includeconf':
				err = _('A file cannot include itself (loop!)')
				# the included path is relative to the current
				# file dir (see include_config_file), so the
				# resolved path must be checked too
				inc_path = os.path.join(
				           os.path.dirname(self.file), val)
				if val == self.file or \
				   os.path.normpath(inc_path) == \
				   os.path.normpath(self.file):
					Error("%s: %%!includeconf: %s"%(
						err, self.file))
				more_raw = self.include_config_file(val)
				ret.extend(more_raw)
				Message(_("Finished Config file inclusion: %s"
				          )%(val),2)
			else:
				ret.append([target, key, val])
				Message(_("Added %s")%key,3)
		return ret

	def parse_line(self, line='', keyname='', target=''):
		"""
		Detects %!key:val config lines and extract data from it.

		line    - the line to be checked
		keyname - regex for the key name (default: any letters)
		target  - regex for the target name (default: any letters)

		Returns the [target, name, value] list, with target and
		name in lowercase. The target is 'all' when not informed
		and also for the target and includeconf keys.
		The value is a string, except for:
		  - guicolors: a list with the tokens found
		  - preproc, postproc: a (pattern, replace) tuple
		If the line is not a valid config line, a list with three
		empty strings is returned.
		"""
		empty = ['', '', '']
		if not line: return empty
		no_target = ['target', 'includeconf']
		re_name   = keyname or '[a-z]+'
		re_target = target  or '[a-z]*'
		# note: the value may be one single char (\S.*?)
		cfgregex  = re.compile(r"""
		  ^%%!\s*               # leading id with opt spaces
		  (?P<name>%s)\s*       # config name
		  (\((?P<target>%s)\))? # optional target spec inside ()
		  \s*:\s*               # key:value delimiter with opt spaces
		  (?P<value>\S.*?)      # config value
		  \s*$                  # rstrip() spaces and hit EOL
		  """%(re_name,re_target), re.I+re.VERBOSE)
		prepostregex = re.compile(r"""
		                        # ---[ PATTERN ]---
		  ^( "([^"]*)"          # "double quoted" or
		   | '([^']*)'          # 'single quoted' or
		   | ([^\s]+)           # single_word
		   )
		    \s+                 # separated by spaces

		                        # ---[ REPLACE ]---
		       ( "([^"]*)"      # "double quoted" or
		       | '([^']*)'      # 'single quoted' or
		       | (.*)           # anything
		           )
		            \s*$
		  """, re.VERBOSE)
		guicolors = re.compile(r"^([^\s]+\s+){3}[^\s]+") # 4 tokens
		match = cfgregex.match(line)
		if not match: return empty

		name   = string.lower(match.group('name') or '')
		target = string.lower(match.group('target') or 'all')
		value  = match.group('value')

		# NO target keywords: force all targets
		if name in no_target: target = 'all'

		# special config for GUI colors
		if name == 'guicolors':
			valmatch = guicolors.search(value)
			if not valmatch: return empty
			value = re.split(r'\s+', value)

		# Special config with two quoted values (%!preproc: "foo" 'bar')
		if name in ['preproc','postproc']:
			valmatch = prepostregex.search(value)
			if not valmatch: return empty
			getval = valmatch.group
			patt   = getval(2) or getval(3) or getval(4) or ''
			repl   = getval(6) or getval(7) or getval(8) or ''
			value  = (patt, repl)
		return [target, name, value]
2135
2136##############################################################################
2137
class MaskMaster:
	"""
	(Un)Protect important structures from escaping and formatting

	The mask() method changes each raw, monospaced, macro, TOC
	and link mark found on a line by a mask string, saving the
	original contents on the banks. The undo() method changes
	the masks back by the (now tagged/expanded) saved contents.
	The reset() method empties the banks.
	"""
	def __init__(self):
		self.linkmask  = 'vvvLINKvvv'
		self.monomask  = 'vvvMONOvvv'
		self.macromask = 'vvvMACROvvv'
		self.rawmask   = 'vvvRAWvvv'
		self.tocmask   = 'vvvTOCvvv'
		self.macroman  = MacroMaster()
		self.reset()

	def reset(self):
		"Empties all the banks"
		self.linkbank  = []
		self.monobank  = []
		self.macrobank = []
		self.rawbank   = []

	def mask(self, line=''):
		"Returns the line with the marks changed by the masks"
		global AUTOTOC

		# raw text: save it escaped
		found = regex['raw'].search(line)
		while found:
			self.rawbank.append(doEscape(TARGET, found.group(1)))
			line = regex['raw'].sub(self.rawmask, line, 1)
			found = regex['raw'].search(line)

		# pre-formatted font text: save it escaped
		found = regex['fontMono'].search(line)
		while found:
			self.monobank.append(doEscape(TARGET, found.group(1)))
			line = regex['fontMono'].sub(self.monomask, line, 1)
			found = regex['fontMono'].search(line)

		# macros: save the full mark
		found = regex['macros'].search(line)
		while found:
			self.macrobank.append(found.group())
			line = regex['macros'].sub(self.macromask, line, 1)
			found = regex['macros'].search(line)

		# TOC location: once found, AUTOTOC is turned off
		while regex['toc'].search(line):
			line = regex['toc'].sub(self.tocmask, line)
			AUTOTOC = 0

		# URLs and emails
		while 1:
			# try to match plain and named links
			plain = regex['link'].search(line)
			named = regex['linkmark'].search(line)
			if not (plain or named): break

			# both types found? the first on the line wins.
			# if they start together, the plain link wins
			if plain and named and named.start() < plain.start():
				is_named = 1
			elif plain:
				is_named = 0
			else:
				is_named = 1

			# extract link data and apply mask
			if is_named:
				link  = named.group('link')
				label = string.rstrip(named.group('label'))
				line  = regex['linkmark'].sub(
				        self.linkmask, line, 1)
			else:
				link  = plain.group()
				label = ''
				line  = regex['link'].sub(
				        self.linkmask, line, 1)

			# save link data to the link bank
			self.linkbank.append((label, link))
		return line

	def undo(self, line):
		"Returns the line with the masks changed by the saved data"

		# url & email: the tagged link
		for label, url in self.linkbank:
			line = string.replace(
			       line, self.linkmask,
			       get_tagged_link(label, url), 1)

		# macros: the expanded contents
		for macro in self.macrobank:
			line = string.replace(
			       line, self.macromask,
			       self.macroman.expand(macro), 1)

		# verb: the contents between the mono font tags
		for mono in self.monobank:
			tag_open  = TAGS['fontMonoOpen']
			tag_close = TAGS['fontMonoClose']
			line = string.replace(
			       line, self.monomask,
			       tag_open + mono + tag_close, 1)

		# raw: the contents as saved
		for raw in self.rawbank:
			line = string.replace(line, self.rawmask, raw, 1)

		return line
2239
2240
2241##############################################################################
2242
2243
class TitleMaster:
	"""
	Title things

	Feed it with a title line using add(), then call get() to
	receive the tagged title lines. All the titles added are
	saved, so dump_marked_toc() can compose the TOC. At the
	end, close_all() gives the tags to close the opened blocks.
	"""
	def __init__(self):
		self.count = ['',0,0,0,0,0]
		self.toc   = []
		self.level = 0
		self.kind  = ''
		self.txt   = ''
		self.label = ''
		self.tag   = ''
		self.tag_hold = []
		self.last_level = 0
		self.count_id = ''
		self.user_labels = {}
		self.anchor_count = 0
		self.anchor_prefix = 'toc'

	def _open_close_blocks(self):
		"Open new title blocks, closing the previous (if any)"
		if not rules['titleblocks']: return
		hold    = self.tag_hold
		current = self.level
		depth   = self.last_level

		# same level, just close the previous
		if current == depth:
			closing = TAGS.get('title%dClose'%depth)
			if closing: hold.append(closing)

		# section -> subsection, more depth
		while depth < current:
			depth = depth + 1

			# open the new block of subsections
			opening = TAGS.get('blockTitle%dOpen'%depth)
			if opening: hold.append(opening)

			# jump from title1 to title3 or more
			# fill the gap with an empty section
			if depth < current:
				filler = TAGS.get('title%dOpen'%depth)
				filler = regex['x'].sub('', filler)   # del \a
				if filler: hold.append(filler)

		# section <- subsection, less depth
		while depth > current:
			# close the current opened subsection and
			# the current opened block of subsections
			for name in ['title%dClose', 'blockTitle%dClose']:
				closing = TAGS.get(name%depth)
				if closing: hold.append(closing)

			depth = depth - 1

			# close the previous section of the same level
			# the subsections were under it
			if depth == current:
				closing = TAGS.get('title%dClose'%depth)
				if closing: hold.append(closing)

	def add(self, line):
		"Parses a new title line."
		if not line: return
		self._set_prop(line)
		self._open_close_blocks()
		self._set_count_id()
		self._set_label()
		self._save_toc_info()

	def close_all(self):
		"Closes all opened title blocks"
		closing = self.tag_hold[:]
		while self.level:
			for name in ['title%dClose', 'blockTitle%dClose']:
				tag = TAGS.get(name%self.level)
				if tag: closing.append(tag)
			self.level = self.level - 1
		return closing

	def _save_toc_info(self):
		"Save TOC info, used by self.dump_marked_toc()"
		entry = (self.level, self.count_id, self.txt, self.label)
		self.toc.append(entry)

	def _set_prop(self, line=''):
		"Extract info from original line and set data holders."
		# the first char tells the title type (numbered or not)
		marker = string.lstrip(line)[0]
		if marker == '=':
			kind = 'title'
		elif marker == '+':
			kind = 'numtitle'
		else:
			Error("Unknown Title ID '%s'"%marker)
		# extract line info
		match = regex[kind].search(line)
		level = len(match.group('id'))
		# enum-title forces the numbered kind
		if CONF['enum-title']: kind = 'numtitle'
		# get the tag for this kind, or else the plain title tag
		if rules['titleblocks']:
			self.tag = TAGS.get('%s%dOpen'%(kind,level)) or \
			           TAGS.get('title%dOpen'%level)
		else:
			self.tag = TAGS.get('%s%d'%(kind,level)) or \
			           TAGS.get('title%d'%level)
		# save it all
		self.last_level = self.level
		self.kind  = kind
		self.level = level
		self.txt   = string.strip(match.group('txt'))
		self.label = match.group('label')

	def _set_count_id(self):
		"Compose and save the title count identifier (if needed)."
		numbers = []
		if self.kind == 'numtitle' and not rules['autonumbertitle']:
			# manually increase title count
			self.count[self.level] = self.count[self.level] + 1
			# reset sublevels count (if any)
			for sub in range(self.level+1, len(self.count)):
				self.count[sub] = 0
			# compose count id from hierarchy
			for number in self.count[1:self.level+1]:
				numbers.append('%d.'%number)
		self.count_id = string.join(numbers, '')

	def _set_label(self):
		"Compose and save title label, used by anchors."
		# only these chars are valid on a label set by user
		valid = self.label or ''
		self.label = re.sub('[^A-Za-z0-9_-]', '', valid)
		# generate name as 15 first :alnum: chars
		#TODO how to translate safely accented chars to plain?
		#self.label = re.sub('[^A-Za-z0-9]', '', self.txt)[:15]
		# 'tocN' label - sequential count, ignoring 'toc-level'
		#self.label = self.anchor_prefix + str(len(self.toc)+1)

	def _get_tagged_anchor(self):
		"Return anchor if user defined a label, or TOC is on."
		label = self.label
		if CONF['toc'] and self.level <= CONF['toc-level']:
			# this count is needed because self.toc stores all
			# titles, regardless of the 'toc-level' setting,
			# so we can't use self.toc length to number anchors
			self.anchor_count = self.anchor_count + 1
			# autonumber label (if needed)
			if not label:
				label = '%s%s'%(
				        self.anchor_prefix, self.anchor_count)
		if not (label and TAGS['anchor']): return ''
		return regex['x'].sub(label, TAGS['anchor'])

	def _get_full_title_text(self):
		"Returns the full title contents, already escaped."
		# insert count_id (if any) before text
		if self.count_id:
			full = '%s %s'%(self.count_id, self.txt)
		else:
			full = self.txt
		# escape specials
		full = doEscape(TARGET, full)
		# some targets need final escapes on title lines
		# it's here because there is a 'continue' after title
		if rules['finalescapetitle']:
			full = doFinalEscape(TARGET, full)
		return full

	def get(self):
		"Returns the tagged title as a list."
		# maybe some anchoring before?
		anchor = self._get_tagged_anchor()
		self.tag = regex['_anchor'].sub(anchor, self.tag)

		### compose & escape title text (TOC uses unescaped)
		full_title = self._get_full_title_text()

		# the tags to close the previous section area come first
		lines = self.tag_hold[:]
		self.tag_hold = []

		tagged = regex['x'].sub(full_title, self.tag)

		# TXT target: blank lines around and title "underline"
		if TARGET == 'txt':
			underline = regex['x'].sub(
			            '='*len(full_title), self.tag)
			lines.extend(['', tagged, underline, ''])
		else:
			lines.append(tagged)

		return lines

	def dump_marked_toc(self, max_level=99):
		"Dumps all toc itens as a valid t2t markup list"
		#TODO maybe use quote+linebreaks instead lists
		items = []
		toc_count = 1
		for level, count_id, txt, label in self.toc:
			if level > max_level: continue   # ignore
			indent = '  '*level
			id_txt = string.lstrip('%s %s'%(count_id, txt))
			if not label:
				label = '%s%d'%(self.anchor_prefix, toc_count)
			toc_count = toc_count + 1
			# no links on TOC, just text
			if not TAGS['anchor']:
				# man don't reformat TOC lines, cool!
				if TARGET in ['txt', 'man']:
					tocitem = '%s""%s""' %(
					          indent, id_txt)
				else:
					tocitem = '%s- ""%s""'%(
					          indent, id_txt)
			# TOC will have links.
			# TOC is more readable with master topics not
			# linked at number. This idea was taken from
			# Windows .CHM help files
			elif CONF['enum-title'] and level == 1:
				tocitem = '%s+ [""%s"" #%s]'%(
				          indent, txt, label)
			else:
				tocitem = '%s- [""%s"" #%s]'%(
				          indent, id_txt, label)
			items.append(tocitem)
		return items
2475
2476
2477##############################################################################
2478
2479#TODO check all this table mess
# handles the TABLE lines, using the properties returned by parse_row
# BlockMaster's table() method hacks in and swaps the cells for the parsed ones
class TableMaster:
	"""Parses the t2t table lines and composes the tagged table.

	Attributes:
	  rows      : the table rows, in the order given to add_row().
	              dump() reads the 'cells', 'cellalign', 'cellspan'
	              and 'title' keys of each one (see parse_row).
	  border    : 1 if the table has border, else 0
	  align     : the table alignment: 'Left' or 'Center'
	  cellalign : alignment of each cell of the constructor line
	  cellspan  : column span of each cell of the constructor line

	The last four are set from the line given to the constructor
	(if any), as parsed by parse_row().
	"""
	def __init__(self, line=''):
		self.rows      = []
		self.border    = 0
		self.align     = 'Left'
		self.cellalign = []
		self.cellspan  = []
		if line:
			prop = self.parse_row(line)
			self.border    = prop['border']
			self.align     = prop['align']
			self.cellalign = prop['cellalign']
			self.cellspan  = prop['cellspan']

	def _get_open_tag(self):
		"Return the table open tag, with align, border and columns set."
		topen     = TAGS['tableOpen']
		tborder   = TAGS['tableBorder']
		talign    = TAGS['tableAlign'+self.align]
		calignsep = TAGS['tableColAlignSep']
		calign    = ''

		# the first line defines if table has border or not
		if not self.border: tborder = ''
		# set the columns alignment
		if rules['tablecellaligntype'] == 'column':
			calign = map(lambda x: TAGS['tableColAlign%s'%x],
			             self.cellalign)
			calign = string.join(calign, calignsep)
		# align full table, set border and Column align (if any)
		topen = regex['_tableAlign'   ].sub(talign , topen)
		topen = regex['_tableBorder'  ].sub(tborder, topen)
		topen = regex['_tableColAlign'].sub(calign , topen)
		# tex table spec, border or not: {|l|c|r|} , {lcr}
		if calignsep and not self.border:
			# remove cell align separator
			topen = string.replace(topen, calignsep, '')
		return topen

	def _get_cell_align(self, cells):
		"""Return the alignment of each cell: Left, Center or Right.

		Spaces on both cell edges mean Center, a leading space
		only means Right. Blank cells are always Left.
		"""
		ret = []
		for cell in cells:
			align = 'Left'
			if string.strip(cell):
				if cell[0] == ' ' and cell[-1] == ' ':
					align = 'Center'
				elif cell[0] == ' ':
					align = 'Right'
			ret.append(align)
		return ret

	def _get_cell_span(self, cells):
		"""Return the column span of each cell (0 means no span).

		Spanned cells end with the span ID set by parse_row():
		a BEL char (\\a) followed by pipes. N pipes mean a span
		of N+1 columns.
		"""
		ret = []
		for cell in cells:
			span = 0
			m = re.search('\a(\\|+)$', cell)
			if m: span = len(m.group(1))+1
			ret.append(span)
		return ret

	def _tag_cells(self, rowdata):
		"Return the row cells tagged and joined, as a single string."
		row = []
		cells     = rowdata['cells']
		cellopen  = TAGS['tableCellOpen']
		cellclose = TAGS['tableCellClose']
		cellsep   = TAGS['tableCellSep']
		calign = map(lambda x: TAGS['tableCellAlign'+x],
		             rowdata['cellalign'])
		# populate the span tag (empty for not spanned cells)
		cspan = []
		for span in rowdata['cellspan']:
			if span > 0:
				cspan.append(regex['x'].sub(
				str(span), TAGS['tableCellColSpan']))
			else:
				cspan.append('')

		# maybe is it a title row? (title tags are optional)
		if rowdata['title']:
			cellopen  = TAGS['tableTitleCellOpen']  or cellopen
			cellclose = TAGS['tableTitleCellClose'] or cellclose
			cellsep   = TAGS['tableTitleCellSep']   or cellsep

		# should we break the line on *each* table cell?
		if rules['breaktablecell']: cellclose = cellclose+'\n'

		# cells pre processing
		if rules['tablecellstrip']:
			cells = map(string.strip, cells)
		if rowdata['title'] and rules['tabletitlerowinbold']:
			cells = map(lambda x: enclose_me('fontBold',x), cells)

		# add cell BEGIN/END tags
		for cell in cells:
			copen = cellopen
			# insert cell align into open tag (if cell is alignable)
			if rules['tablecellaligntype'] == 'cell':
				copen = regex['_tableCellAlign'].sub(
					calign.pop(0), copen)
			if rules['tablecellspannable']:
				copen = regex['_tableCellColSpan'].sub(
					cspan.pop(0), copen)
			row.append(copen + cell + cellclose)

		# maybe there are cell separators?
		return string.join(row, cellsep)

	def add_row(self, cells):
		"Append a row (a parse_row() dictionary) to the table."
		self.rows.append(cells)

	def parse_row(self, line):
		"""Parse a table line and return its properties dictionary.

		Keys of the returned dictionary:
		  border    : 1 if the line ends with ' |' (one or more pipes)
		  title     : 1 if the line mark is followed by a second pipe
		  align     : 'Center' if the line starts with a space,
		              else 'Left'
		  cells     : list with the cell contents
		  cellalign : alignment of each cell (see _get_cell_align)
		  cellspan  : column span of each cell (see _get_cell_span)
		"""
		# default table proprierties
		ret = {'border':0,'title':0,'align':'Left',
		       'cells':[],'cellalign':[], 'cellspan':[]}
		# detect table align (and remove spaces mark)
		# Slices are used so short lines can't raise IndexError
		if line[:1] == ' ': ret['align'] = 'Center'
		line = string.lstrip(line)
		# detect title mark
		if line[1:2] == '|': ret['title'] = 1
		# detect border mark and normalize the EOL
		m = re.search(r' (\|+) *$', line)
		if m: line = line+' ' ; ret['border'] = 1
		else: line = line+' | '
		# delete table mark
		line = regex['table'].sub('', line)
		# detect colspan  | foo | bar baz |||
		# the extra pipes are saved at the cell end, after a BEL
		line = re.sub(r' (\|+)\| ', '\a\\1 | ', line)
		# split cells (the last is fake)
		ret['cells'] = string.split(line, ' | ')[:-1]
		# find cells span
		ret['cellspan'] = self._get_cell_span(ret['cells'])
		# remove span ID
		ret['cells'] = map(lambda x:re.sub('\a\\|+$','',x),ret['cells'])
		# find cells align
		ret['cellalign'] = self._get_cell_align(ret['cells'])
		# hooray!
		Debug('Table Prop: %s' % ret, 7)
		return ret

	def dump(self):
		"Return the full tagged table, as a list of lines."
		tableopen  = self._get_open_tag()
		rows       = self.rows
		tableclose = TAGS['tableClose']

		rowopen     = TAGS['tableRowOpen']
		rowclose    = TAGS['tableRowClose']
		rowsep      = TAGS['tableRowSep']
		titrowopen  = TAGS['tableTitleRowOpen']  or rowopen
		titrowclose = TAGS['tableTitleRowClose'] or rowclose

		if rules['breaktablelineopen']:
			rowopen = rowopen + '\n'
			titrowopen = titrowopen + '\n'

		# tex gotchas
		if TARGET == 'tex':
			if not self.border:
				rowopen = titrowopen = ''
			else:
				tableclose = rowopen + tableclose

		# now we tag all the table cells on each row
		# (plain loop instead of map+lambda, for old Pythons)
		tagged_cells = []
		for rowdata in rows:
			tagged_cells.append(self._tag_cells(rowdata))

		tagged_rows = []
		if rowsep:
			# Add row separator tags between lines.
			# The last row gets none, because the table is over.
			# It is skipped instead of removed afterwards, so a
			# separator-like text inside the cells is kept.
			last = len(tagged_cells) - 1
			for i in range(len(tagged_cells)):
				tagged = tagged_cells[i]
				if i != last: tagged = tagged + rowsep
				tagged_rows.append(tagged)
		else:
			# add row BEGIN/END tags for each line
			for i in range(len(rows)):
				if rows[i]['title']:
					o,c = titrowopen, titrowclose
				else:
					o,c = rowopen, rowclose
				tagged_rows.append(o + tagged_cells[i] + c)

		fulltable = [tableopen] + tagged_rows + [tableclose]

		if rules['blankendtable']: fulltable.append('')
		return fulltable
2671
2672
2673##############################################################################
2674
2675
class BlockMaster:
	"""Keeps track of the open blocks and tags their contents.

	Three parallel stacks are kept, with one entry per open block:
	BLK (block names), HLD (held contents) and PRP (properties).
	Each block name is also the name of the method which tags
	the held contents when the block is closed by blockout().

	TIP: use blockin/out to add/del holders
	"""
	def __init__(self):
		self.BLK = []            # stack of open block names
		self.HLD = []            # stack of held contents (lists)
		self.PRP = []            # stack of properties (dictionaries)
		self.depth = 0           # number of open blocks
		self.last = ''           # name of the last block popped
		self.tableparser = None  # TableMaster, set when a table opens
		# Which blocks each block can have inside.
		# blockin() closes the open blocks until it finds one
		# that can contain the new block.
		self.contains = {
		  'para'    :['passthru','raw'],
		  'verb'    :[],
		  'table'   :[],
		  'raw'     :[],
		  'passthru':[],
		  'quote'   :['quote','passthru','raw'],
		  'list'    :['list' ,'numlist' ,'deflist','para','verb',
		              'raw'  ,'passthru'],
		  'numlist' :['list' ,'numlist' ,'deflist','para','verb',
		              'raw'  ,'passthru'],
		  'deflist' :['list' ,'numlist' ,'deflist','para','verb',
		              'raw'  ,'passthru']
		}
		self.allblocks = self.contains.keys()

	def block(self):
		"Returns the name of the innermost open block ('' if none)."
		if not self.BLK: return ''
		return self.BLK[-1]

	def isblock(self, name=''):
		"Tells if 'name' is the innermost open block."
		return self.block() == name

	def prop(self, key):
		"Returns a property of the innermost block ('' if unset or false)."
		if not self.PRP: return ''
		return self.PRP[-1].get(key) or ''

	def propset(self, key, val):
		"Sets a property of the innermost block."
		self.PRP[-1][key] = val
		#Debug('BLOCK prop ++: %s->%s'%(key,repr(val)), 1)
		#Debug('BLOCK props: %s'%(repr(self.PRP)), 1)

	def hold(self):
		"Returns the held contents of the innermost block ([] if none)."
		if not self.HLD: return []
		return self.HLD[-1]

	def holdadd(self, line):
		"Adds a line to the innermost block hold."
		# on list blocks (list, numlist, deflist) the line is
		# wrapped in a new list: holdaddsub() appends to it
		if self.block()[-4:] == 'list': line = [line]
		self.HLD[-1].append(line)
		Debug('HOLD add: %s'%repr(line), 4)
		Debug('FULL HOLD: %s'%self.HLD, 4)

	def holdaddsub(self, line):
		"Adds a line to the last held item of the innermost block."
		self.HLD[-1][-1].append(line)
		Debug('HOLD addsub: %s'%repr(line), 4)
		Debug('FULL HOLD: %s'%self.HLD, 4)

	def holdextend(self, lines):
		"Adds many lines to the innermost block hold."
		# on list blocks, all the lines are held as one single item
		if self.block()[-4:] == 'list': lines = [lines]
		self.HLD[-1].extend(lines)
		Debug('HOLD extend: %s'%repr(lines), 4)
		Debug('FULL HOLD: %s'%self.HLD, 4)

	def blockin(self, block):
		"""Opens a new block.

		The open blocks which can't contain the new one are closed
		first. Returns what these blockout() calls have returned.
		"""
		ret = []
		if block not in self.allblocks:
			Error("Invalid block '%s'"%block)
		# first, let's close other possible open blocks
		while self.block() and block not in self.contains[self.block()]:
			ret.extend(self.blockout())
		# now we can gladly add this new one
		self.BLK.append(block)
		self.HLD.append([])
		self.PRP.append({})
		if block == 'table': self.tableparser = TableMaster()
		# deeper and deeper
		self.depth = len(self.BLK)
		Debug('block ++ (%s): %s' % (block,self.BLK), 3)
		return ret

	def blockout(self):
		"""Closes the innermost block and tags its contents.

		Returns the tagged lines, or an empty list when there is
		a mother block: then the tagged lines are saved (as a
		list) on the mother's hold.
		"""
		if not self.BLK: Error('No block to pop')
		self.last = self.BLK.pop()
		# The tagger method has the same name of the block.
		# It runs before the HLD and PRP stacks are popped, so
		# hold() and prop() still return this block's data, and
		# self.depth still counts this block.
		tagged = getattr(self, self.last)()
		parsed = self.HLD.pop()
		self.PRP.pop()
		self.depth = len(self.BLK)
		# NOTE: the attribute is removed, not reset to None
		if self.last == 'table': del self.tableparser
		# inserting a nested block into mother
		if self.block():
			# on list blocks it goes inside the last item
			if self.block()[-4:] == 'list':
				self.HLD[-1][-1].append(tagged)
			else:
				self.HLD[-1].append(tagged)
			tagged = []   # reset. mother will have it all
		Debug('block -- (%s): %s' % (self.last,self.BLK), 3)
		Debug('RELEASED (%s): %s' % (self.last,parsed), 3)
		if tagged: Debug('BLOCK: %s'%tagged, 6)
		return tagged

	def _last_escapes(self, line):
		"Applies the target final escapes on the line."
		return doFinalEscape(TARGET, line)

	def _get_escaped_hold(self):
		"Returns the held lines of the innermost block, as a flat list."
		ret = []
		for line in self.hold():
			linetype = type(line)
			# strings get the final escapes
			if linetype == type(''):
				ret.append(self._last_escapes(line))
			# lists are included as they are (blockout() saves
			# the tagged nested blocks as lists)
			elif linetype == type([]):
				ret.extend(line)
			else:
				Error("BlockMaster: Unknown HOLD item type:"
				      " %s"%linetype)
		return ret

	def _remove_twoblanks(self, lastitem):
		"Returns the list without its last two items, if both are ''."
		if len(lastitem) > 1 and lastitem[-2:] == ['','']:
			return lastitem[:-2]
		return lastitem

	def passthru(self):
		"Tagger: the held lines are returned untouched."
		return self.hold()

	def raw(self):
		"Tagger: the held lines are just escaped for the target."
		lines = self.hold()
		return map(lambda x: doEscape(TARGET, x), lines)

	def para(self):
		"Tagger: returns the held lines as a tagged paragraph."
		tagged = []
		open  = TAGS['paragraphOpen']
		close = TAGS['paragraphClose']
		lines = self._get_escaped_hold()
		# open (or not) paragraph
		# NOTE(review): blockout() sets self.last to the block being
		# closed before calling this method, so when called from
		# there self.last is always 'para' at this point - confirm
		# if the previous block was the one meant to be tested
		if not open+close and self.last == 'para':
			pass # avoids multiple blank lines
		else:
			tagged.append(open)
		# pagemaker likes a paragraph as a single long line
		if rules['onelinepara']:
			tagged.append(string.join(lines,' '))
		# others are normal :)
		else:
			tagged.extend(lines)
		tagged.append(close)

		# very very very very very very very very very UGLY fix
		# needed because <center> can't appear inside <p>
		# (any error raised by this check is silently ignored)
		try:
			if len(lines) == 1 and \
			   TARGET in ('html', 'xhtml') and \
			   re.match('^\s*<center>.*</center>\s*$', lines[0]):
				tagged = [lines[0]]
		except: pass

		return tagged

	def verb(self):
		"Verbatim lines are not masked, so there's no need to unmask"
		tagged = []
		tagged.append(TAGS['blockVerbOpen'])
		for line in self.hold():
			# blocks with the 'mapped' property set to 'table'
			# have their macros expanded
			if self.prop('mapped') == 'table':
				line = MacroMaster().expand(line)
			if not rules['verbblocknotescaped']:
				line = doEscape(TARGET,line)
			if rules['indentverbblock']:
				line = '  '+line
			if rules['verbblockfinalescape']:
				line = doFinalEscape(TARGET, line)
			tagged.append(line)
		#TODO maybe use if not TAGS['blockVerbClose']
		if TARGET != 'pm6':
			tagged.append(TAGS['blockVerbClose'])
		return tagged

	def table(self):
		"Tagger: returns the tagged table, composed by the table parser."
		# rewrite all table cells by the unmasked and escaped data
		# (each held line has the row cells joined by SEPARATOR)
		lines = self._get_escaped_hold()
		for i in range(len(lines)):
			cells = string.split(lines[i], SEPARATOR)
			self.tableparser.rows[i]['cells'] = cells

		return self.tableparser.dump()

	def quote(self):
		"Tagger: returns the held lines as a tagged quote block."
		tagged = []
		myre   = regex['quote']
		open   = TAGS['blockQuoteOpen']            # block based
		close  = TAGS['blockQuoteClose']
		qline  = TAGS['blockQuoteLine']            # line based
		# one TAB for each open block
		indent = tagindent = '\t'*self.depth
		if rules['tagnotindentable']: tagindent = ''
		if not rules['keepquoteindent']: indent = ''

		if open: tagged.append(tagindent+open)     # open block
		for item in self.hold():
			if type(item) == type([]):
				tagged.extend(item)        # subquotes
			else:
				item = myre.sub('', item)  # del TABs
				if rules['barinsidequote']:
					item = get_tagged_bar(item)
				item = self._last_escapes(item)
				# the line tag is repeated for each quote depth
				item = qline*self.depth + item
				tagged.append(indent+item) # quote line
		if close: tagged.append(tagindent+close)   # close block
		return tagged

	# deflist and numlist are tagged by list() too, with their own tags
	def deflist(self): return self.list('deflist')
	def numlist(self): return self.list('numlist')
	def list(self, name='list'):
		"""Tagger: returns the held items as a tagged list.

		name is the list type (list, numlist or deflist), which
		is also the prefix of the tag names to be used.
		Each held item is a list: its first element is the item
		line and the others are the next lines of the item, or
		the sublists (already tagged, as lists).
		"""
		tagged    = []
		items     = self.hold()
		indent    = self.prop('indent')
		tagindent = indent
		listopen  = TAGS.get(name+'Open')
		listclose = TAGS.get(name+'Close')
		listline  = TAGS.get(name+'ItemLine')
		itemcount = 0
		if rules['tagnotindentable']: tagindent = ''
		if not rules['keeplistindent']: indent = ''

		# on deflist the term and its definition have their own tags
		if name == 'deflist':
			itemopen  = TAGS[name+'Item1Open']
			itemclose = TAGS[name+'Item2Close']
			itemsep   = TAGS[name+'Item1Close']+\
			            TAGS[name+'Item2Open']
		else:
			itemopen  = TAGS[name+'ItemOpen']
			itemclose = TAGS[name+'ItemClose']
			itemsep   = ''

		# ItemLine: number of leading chars identifies list depth
		if listline:
			itemopen  = listline*self.depth
			# dirty fix for mgp
			if name == 'numlist': itemopen = itemopen + '\a. '

		# remove two-blanks from list ending mark, to avoid <p>
		items[-1] = self._remove_twoblanks(items[-1])

		# open list (not nestable lists are only opened at mother)
		# NOTE(review): the depth is read from the global BLOCK,
		# presumably this very instance - confirm
		if listopen and not \
		   (rules['listnotnested'] and BLOCK.depth != 1):
			tagged.append(tagindent+listopen)

		# tag each list item (multine items)
		itemopenorig = itemopen


		for item in items:

			# add "manual" item count for noautonum targets
			itemcount = itemcount + 1
			if name == 'numlist' and not rules['autonumberlist']:
				n = str(itemcount)
				itemopen = regex['x'].sub(n, itemopenorig)
				del n

			item[0] = self._last_escapes(item[0])

			if name == 'deflist':
				# the term comes before the SEPARATOR
				term, rest = string.split(item[0],SEPARATOR,1)
				item[0] = rest
				if not item[0]: del item[0]      # to avoid <p>
				tagged.append(tagindent+itemopen+term+itemsep)
			else:
				# the item open tag takes the SEPARATOR place
				fullitem = tagindent+itemopen
				tagged.append(string.replace(
				              item[0], SEPARATOR, fullitem))
				del item[0]

			# process next lines for this item (if any)
			for line in item:
				if type(line) == type([]): # sublist inside
					tagged.extend(line)
				else:
					line = self._last_escapes(line)
					# blank lines turns to <p>
					if not line and rules['parainsidelist']:
						line = string.rstrip(indent   +\
						         TAGS['paragraphOpen']+\
						         TAGS['paragraphClose'])
					if not rules['keeplistindent']:
						line = string.lstrip(line)
					tagged.append(line)

			# close item (if needed)
			if itemclose: tagged.append(tagindent+itemclose)

		# close list (not nestable lists are only closed at mother)
		if listclose and not \
		   (rules['listnotnested'] and BLOCK.depth != 1):
			tagged.append(tagindent+listclose)

		if rules['blankendmotherlist'] and BLOCK.depth == 1:
			tagged.append('')

		return tagged
2975
2976
2977##############################################################################
2978
2979
class MacroMaster:
	"""Expands the macros (date, mtime, infile, outfile) found on a line.

	config is the config dictionary to be used. If it is empty,
	the global CONF is used instead. Its 'sourcefile' and
	'outfile' keys are read by the constructor.
	"""
	def __init__(self, config={}):
		self.name     = ''                       # current macro name
		self.config   = config or CONF
		self.infile   = self.config['sourcefile']
		self.outfile  = self.config['outfile']
		self.currdate = time.localtime(time.time())
		self.rgx      = regex.get('macros') or getRegexes()['macros']
		# path, dir and name of each file, set by set_file_info()
		self.fileinfo = { 'infile': None, 'outfile': None }
		self.dft_fmt  = MACROS                   # default formats

	def walk_file_format(self, fmt):
		"Walks the %%{in/out}file format string, expanding the % flags"
		i = 0; ret = ''                                 # counter/hold
		while i < len(fmt):                             # char by char
			c = fmt[i]; i = i + 1
			if c == '%':                            # hot char!
				if i == len(fmt):               # % at the end
					ret = ret + c
					break
				c = fmt[i]; i = i + 1           # read next
				ret = ret + self.expand_file_flag(c)
			else:
				ret = ret +c                    # common char
		return ret

	def expand_file_flag(self, flag):
		"""Returns the value of a format flag, for the current macro file.

		%f: filename          %F: filename (w/o extension)
		%d: dirname           %D: dirname (only parent dir)
		%p: file path         %e: extension
		%%: a literal %

		Unknown flags are returned untouched, with the leading %.
		set_file_info() must be called first for the current macro.
		"""
		info = self.fileinfo[self.name]           # get dict
		if   flag == '%': x = '%'                 # %% -> %
		elif flag == 'f': x = info['name']
		elif flag == 'F': x = re.sub('\\.[^.]*$','',info['name'])
		elif flag == 'd': x = info['dir']
		elif flag == 'D': x = os.path.split(info['dir'])[-1]
		elif flag == 'p': x = info['path']
		elif flag == 'e': x = re.search('.(\\.([^.]+))?$',info['name']
		                                ).group(2) or ''
		#TODO simplier way for %e ?
		else            : x = '%'+flag            # false alarm
		return x

	def set_file_info(self, macroname):
		"""Saves the path, dir and name of the file the macro refers to.

		macroname is 'infile' or 'outfile', the name of the
		attribute which holds the file name.
		"""
		if self.fileinfo.get(macroname): return   # already done
		# the attribute is found by the given name, so this method
		# does not depend on the macro being currently expanded
		fname = getattr(self, macroname)          # self.infile
		if fname in [STDOUT, MODULEOUT]:
			dirname = ''; path = name = fname
		else:
			path    = os.path.abspath(fname)
			dirname = os.path.dirname(path)
			name    = os.path.basename(path)
		self.fileinfo[macroname] = {
			'path':path, 'dir':dirname, 'name':name}

	def expand(self, line=''):
		"""Expands all the macros found on the line and returns it.

		The macro format is its own (if any), or the default one
		for that macro. Unknown macro names are an Error.
		"""
		m = self.rgx.search(line)
		while m:
			name = self.name = string.lower(m.group('name'))
			fmt = m.group('fmt') or self.dft_fmt.get(name)
			if name == 'date':
				txt = time.strftime(fmt,self.currdate)
			elif name == 'mtime':
				# no file to inspect: use the current date
				if self.infile in [STDIN, MODULEIN]:
					fdate = self.currdate
				else:
					mtime = os.path.getmtime(self.infile)
					fdate = time.localtime(mtime)
				txt = time.strftime(fmt,fdate)
			elif name in ['infile','outfile']:
				self.set_file_info(name)
				txt = self.walk_file_format(fmt)
			else:
				Error("Unknown macro name '%s'"%name)
			# The text is inserted by position. If it was used as
			# a regex replacement, its backslashes (Windows paths,
			# user formats) would be taken as escapes or group
			# references.
			line = line[:m.start()] + txt + line[m.end():]
			m = self.rgx.search(line)
		return line
3056
3057
3058##############################################################################
3059
3060
3061def dumpConfig(source_raw, parsed_config):
3062	onoff = {1:_('ON'), 0:_('OFF')}
3063	data = [
3064	  (_('RC file')        , RC_RAW     ),
3065	  (_('source document'), source_raw ),
3066	  (_('command line')   , CMDLINE_RAW)
3067	]
3068	# first show all RAW data found
3069	for label, cfg in data:
3070		print _('RAW config for %s')%label
3071		for target,key,val in cfg:
3072			target = '(%s)'%target
3073			key    = dotted_spaces("%-14s"%key)
3074			val    = val or _('ON')
3075			print '  %-8s %s: %s'%(target,key,val)
3076		print
3077	# then the parsed results of all of them
3078	print _('Full PARSED config')
3079	keys = parsed_config.keys() ; keys.sort()  # sorted
3080	for key in keys:
3081		val = parsed_config[key]
3082		# filters are the last
3083		if key in ['preproc', 'postproc']:
3084			continue
3085		# flag beautifier
3086		if key in FLAGS.keys()+ACTIONS.keys():
3087			val = onoff.get(val) or val
3088		# list beautifier
3089		if type(val) == type([]):
3090			if key == 'options': sep = ' '
3091			else               : sep = ', '
3092			val = string.join(val, sep)
3093		print "%25s: %s"%(dotted_spaces("%-14s"%key),val)
3094	print
3095	print _('Active filters')
3096	for filter in ['preproc','postproc']:
3097		for rule in parsed_config.get(filter) or []:
3098			print "%25s: %s  ->  %s"%(
3099			   dotted_spaces("%-14s"%filter),rule[0],rule[1])
3100
3101
def get_file_body(file):
	"Returns the BODY lines of the given source document"
	# the file is processed with noconf=1; the BODY is the third
	# item of the second element of the results
	results = process_source_file(file, noconf=1)
	return results[1][2]
3105
3106
3107def finish_him(outlist, config):
3108	"Writing output to screen or file"
3109	outfile = config['outfile']
3110	outlist = unmaskEscapeChar(outlist)
3111	outlist = expandLineBreaks(outlist)
3112
3113	# apply PostProc filters
3114	if config['postproc']:
3115		filters = compile_filters(config['postproc'],
3116		                        _('Invalid PostProc filter regex'))
3117		postoutlist = []
3118		errmsg = _('Invalid PostProc filter replacement')
3119		for line in outlist:
3120			for rgx,repl in filters:
3121				try: line = rgx.sub(repl, line)
3122				except: Error("%s: '%s'"%(errmsg, repl))
3123			postoutlist.append(line)
3124		outlist = postoutlist[:]
3125
3126	if outfile == MODULEOUT:
3127		return outlist
3128	elif outfile == STDOUT:
3129		if GUI:
3130			return outlist, config
3131		else:
3132			for line in outlist: print line
3133	else:
3134		Savefile(outfile, addLineBreaks(outlist))
3135		if not GUI and not QUIET:
3136			print _('%s wrote %s')%(my_name,outfile)
3137
3138	if config['split']:
3139		if not QUIET: print "--- html..."
3140		sgml2html = 'sgml2html -s %s -l %s %s'%(
3141		            config['split'],config['lang'] or lang,outfile)
3142		if not QUIET: print "Running system command:", sgml2html
3143		os.system(sgml2html)
3144
3145
def toc_inside_body(body, toc, config):
	"Expands the TOC mark found on the body lines with the TOC contents"
	if AUTOTOC: return body                     # nothing to expand
	toc_mark = MaskMaster().tocmask
	expanded = []
	for line in body:
		if not string.count(line, toc_mark):
			expanded.append(line)       # common line
		elif config['toc']:
			expanded.extend(toc)        # include if --toc
		# else: no --toc, so the %%toc line is just removed
	return expanded
3160
def toc_tagger(toc, config):
	"Convert t2t-marked TOC (it is a list) to target-tagged TOC"
	# TOC-only, or the target don't have a TOC tag:
	# the TOC is tagged "by hand", converting its marked lines
	if config['toc-only'] or (config['toc'] and not TAGS['TOC']):
		fakeconf = config.copy()
		fakeconf.update({
		  'headers'   : 0,
		  'toc-only'  : 0,
		  'mask-email': 0,
		  'preproc'   : [],
		  'postproc'  : [],
		  'css-sugar' : 0
		})
		tagged, discarded = convert(toc, fakeconf)
		set_global_config(config)   # restore config
		return tagged
	# target TOC is a tag
	if config['toc'] and TAGS['TOC']:
		return [TAGS['TOC']]
	# TOC disabled
	return []
3179
def toc_formatter(toc, config):
	"""Formats TOC for automatic placement between headers and body

	toc is the list of the tagged TOC lines. It is returned
	untouched for TOC-only. An empty list is returned if the TOC
	is disabled. Otherwise a new list is returned, with the TOC
	lines and the target decorations around them.
	"""
	if config['toc-only']: return toc              # no formatting needed
	if not config['toc'] : return []               # TOC disabled
	# work on a copy, the list of the caller must not be changed
	ret = list(toc)
	# TOC open/close tags (if any)
	if TAGS['tocOpen' ]: ret.insert(0, TAGS['tocOpen'])
	if TAGS['tocClose']: ret.append(TAGS['tocClose'])
	# autotoc specific formatting
	if AUTOTOC:
		if rules['autotocwithbars']:           # TOC between bars
			para = TAGS['paragraphOpen']+TAGS['paragraphClose']
			bar  = regex['x'].sub('-'*72,TAGS['bar1'])
			tocbar = [para, bar, para]
			ret = tocbar + ret + tocbar
		if rules['blankendautotoc']:           # blank line after TOC
			ret.append('')
		if rules['autotocnewpagebefore']:      # page break before TOC
			ret.insert(0,TAGS['pageBreak'])
		if rules['autotocnewpageafter']:       # page break after TOC
			ret.append(TAGS['pageBreak'])
	return ret
3202
3203
def doHeader(headers, config):
	"""Composes the target document headers, as a list of lines.

	headers : the three header lines of the source document
	          (if empty, three empty lines are used)
	config  : the config dictionary. These keys are read: headers,
	          target, css-sugar, css-inside, style, encoding and
	          sourcefile

	Returns an empty list if the headers are disabled on config.
	It is an Error if the target has no header template.
	"""
	if not config['headers']: return []
	if not headers: headers = ['','','']
	target = config['target']
	if not HEADER_TEMPLATE.has_key(target):
		Error("doheader: Unknow target '%s'"%target)

	if target in ['html','xhtml'] and config.get('css-sugar'):
		template = string.split(HEADER_TEMPLATE[target+'css'], '\n')
	else:
		template = string.split(HEADER_TEMPLATE[target], '\n')

	# the CSS file contents will be included into the headers?
	css_inside = target in ['html','xhtml'] and \
	             config.get('css-inside') and config.get('style')

	# the template data comes from the config with the same name
	head_data = {'STYLE':'', 'ENCODING':''}
	for key in head_data.keys():
		val = config.get(string.lower(key))
		if key == 'ENCODING': val = get_encoding_string(val, target)
		head_data[key] = val
	# parse header contents
	for i in 0,1,2:
		# expand macros
		contents = MacroMaster(config=config).expand(headers[i])
		# Escapes - on tex, just do it if any \tag{} present
		if target != 'tex' or \
		  (target == 'tex' and re.search(r'\\\w+{', contents)):
			contents = doEscape(target, contents)
		if target == 'lout':
			contents = doFinalEscape(target, contents)

		head_data['HEADER%d'%(i+1)] = contents
	# css-inside removes STYLE line
	if css_inside:
		head_data['STYLE'] = ''
	Debug("Header Data: %s"%head_data, 1)
	# Scan for empty dictionary keys. The template lines which
	# refer to an empty key, and to no other key, are removed.
	# A new list is made on each pass: removing items from the
	# list being scanned would skip the line next to each removed.
	for key in head_data.keys():
		if head_data.get(key): continue
		keyref = '%%(%s)s'%key
		kept = []
		for line in template:
			if string.count(line, keyref):
				sline = string.replace(line, keyref, '')
				if not re.search(r'%\([A-Z0-9]+\)s', sline):
					continue   # line removed
			kept.append(line)
		template = kept
	# populate template with data
	template = string.join(template, '\n') % head_data

	# adding CSS contents into template (for --css-inside)
	if css_inside:
		tags = getTags(config)
		cssfile = config['style']
		# relative paths are relative to the source file dir
		if not os.path.isabs(cssfile):
			infile = config.get('sourcefile')
			cssfile = os.path.join(os.path.dirname(infile), cssfile)
		css = string.join(Readfile(cssfile, 1, 1), '\n')
		css = "%s\n%s\n%s\n" % (tags['cssOpen'], css, tags['cssClose'])
		# The CSS goes right before the </HEAD> tag.
		# A function is used as the replacement because the
		# backslashes of a replacement string would be taken as
		# escapes.
		template = re.sub('(?i)(</HEAD>)',
		                  lambda m, css=css: css + m.group(1),
		                  template)

	return string.split(template, '\n')
3265
def doCommentLine(txt):
	"Returns the text as a target comment line ('' if no comment tag)"
	txt = maskEscapeChar(txt)
	comment_tag = TAGS['comment']
	# the -- string ends a (h|sg|xht)ml comment :(
	# so a backslash is inserted between the hyphens
	if string.count(comment_tag, '--') and string.count(txt, '--'):
		txt = re.sub('-(?=-)', r'-\\', txt)
	if not comment_tag:
		return ''
	return regex['x'].sub(txt, comment_tag)
3276
def doFooter(config):
	"Returns the document footer lines ([] if headers are disabled)"
	if not config['headers']: return []
	target = config['target']
	cmdline = config['realcmdline']
	if target == 'tex':
		typename = 'LaTeX2e'
	else:
		typename = target
	ppgd = '%s code generated by %s %s (%s)'%(
	        typename,my_name,my_version,my_url)
	cmdline = 'cmdline: %s %s'%(my_name, string.join(cmdline, ' '))
	# a blank line, two comment lines and the End Of Document tag
	return [
	  '',
	  doCommentLine(ppgd),
	  doCommentLine(cmdline),
	  TAGS['EOD']
	]
3292
def doEscape(target,txt):
	"Target-specific special escapes. Apply *before* insert any tag."
	tmpmask = 'vvvvThisEscapingSuxvvvv'
	if target in ['html','sgml','xhtml']:
		# the & is the first one, before the entities are inserted
		for char, entity in [('&','&amp;'), ('<','&lt;'), ('>','&gt;')]:
			txt = re.sub(char, entity, txt)
		if target == 'sgml':
			txt = re.sub('\xff','&yuml;',txt)  # "+y
		return txt
	if target == 'pm6':
		return re.sub('<','<\#60>',txt)
	if target == 'mgp':
		# add leading blank to avoid parse
		return re.sub('^%',' %',txt)
	if target == 'man':
		txt = re.sub("^([.'])", '\\&\\1',txt)           # command ID
		return string.replace(txt,ESCCHAR, ESCCHAR+'e') # \e
	if target == 'lout':
		# TIP: / moved to FinalEscape to avoid //italic//
		# TIP: these are also converted by lout:  ...  ---  --
		# The escape char is saved as a temporary mask, so the
		# ones added by the quote escaping are not escaped again
		txt = string.replace(txt, ESCCHAR, tmpmask)             # \
		txt = string.replace(txt, '"', '"%s""'%ESCCHAR)         # "\""
		txt = re.sub('([|&{}@#^~])', '"\\1"',txt)               # "@"
		return string.replace(txt, tmpmask, '"%s"'%(ESCCHAR*2)) # "\\"
	if target == 'tex':
		# mark literal \ to be changed to $\backslash$ later
		txt = string.replace( txt, ESCCHAR, tmpmask)
		for chars, escaped in [
		  ('([#$&%{}])', ESCCHAR+r'\1'  ),              # \%
		  ('([~^])'    , ESCCHAR+r'\1{}'),              # \~{}
		  ('([<|>])'   ,         r'$\1$')               # $>$
		]:
			txt = re.sub(chars, escaped, txt)
		# TIP the _ is escaped at the end
		return string.replace(txt, tmpmask,
		                      maskEscapeChar(r'$\backslash$'))
	# the other targets have nothing to escape
	return txt
3326
3327# TODO man: where - really needs to be escaped?
def doFinalEscape(target, txt):
	"Last escapes of each line"
	if target == 'tex':
		txt = string.replace(txt, '_', r'\_')
		# the masked underscores are restored unescaped
		return string.replace(txt, 'vvvvTexUndervvvv', '_')  # shame!
	# on the other targets, a single replacement (if any)
	if   target == 'pm6' : old, new = ESCCHAR+'<', r'<\#92><'
	elif target == 'man' : old, new = '-', r'\-'
	elif target == 'sgml': old, new = '[', '&lsqb;'
	elif target == 'lout': old, new = '/', '"/"'
	else                 : return txt
	return string.replace(txt, old, new)
3338
def EscapeCharHandler(action, data):
	"""
	Mask or unmask the Escape Char on the given string.

	action: 'mask' changes every backslash to ESCCHAR,
	        'unmask' changes every ESCCHAR back to a backslash
	data  : the string to change (blank strings are returned as is)

	Any other action is reported with Error().
	"""
	# blank data: nothing to do, the action is not even checked
	if not string.strip(data):
		return data
	if action == 'mask':
		return string.replace(data, '\\', ESCCHAR)
	if action != 'unmask':
		Error("EscapeCharHandler: Invalid action '%s'"%action)
	return string.replace(data, ESCCHAR, '\\')
3346
def maskEscapeChar(data):
	"Mask the Escape Char (backslash) on a string or on each list item"
	if type(data) != type([]):
		return EscapeCharHandler('mask', data)
	masked = []
	for item in data:
		masked.append(EscapeCharHandler('mask', item))
	return masked
3352
def unmaskEscapeChar(data):
	"Undo the Escape Char masking on a string or on each list item"
	if type(data) != type([]):
		return EscapeCharHandler('unmask', data)
	unmasked = []
	for item in data:
		unmasked.append(EscapeCharHandler('unmask', item))
	return unmasked
3358
def addLineBreaks(mylist):
	"Return a new list where each line uses (and ends with) the LB line break"
	result = []
	for text in mylist:
		# embedded \n's are changed to LB, then the final LB is added
		result.append(string.replace(text, '\n', LB) + LB)
	return result
3366
3367# convert ['foo\nbar'] to ['foo', 'bar']
def expandLineBreaks(mylist):
	"Return a new list where each item with embedded line breaks is split"
	pieces = []
	for item in mylist:
		for piece in string.split(item, '\n'):
			pieces.append(piece)
	return pieces
3373
def compile_filters(filters, errmsg='Filter'):
	"""
	Compile the regex pattern of each (pattern, replacement) filter.

	filters: list of (pattern, replacement) pairs. It is changed in
	         place: each item becomes (compiled regex, replacement).
	errmsg : prefix for the message shown when a pattern is invalid

	Returns the very same 'filters' object. When it is empty or
	None, it is returned untouched.
	A pattern that cannot be compiled is reported with Error().
	"""
	if not filters:
		return filters
	for i in range(len(filters)):
		patt, repl = filters[i]
		try:
			rgx = re.compile(patt)
		# Only pattern problems mean an invalid filter. A catch-all
		# here would also trap KeyboardInterrupt and SystemExit.
		except (re.error, TypeError):
			Error("%s: '%s'"%(errmsg, patt))
		else:
			# saved only when compiled, so a failure never stores
			# an undefined (or the previous) regex
			filters[i] = (rgx, repl)
	return filters
3382
def enclose_me(tagname, txt):
	"Return txt between the <tagname>Open and <tagname>Close tags"
	opening = TAGS.get(tagname+'Open')
	closing = TAGS.get(tagname+'Close')
	return opening + txt + closing
3385
def beautify_me(name, line):
	"Tag a beautifier on the line, where name is: bold, italic or underline"
	key = 'font' + string.capitalize(name)
	# the matched text (group 1) goes between the Open and Close tags
	template = r'%s\1%s' % (TAGS[key+'Open'], TAGS[key+'Close'])
	return regex[key].sub(template, line)
3394
def get_tagged_link(label, url):
	"""
	Compose the target tag for a link, based on the global
	CONF, TAGS, regex and rules.

	label: the link text, empty on a simple (unnamed) link
	url  : the link address, an URL or an e-mail

	Returns the tagged link as a string.
	"""
	ret = ''
	target = CONF['target']
	image_re = regex['img']

	# set link type
	if regex['email'].match(url):
		linktype = 'email'
	else:
		linktype = 'url';

	# escape specials from TEXT parts
	label = doEscape(target,label)

	# escape specials from link URL
	if rules['linkable'] and rules['escapeurl']:
		url = doEscape(target, url)

	# if not linkable, the URL is plain text, that needs escape
	if not rules['linkable']:
		if target == 'tex':
			url = re.sub('^#', '\#', url) # ugly, but compile
		else:
			url = doEscape(target,url)

	# adding protocol to guessed link
	# (http:// when the address starts with 'w', ftp:// otherwise)
	guessurl = ''
	if linktype == 'url' and \
	   re.match(regex['_urlskel']['guess'], url):
		if url[0] == 'w': guessurl = 'http://' +url
		else            : guessurl =  'ftp://' +url

		# not link aware targets -> protocol is useless
		if not rules['linkable']: guessurl = ''

	# simple link (not guessed)
	if not label and not guessurl:
		if CONF['mask-email'] and linktype == 'email':
			# do the email mask feature (no TAGs, just text)
			# '@' becomes ' (a) ', dots become blanks, all inside <>
			url = string.replace(url,'@',' (a) ')
			url = string.replace(url,'.',' ')
			url = "<%s>" % url
			if rules['linkable']: url = doEscape(target, url)
			ret = url
		else:
			# just add link data to tag
			tag = TAGS[linktype]
			ret = regex['x'].sub(url,tag)

	# named link or guessed simple link
	else:
		# adjusts for guessed link
		if not label: label = url         # no   protocol
		if guessurl : url   = guessurl    # with protocol

		# image inside link!
		if image_re.match(label):
			if rules['imglinkable']:  # get image tag
				label = parse_images(label)
			else:                     #  img@link !supported
				label = "(%s)"%image_re.match(label).group(1)

		# putting data on the right appearance order
		if rules['linkable']:
			urlorder = [url, label]   # link before label
		else:
			urlorder = [label, url]   # label before link

		# add link data to tag (replace \a's)
		# one regex['x'] match is filled by each urlorder item
		ret = TAGS["%sMark"%linktype]
		for data in urlorder:
			ret = regex['x'].sub(data,ret,1)

	return ret
3469
3470
def parse_deflist_term(line):
	"Return the term contents of a definition list line"
	img_re = regex['img']
	term   = regex['deflist'].search(line).group(3)

	# where an image is not supported as term, each one found
	# is shown as its file name between parens: (image.jpg)
	if not rules['imgasdefterm']:
		found = img_re.search(term)
		while found:
			term = img_re.sub('(%s)'%found.group(1), term, 1)
			found = img_re.search(term)

	#TODO tex: escape ] on term. \], \rbrack{} and \verb!]! don't work :(
	return term
3484
3485
def get_tagged_bar(line):
	"""
	Return the tagged horizontal bar for the given bar line.
	A line that does not match the bar regex is returned untouched.
	"""
	found = regex['bar'].search(line)
	if not found:
		return line
	barchars = found.group(2)

	# map strong bar to pagebreak (this changes the global TAGS)
	if rules['mapbar2pagebreak'] and TAGS['pageBreak']:
		TAGS['bar2'] = TAGS['pageBreak']

	# bar type: the '=' char means bar2, anything else is bar1
	if barchars[0] == '=':
		tagname = 'bar2'
	else:
		tagname = 'bar1'
	template = TAGS[tagname]

	# to avoid comment tag confusion like <!-- ------ -->
	if string.find(TAGS['comment'], '--') >= 0:
		barchars = string.replace(barchars, '--', '__')

	# tag line
	return regex['x'].sub(barchars, template)
3505
3506
def get_image_align(line):
	"""
	Return the align (left, right or center) of the first image
	found on the line. The line must have an image mark.
	"""
	# first clear the marks that can mess the align detection:
	# the deflist separator, the list separator and the quote mark
	for mark in (SEPARATOR+'$', '^'+SEPARATOR, '^[\t]+'):
		line = re.sub(mark, '', line)

	# where is the image: touching the line start? the line end?
	found = regex['img'].search(line)
	at_start = found.start() == 0
	at_end   = found.end() == len(line)

	if at_start and not at_end:
		align = 'left'      # ^img + text$
	elif at_end and not at_start:
		align = 'right'     # ^text + img$
	else:
		align = 'center'    # default align

	# inside a table, the image position is ignored
	if BLOCK.isblock('table'):
		align = 'center'

	return align
3530
3531
3532# reference: http://www.iana.org/assignments/character-sets
3533# http://www.drclue.net/F1.cgi/HTML/META/META.html
def get_encoding_string(enc, target):
	"""
	Return the encoding name to be used on the given target.

	enc   : the encoding name, as informed by the user
	target: the target name

	First the common aliases are normalized to the us-ascii,
	ibm85X and iso-8859-X forms. Then, if the target has a
	translation table (just tex by now) and the lowercased name is
	listed there, the target specific name is returned. Otherwise,
	the normalized name is returned.
	An empty 'enc' gives an empty string.
	"""
	if not enc: return ''
	# target specific translation table
	translate = {
	'tex': {
	  # names for the LaTeX inputenc package
	  # missing: ansinew , applemac , cp437 , cp437de , cp865
	  'us-ascii'    : 'ascii',
	  'windows-1250': 'cp1250',
	  'windows-1252': 'cp1252',
	  'ibm850'      : 'cp850',
	  'ibm852'      : 'cp852',
	  'iso-8859-1'  : 'latin1',
	  'iso-8859-2'  : 'latin2',
	  'iso-8859-3'  : 'latin3',
	  'iso-8859-4'  : 'latin4',
	  # ISO-8859-5 is Cyrillic, it is not a "latinN" charset.
	  # NOTE(review): 'iso88595' should come from the LaTeX
	  # cyrillic bundle, confirm it on the supported TeX systems
	  'iso-8859-5'  : 'iso88595',
	  # Latin-5 is ISO-8859-9 and Latin-9 is ISO-8859-15
	  'iso-8859-9'  : 'latin5',
	  'iso-8859-15' : 'latin9',
	  'koi8-r'      : 'koi8-r'
	  }
	}
	# normalization
	enc = re.sub('(?i)(us[-_]?)?ascii|us|ibm367','us-ascii'  , enc)
	enc = re.sub('(?i)(ibm|cp)?85([02])'        ,'ibm85\\2'  , enc)
	enc = re.sub('(?i)(iso[_-]?)?8859[_-]?'     ,'iso-8859-' , enc)
	enc = re.sub('iso-8859-($|[^1-9]).*'        ,'iso-8859-1', enc)
	# apply translation table (explicit lookups, so no other
	# error is silently hidden by a catch-all except)
	table = translate.get(target)
	if table:
		enc = table.get(string.lower(enc), enc)
	return enc
3563
3564
3565##############################################################################
3566##MerryChristmas,IdontwanttofighttonightwithyouImissyourbodyandIneedyourlove##
3567##############################################################################
3568
3569
def process_source_file(file='', noconf=0, contents=[]):
	"""
	Find and join all the configuration available for a source
	document, also splitting the document into its parts.
	No sanity checking is done on this step.

	file    : the source file name
	noconf  : if true, no config is read and the returned config
	          is an empty dictionary
	contents: the source document lines. If informed, they are
	          used instead of reading 'file' (module use).

	The config is joined on this order:
	   1. The user configuration file (i.e. $HOME/.txt2tagsrc)
	   2. The source document's CONF area
	   3. The command line options

	Returns a tuple of two items:
	   1. The parsed config dictionary
	   2. The document's parts, as a (head, conf, body) tuple

	All the conversion process is based on the data returned here.
	The source file is read on this step only.
	"""
	# the document comes from memory or from a file
	if contents: source = SourceDocument(contents=contents)
	else       : source = SourceDocument(file)
	head, conf, body = source.split()
	parts = (head, conf, body)
	Message(_("Source document contents stored"),2)

	# no config wanted, we're done
	if noconf:
		return {}, parts

	# read the document config and join it with all the config
	# directives found elsewhere, then parse it
	doc_raw = source.get_raw_config()
	all_raw = RC_RAW + doc_raw + CMDLINE_RAW
	Message(_("Parsing and saving all config found (%03d items)")%(
	        len(all_raw)),1)
	parsed = ConfigMaster(all_raw).parse()

	# the file names are manually added to the config dic
	if contents:
		parsed['sourcefile'] = MODULEIN
		parsed['infile'] = MODULEIN
		parsed['outfile'] = MODULEOUT
	else:
		parsed['sourcefile'] = file

	# maybe should we dump the config found?
	if parsed.get('dump-config'):
		dumpConfig(doc_raw, parsed)
		Quit()

	# okay, all done
	Debug("FULL config for this file: %s"%parsed, 1)
	return parsed, parts
3619
def get_infiles_config(infiles):
	"""
	Return a list with the process_source_file() results for each
	one of the input files, on the same order. No input files
	means an empty list.
	This function is supposed to be the very first one to be
	called, before any processing.
	"""
	configs = []
	for name in infiles or []:
		found = process_source_file(name)
		configs.append(found)
	return configs
3631
3632def convert_this_files(configs):
3633	global CONF
3634	for myconf,doc in configs:                 # multifile support
3635		target_head = []
3636		target_toc  = []
3637		target_body = []
3638		target_foot = []
3639		source_head, source_conf, source_body = doc
3640		myconf = ConfigMaster().sanity(myconf)
3641		# compose the target file Headers
3642		#TODO escape line before?
3643		#TODO see exceptions by tex and mgp
3644		Message(_("Composing target Headers"),1)
3645		target_head = doHeader(source_head, myconf)
3646		# parse the full marked body into tagged target
3647		first_body_line = (len(source_head) or 1)+ len(source_conf) + 1
3648		Message(_("Composing target Body"),1)
3649		target_body, marked_toc = convert(source_body, myconf,
3650		                          firstlinenr=first_body_line)
3651		# if dump-source, we're done
3652		if myconf['dump-source']:
3653			for line in source_head+source_conf+target_body:
3654				print line
3655			return
3656		# make TOC (if needed)
3657		Message(_("Composing target TOC"),1)
3658		tagged_toc  = toc_tagger(marked_toc, myconf)
3659		target_toc  = toc_formatter(tagged_toc, myconf)
3660		target_body = toc_inside_body(target_body, target_toc, myconf)
3661		if not AUTOTOC and not myconf['toc-only']: target_toc = []
3662		# compose the target file Footer
3663		Message(_("Composing target Footer"),1)
3664		target_foot = doFooter(myconf)
3665		# finally, we have our document
3666		outlist = target_head + target_toc + target_body + target_foot
3667		# if on GUI, abort before finish_him
3668		# if module, return finish_him as list
3669		# else, write results to file or STDOUT
3670		if GUI:
3671			return outlist, myconf
3672		elif myconf.get('outfile') == MODULEOUT:
3673			return finish_him(outlist, myconf), myconf
3674		else:
3675			Message(_("Saving results to the output file"),1)
3676			finish_him(outlist, myconf)
3677
3678
def parse_images(line):
	"Return the line with all its image marks changed to image tags"
	img_re = regex['img']
	while 1:
		found = img_re.search(line)
		# stop on no more images, or when the image tag is '[\a]'
		if not found or TAGS['img'] == '[\a]':
			break
		imgfile = found.group(1)
		tag = TAGS['img']

		# HTML, XHTML and mgp!
		if rules['imgalignable']:
			align = get_image_align(line)
			# add align on tag
			align_tag = TAGS['imgAlign'+string.capitalize(align)]
			tag = regex['_imgAlign'].sub(align_tag, tag, 1)
			# dirty fix to allow centered solo images
			if align == 'center' and TARGET in ['html','xhtml']:
				leftover = img_re.sub('', line, 1)
				if re.match('^\s+$', leftover):
					tag = "<center>%s</center>" %tag

		if TARGET == 'tex':
			tag = re.sub(r'\\b',r'\\\\b',tag)
			# the underscores are masked, doFinalEscape() undoes it
			imgfile = string.replace(imgfile, '_', 'vvvvTexUndervvvv')

		# first the tag replaces the image mark, then the file
		# name replaces the first regex['x'] match
		line = img_re.sub(tag, line, 1)
		line = regex['x'].sub(imgfile, line, 1)
	return line
3705
3706
def add_inline_tags(line):
	"Return the line with the beautifiers and images tagged"
	# beautifiers first, each one is applied only if found
	for fontname in ('Bold', 'Italic', 'Underline'):
		if not regex['font'+fontname].search(line):
			continue
		line = beautify_me(fontname, line)

	# and then the images
	return parse_images(line)
3715
3716
def get_include_contents(file, path=''):
	"""
	Parse the %!include: value and read the included file.

	file: the include value: a file name, maybe enclosed by a
	      doubled mark that sets the include type
	      (``verb``, ""raw"", ''passthru'')
	path: the directory where the file name is searched

	Returns the (include type, file lines) tuple. The default
	type is 't2t', when just the BODY of the file is returned,
	preceded by an 'INCLUDED' comment line.
	"""
	marks = {'`':'verb', '"':'raw', "'":'passthru' }
	inctype = 't2t'

	# set include type and remove identifier marks
	first = file[0]
	if first in marks.keys() and file[:2] == file[-2:] == first*2:
		inctype = marks[first]
		file = file[2:-2]

	# handle remote dir execution
	filepath = os.path.join(path, file)

	# read included file contents
	lines = Readfile(filepath, remove_linebreaks=1)

	# default txt2tags marked text, just BODY matters
	if inctype == 't2t':
		lines = get_file_body(filepath)
		lines.insert(0, '%%INCLUDED(%s) starts here: %s'%(inctype,file))
		# This appears when included hit EOF with verbatim area open
		#lines.append('%%INCLUDED(%s) ends here: %s'%(id,file))
	return inctype, lines
3738
3739
def set_global_config(config):
	"""
	Save the given config as the global CONF and refresh the other
	globals: TAGS, rules and regex (with the getTags(), getRules()
	and getRegexes() results) and TARGET (the config's target).
	"""
	global CONF, TAGS, regex, rules, TARGET
	CONF   = config
	TAGS   = getTags(CONF)
	rules  = getRules(CONF)
	regex  = getRegexes()
	TARGET = config['target']  # save for buggy functions that need global
3747
3748
def convert(bodylines, config, firstlinenr=1):
	"""
	Convert the source body lines into the tagged target lines.

	bodylines  : list with the source document body lines
	config     : the config dictionary for this document, which
	             is made global by set_global_config()
	firstlinenr: the source line number of the first body line,
	             used on the Debug messages

	Returns the (target lines, marked TOC) tuple.
	If the 'toc-only' config is on, the first item is an empty
	list. If 'dump-source' is on, the first item holds the source
	lines instead (where each t2t-type %!include call is replaced
	by the included lines).
	"""
	global BLOCK

	set_global_config(config)

	target = config['target']
	BLOCK = BlockMaster()
	MASK  =  MaskMaster()
	TITLE = TitleMaster()

	ret = []
	dump_source = []
	f_lastwasblank = 0

	# compiling all PreProc regexes
	pre_filter = compile_filters(
		CONF['preproc'], _('Invalid PreProc filter regex'))

	# let's mark it up!
	# linenr : the line number shown on messages
	# lineref: the bodylines index of the next line to be read
	linenr = firstlinenr-1
	lineref = 0
	while lineref < len(bodylines):
		# defaults
		MASK.reset()
		# the callable that will receive the parsed line. If not
		# set along the way, it is chosen at the end of the loop.
		results_box = ''

		untouchedline = bodylines[lineref]
		dump_source.append(untouchedline)

		line = re.sub('[\n\r]+$','',untouchedline)   # del line break

		# apply PreProc filters
		if pre_filter:
			errmsg = _('Invalid PreProc filter replacement')
			for rgx,repl in pre_filter:
				try: line = rgx.sub(repl, line)
				except: Error("%s: '%s'"%(errmsg, repl))

		line = maskEscapeChar(line)                  # protect \ char
		linenr  = linenr  +1
		lineref = lineref +1

		Debug(repr(line), 2, linenr)  # heavy debug: show each line

		# any NOT table line (or comment), closes an open table
		if ( BLOCK.isblock('table') or
		      ( BLOCK.isblock('verb') and
		        BLOCK.prop('mapped') == 'table'
		       )
		    ) \
		   and not regex['table'].search(line) \
		   and not regex['comment'].search(line):
			ret.extend(BLOCK.blockout())

		# any NOT quote line (or comment) closes all open quotes
		if BLOCK.isblock('quote') \
		   and not regex['quote'].search(line) \
		   and not regex['comment'].search(line):
			while BLOCK.isblock('quote'):
				ret.extend(BLOCK.blockout())


		#-------------------------[ Raw Text ]----------------------

		# we're already on a raw block
		if BLOCK.block() == 'raw':

			# closing raw
			if regex['blockRawClose'].search(line):
				ret.extend(BLOCK.blockout())
				continue

			# normal raw-inside line
			BLOCK.holdadd(line)
			continue

		# detecting raw block init
		if regex['blockRawOpen'].search(line):
			ret.extend(BLOCK.blockin('raw'))
			continue

		# one line verb-formatted text
		if regex['1lineRaw'].search(line):
			ret.extend(BLOCK.blockin('raw'))
			line = regex['1lineRaw'].sub('',line)
			BLOCK.holdadd(line)
			ret.extend(BLOCK.blockout())
			continue

		#-----------------[ Verbatim (PRE-formatted) ]--------------

		#TIP we'll never support beautifiers inside verbatim

		# we're already on a verb block
		if BLOCK.block() == 'verb':

			# closing verb
			if regex['blockVerbClose'].search(line):
				ret.extend(BLOCK.blockout())
				continue

			# normal verb-inside line
			BLOCK.holdadd(line)
			continue

		# detecting verb block init
		if regex['blockVerbOpen'].search(line):
			ret.extend(BLOCK.blockin('verb'))
			f_lastwasblank = 0
			continue

		# one line verb-formatted text
		if regex['1lineVerb'].search(line):
			ret.extend(BLOCK.blockin('verb'))
			line = regex['1lineVerb'].sub('',line)
			BLOCK.holdadd(line)
			ret.extend(BLOCK.blockout())
			f_lastwasblank = 0
			continue

		# tables are mapped to verb when target is not table-aware
		if not rules['tableable'] and regex['table'].search(line):
			if not BLOCK.isblock('verb'):
				ret.extend(BLOCK.blockin('verb'))
				BLOCK.propset('mapped', 'table')
				BLOCK.holdadd(line)
				continue

		#---------------------[ blank lines ]-----------------------

		if regex['blankline'].search(line):

			# close open paragraph
			if BLOCK.isblock('para'):
				ret.extend(BLOCK.blockout())
				f_lastwasblank = 1
				continue

			# close all open quotes
			while BLOCK.isblock('quote'):
				ret.extend(BLOCK.blockout())

			# closing all open lists
			if f_lastwasblank:          # 2nd consecutive blank
				if BLOCK.block()[-4:] == 'list':
					BLOCK.holdaddsub('')   # helps parser
				while BLOCK.depth:  # closes list (if any)
					ret.extend(BLOCK.blockout())
				continue            # ignore consecutive blanks

			# paragraph (if any) is wanted inside lists also
			if BLOCK.block()[-4:] == 'list':
				BLOCK.holdaddsub('')
			else:
				# html: show blank line (needs tag)
				if target in ['html','xhtml']:
					ret.append(TAGS['paragraphOpen']+\
					           TAGS['paragraphClose'])
				# otherwise we just show a blank line
				else:
					ret.append('')

			f_lastwasblank = 1
			continue


		#---------------------[ special ]---------------------------

		if regex['special'].search(line):
			# include command
			targ, key, val = ConfigLines().parse_line(
			                   line, 'include', target)
			if key:
				Debug("Found config '%s', value '%s'"%(
				       key,val),1,linenr)

				incpath = os.path.dirname(CONF['sourcefile'])
				incfile = val
				err = _('A file cannot include itself (loop!)')
				if CONF['sourcefile'] == incfile:
					Error("%s: %s"%(err,incfile))
				inctype, inclines = get_include_contents(
				                      incfile, incpath)
				# verb, raw and passthru are easy
				if inctype != 't2t':
					ret.extend(BLOCK.blockin(inctype))
					BLOCK.holdextend(inclines)
					ret.extend(BLOCK.blockout())
				else:
					# insert include lines into body
					# (lineref was already incremented, so the
					# included lines are the next ones to be read)
					#TODO include maxdepth limit
					bodylines = bodylines[:lineref] \
					           +inclines \
					           +bodylines[lineref:]
					#TODO fix path if include@include
					# remove %!include call
					if CONF['dump-source']:
						dump_source.pop()
				continue
			else:
				Debug('Bogus Special Line',1,linenr)

		#---------------------[ dump-source ]-----------------------

		# we don't need to go any further
		if CONF['dump-source']:
			continue

		#---------------------[ comments ]--------------------------

		# just skip them (if not macro)
		if regex['comment'].search(line) and not \
		   regex['macros'].match(line) and not \
		   regex['toc'].match(line):
			continue

		# valid line, reset blank status
		f_lastwasblank = 0

		#---------------------[ Horizontal Bar ]--------------------

		if regex['bar'].search(line):

			# a bar closes a paragraph
			if BLOCK.isblock('para'):
				ret.extend(BLOCK.blockout())

			# we need to close all opened quote blocks
			# if bar isn't allowed inside or if not a quote line
			if BLOCK.isblock('quote'):
				if not rules['barinsidequote'] or \
				   not regex['quote'].search(line):
					while BLOCK.isblock('quote'):
						ret.extend(BLOCK.blockout())

			# quote + bar: continue processing for quoting
			if rules['barinsidequote'] and \
			   regex['quote'].search(line):
				pass

			# just bar: save tagged line and we're done
			else:
				line = get_tagged_bar(line)
				if BLOCK.block()[-4:] == 'list':
					BLOCK.holdaddsub(line)
				elif BLOCK.block():
					BLOCK.holdadd(line)
				else:
					ret.append(line)
					Debug("BAR: %s"%line, 6)
				continue

		#---------------------[ Title ]-----------------------------

		#TODO set next blank and set f_lastwasblank or f_lasttitle
		if (regex['title'].search(line) or
		    regex['numtitle'].search(line)) and \
		    BLOCK.block()[-4:] != 'list':

			# a title closes a paragraph
			if BLOCK.isblock('para'):
				ret.extend(BLOCK.blockout())

			TITLE.add(line)
			tagged_title = TITLE.get()
			ret.extend(tagged_title)
			Debug("TITLE: %s"%tagged_title, 6)

			f_lastwasblank = 1
			continue

		#---------------------[ %%toc ]-----------------------

		# %%toc line closes paragraph
		if BLOCK.block() == 'para' and regex['toc'].search(line):
			ret.extend(BLOCK.blockout())

		#---------------------[ apply masks ]-----------------------

		line = MASK.mask(line)

		#XXX from here, only block-inside lines will pass

		#---------------------[ Quote ]-----------------------------

		if regex['quote'].search(line):

			# store number of leading TABS
			quotedepth = len(regex['quote'].search(line).group(0))

			# SGML doesn't support nested quotes
			if rules['quotenotnested']: quotedepth = 1

			# new quote
			if not BLOCK.isblock('quote'):
				ret.extend(BLOCK.blockin('quote'))

			# new subquotes
			while BLOCK.depth < quotedepth:
				BLOCK.blockin('quote')

			# closing quotes
			while quotedepth < BLOCK.depth:
				ret.extend(BLOCK.blockout())

		#---------------------[ Lists ]-----------------------------

		# an empty item also closes the current list
		if BLOCK.block()[-4:] == 'list':
			m = regex['listclose'].match(line)
			if m:
				listindent = m.group(1)
				listtype = m.group(2)
				currlisttype = BLOCK.prop('type')
				currlistindent = BLOCK.prop('indent')
				if listindent == currlistindent and \
				   listtype == currlisttype:
					ret.extend(BLOCK.blockout())
					continue

		if   regex['list'].search(line) or \
		  regex['numlist'].search(line) or \
		  regex['deflist'].search(line):

			listindent = BLOCK.prop('indent')
			listids = string.join(LISTNAMES.keys(), '')
			m = re.match('^( *)([%s]) '%listids, line)
			listitemindent = m.group(1)
			listtype = m.group(2)
			listname = LISTNAMES[listtype]
			# a list item line is always saved by holdadd
			results_box = BLOCK.holdadd

			# del list ID (and separate term from definition)
			if listname == 'deflist':
				term = parse_deflist_term(line)
				line = regex['deflist'].sub(term+SEPARATOR,line)
			else:
				line = regex[listname].sub(SEPARATOR,line)

			# don't cross depth limit
			maxdepth = rules['listmaxdepth']
			if maxdepth and BLOCK.depth == maxdepth:
				if len(listitemindent) > len(listindent):
					listitemindent = listindent

			# open mother list or sublist
			if BLOCK.block()[-4:] != 'list' or \
			   len(listitemindent) > len(listindent):
				ret.extend(BLOCK.blockin(listname))
				BLOCK.propset('indent',listitemindent)
				BLOCK.propset('type',listtype)

			# closing sublists
			while len(listitemindent) < len(BLOCK.prop('indent')):
				ret.extend(BLOCK.blockout())

			# o-oh, sublist before list ("\n\n  - foo\n- foo")
			# fix: close sublist (as mother), open another list
			if BLOCK.block()[-4:] != 'list':
				ret.extend(BLOCK.blockin(listname))
				BLOCK.propset('indent',listitemindent)
				BLOCK.propset('type',listtype)

		#---------------------[ Table ]-----------------------------

		#TODO escape undesired format inside table
		#TODO add pm6 target
		if regex['table'].search(line):

			if not BLOCK.isblock('table'):   # first table line!
				ret.extend(BLOCK.blockin('table'))
				BLOCK.tableparser.__init__(line)

			tablerow = TableMaster().parse_row(line)
			BLOCK.tableparser.add_row(tablerow)     # save config

			# maintain line to unmask and inlines
			line = string.join(tablerow['cells'], SEPARATOR)

		#---------------------[ Paragraph ]-------------------------

		if not BLOCK.block() and \
		   not string.count(line, MASK.tocmask): # new para!
			ret.extend(BLOCK.blockin('para'))


		############################################################
		############################################################
		############################################################


		#---------------------[ Final Parses ]----------------------

		# the target-specific special char escapes for body lines
		line = doEscape(target,line)

		line = add_inline_tags(line)
		line = MASK.undo(line)

		#---------------------[ Hold or Return? ]-------------------

		### now we must choose here to put the parsed line
		#
		if not results_box:
			# list item extra lines
			if BLOCK.block()[-4:] == 'list':
				results_box = BLOCK.holdaddsub
			# other blocks
			elif BLOCK.block():
				results_box = BLOCK.holdadd
			# no blocks
			else:
				line = doFinalEscape(target, line)
				results_box = ret.append

		results_box(line)

	# EOF: close any open para/verb/lists/table/quotes
	Debug('EOF',7)
	while BLOCK.block():
		ret.extend(BLOCK.blockout())

	# maybe close some opened title area?
	if rules['titleblocks']:
		ret.extend(TITLE.close_all())

	# maybe a major tag to enclose body? (like DIV for CSS)
	if TAGS['bodyOpen' ]: ret.insert(0, TAGS['bodyOpen'])
	if TAGS['bodyClose']: ret.append(TAGS['bodyClose'])

	if CONF['toc-only']: ret = []
	marked_toc = TITLE.dump_marked_toc(CONF['toc-level'])

	# if dump-source, all parsing is ignored
	if CONF['dump-source']: ret = dump_source[:]

	return ret, marked_toc
4186
4187
4188
4189##############################################################################
4190################################### GUI ######################################
4191##############################################################################
4192#
4193# tk help: http://python.org/topics/tkinter/
4194#    tuto: http://ibiblio.org/obp/py4fun/gui/tkPhone.html
4195#          /usr/lib/python*/lib-tk/Tkinter.py
4196#
4197# grid table : row=0, column=0, columnspan=2, rowspan=2
4198# grid align : sticky='n,s,e,w' (North, South, East, West)
4199# pack place : side='top,bottom,right,left'
4200# pack fill  : fill='x,y,both,none', expand=1
4201# pack align : anchor='n,s,e,w' (North, South, East, West)
4202# padding    : padx=10, pady=10, ipadx=10, ipady=10 (internal)
# checkbox   : offvalue is returned if the _user_ deselected the box
4204# label align: justify=left,right,center
4205
def load_GUI_resources():
	"Import the Tk modules and dialogs used by the GUI, as globals"
	global askopenfilename, showinfo, showwarning, showerror, Tkinter
	# the file chooser
	from tkFileDialog import askopenfilename
	# the message popups
	from tkMessageBox import showinfo
	from tkMessageBox import showwarning
	from tkMessageBox import showerror
	# and Tk itself
	import Tkinter
4212
4213class Gui:
4214	"Graphical Tk Interface"
4215	def __init__(self, conf={}):
4216		self.root = Tkinter.Tk()    # mother window, come to butthead
4217		self.root.title(my_name)    # window title bar text
4218		self.window = self.root     # variable "focus" for inclusion
4219		self.row = 0                # row count for grid()
4220
4221		self.action_lenght = 150    # left column lenght (pixel)
4222		self.frame_margin  = 10     # frame margin size  (pixel)
4223		self.frame_border  = 6      # frame border size  (pixel)
4224
4225		# the default Gui colors, can be changed by %!guicolors
4226		self.dft_gui_colors = ['blue','white','lightblue','black']
4227		self.gui_colors = []
4228		self.bg1 = self.fg1 = self.bg2 = self.fg2 = ''
4229
4230		# on Tk, vars need to be set/get using setvar()/get()
4231		self.infile  = self.setvar('')
4232		self.target  = self.setvar('')
4233		self.target_name = self.setvar('')
4234
4235		# the checks appearance order
4236		self.checks  = [
4237		  'headers','enum-title','toc','mask-email',
4238		  'toc-only','stdout']
4239
4240		# creating variables for all checks
4241		for check in self.checks:
4242			setattr(self, 'f_'+check, self.setvar(''))
4243
4244		# load RC config
4245		self.conf = {}
4246		if conf: self.load_config(conf)
4247
4248	def load_config(self, conf):
4249		self.conf = conf
4250		self.gui_colors = conf.get('guicolors') or self.dft_gui_colors
4251		self.bg1, self.fg1, self.bg2, self.fg2 = self.gui_colors
4252		self.root.config(bd=15,bg=self.bg1)
4253
4254	### config as dic for python 1.5 compat (**opts don't work :( )
4255	def entry(self, **opts): return Tkinter.Entry(self.window, opts)
4256	def label(self, txt='', bg=None, **opts):
4257		opts.update({'text':txt,'bg':bg or self.bg1})
4258		return Tkinter.Label(self.window, opts)
4259	def button(self,name,cmd,**opts):
4260		opts.update({'text':name,'command':cmd})
4261		return Tkinter.Button(self.window, opts)
4262	def check(self,name,checked=0,**opts):
4263		bg, fg = self.bg2, self.fg2
4264		opts.update({
4265		  'text':name, 'onvalue':1, 'offvalue':0,
4266		  'activeforeground':fg, 'fg':fg,
4267		  'activebackground':bg, 'bg':bg,
4268		  'highlightbackground':bg, 'anchor':'w'
4269		})
4270		chk = Tkinter.Checkbutton(self.window, opts)
4271		if checked: chk.select()
4272		chk.grid(columnspan=2, sticky='w', padx=0)
4273	def menu(self,sel,items):
4274		return apply(Tkinter.OptionMenu,(self.window,sel)+tuple(items))
4275
4276	# handy auxiliar functions
4277	def action(self, txt):
4278		self.label(txt, fg=self.fg1, bg=self.bg1,
4279		     wraplength=self.action_lenght).grid(column=0,row=self.row)
4280	def frame_open(self):
4281		self.window = Tkinter.Frame(self.root,bg=self.bg2,
4282		     borderwidth=self.frame_border)
4283	def frame_close(self):
4284		self.window.grid(column=1, row=self.row, sticky='w',
4285		     padx=self.frame_margin)
4286		self.window = self.root
4287		self.label('').grid()
4288		self.row = self.row + 2   # update row count
4289	def target_name2key(self):
4290		name = self.target_name.get()
4291		target = filter(lambda x: TARGET_NAMES[x] == name, TARGETS)
4292		try   : key = target[0]
4293		except: key = ''
4294		self.target = self.setvar(key)
4295	def target_key2name(self):
4296		key = self.target.get()
4297		name = TARGET_NAMES.get(key) or key
4298		self.target_name = self.setvar(name)
4299
4300	def exit(self): self.root.destroy()
4301	def setvar(self, val): z = Tkinter.StringVar() ; z.set(val) ; return z
4302
4303	def askfile(self):
4304		ftypes= [(_('txt2tags files'),('*.t2t','*.txt')),
4305		         (_('All files'),'*')]
4306		newfile = askopenfilename(filetypes=ftypes)
4307		if newfile:
4308			self.infile.set(newfile)
4309			newconf = process_source_file(newfile)[0]
4310			newconf = ConfigMaster().sanity(newconf, gui=1)
4311			# restate all checkboxes after file selection
4312			#TODO how to make a refresh without killing it?
4313			self.root.destroy()
4314			self.__init__(newconf)
4315			self.mainwindow()
4316
4317	def scrollwindow(self, txt='no text!', title=''):
4318		# create components
4319		win    = Tkinter.Toplevel() ; win.title(title)
4320		frame  = Tkinter.Frame(win)
4321		scroll = Tkinter.Scrollbar(frame)
4322		text   = Tkinter.Text(frame,yscrollcommand=scroll.set)
4323		button = Tkinter.Button(win)
4324		# config
4325		text.insert(Tkinter.END, string.join(txt,'\n'))
4326		scroll.config(command=text.yview)
4327		button.config(text=_('Close'), command=win.destroy)
4328		button.focus_set()
4329		# packing
4330		text.pack(side='left', fill='both', expand=1)
4331		scroll.pack(side='right', fill='y')
4332		frame.pack(fill='both', expand=1)
4333		button.pack(ipadx=30)
4334
	def runprogram(self):
		"""Convert the chosen source file using the options set on the GUI.

		The target and the input file are checked first (a warning
		dialog is shown and nothing is done when one is missing).
		Then a fake command line is composed from the real command
		line plus the flags implied by the checkboxes, and the
		conversion runs with the global CMDLINE_RAW temporarily
		replaced by it. The result is shown on a new window when
		finish_him() returns data, or else announced by a dialog
		with the source and output file names.
		"""
		global CMDLINE_RAW
		# prepare
		# (the menu holds the target name, self.target needs the key)
		self.target_name2key()
		infile, target = self.infile.get(), self.target.get()
		# sanity
		if not target:
			showwarning(my_name,_("You must select a target type!"))
			return
		if not infile:
			showwarning(my_name,
			   _("You must provide the source file location!"))
			return
		# compose cmdline
		guiflags = []
		real_cmdline_conf = ConfigMaster(CMDLINE_RAW).parse()
		# infile and target are taken from the GUI, not from the
		# real command line
		if real_cmdline_conf.has_key('infile'):
			del real_cmdline_conf['infile']
		if real_cmdline_conf.has_key('target'):
			del real_cmdline_conf['target']
		real_cmdline = CommandLine().compose_cmdline(real_cmdline_conf)
		# outfile name to use when the user turns the 'stdout' check off
		default_outfile = ConfigMaster().get_outfile_name(
		     {'sourcefile':infile, 'outfile':'', 'target':target})
		for opt in self.checks:
			# the check variables are string vars; an empty value
			# counts as 0 (unchecked)
			val = int(getattr(self, 'f_%s'%opt).get() or "0")
			# the 'stdout' check is looked up (and later flagged) as
			# the 'outfile' option; from here on opt is the config key
			if opt == 'stdout': opt = 'outfile'
			on_config  = self.conf.get(opt) or 0
			on_cmdline = real_cmdline_conf.get(opt) or 0
			if opt == 'outfile':
				# for outfile, "on" means it is set to STDOUT
				if on_config  == STDOUT: on_config = 1
				else: on_config = 0
				if on_cmdline == STDOUT: on_cmdline = 1
				else: on_cmdline = 0
			# a flag is composed when the check differs from the
			# loaded config, or when check, config and command line
			# all agree and the option is on the real command line
			if val != on_config or (
			  val == on_config == on_cmdline and
			  real_cmdline_conf.has_key(opt)):
				# NOTE: below, opt stops being the config key and
				# becomes the command line flag text
				if val:
					# was not set, but user selected on GUI
					Debug("user turned  ON: %s"%opt)
					if opt == 'outfile': opt = '-o-'
					else: opt = '--%s'%opt
				else:
					# was set, but user deselected on GUI
					Debug("user turned OFF: %s"%opt)
					if opt == 'outfile':
						opt = "-o%s"%default_outfile
					else: opt = '--no-%s'%opt
				guiflags.append(opt)
		# the GUI flags come after the real command line options
		cmdline = [my_name, '-t', target] +real_cmdline \
		          +guiflags +[infile]
		Debug('Gui/Tk cmdline: %s'%cmdline,5)
		# run!
		# save the real command line, it is restored at the end
		cmdline_raw_orig = CMDLINE_RAW
		try:
			# fake the GUI cmdline as the real one, and parse file
			# (cmdline[0] is the program name, so it is left out)
			CMDLINE_RAW = CommandLine().get_raw_config(cmdline[1:])
			data = process_source_file(infile)
			# on GUI, convert_* returns the data, not finish_him()
			outlist, config = convert_this_files([data])
			# on GUI and STDOUT, finish_him() returns the data
			result = finish_him(outlist, config)
			# show outlist in a nice new window
			if result:
				outlist, config = result
				title = _('%s: %s converted to %s')%(
				  my_name, os.path.basename(infile),
				  string.upper(config['target']))
				self.scrollwindow(outlist, title)
			# show the "file saved" message
			else:
				msg = "%s\n\n  %s\n%s\n\n  %s\n%s"%(
				      _('Conversion done!'),
				      _('FROM:'), infile,
				      _('TO:'), config['outfile'])
				showinfo(my_name, msg)
		# nothing else to do here: on GUI mode, Error() shows the
		# message on a dialog before raising this exception
		except error:         # common error (windowed), not quit
			pass
		except:               # fatal error (windowed and printed)
			errormsg = getUnknownErrorMessage()
			print errormsg
			showerror(_('%s FATAL ERROR!')%my_name,errormsg)
			self.exit()
		# put the real command line back; this is also reached after
		# an error, because both except clauses above swallow it
		CMDLINE_RAW = cmdline_raw_orig
4418
	def mainwindow(self):
		"""Compose the main window from self.conf and run the Tk main loop.

		The window shows, top to bottom: the program header, the
		source file entry (plus output/style/encoding information
		when set on the config), the targets menu, the option
		checkboxes and the Quit/Convert buttons. The call only
		returns when self.root.mainloop() does.
		"""
		# populate the Tk variables from the loaded config
		self.infile.set(self.conf.get('sourcefile') or '')
		self.target.set(self.conf.get('target') or \
		              _('-- select one --'))
		outfile = self.conf.get('outfile')
		# there is a 'stdout' check but no 'stdout' config item, so it
		# is derived from the outfile value
		if outfile == STDOUT:                  # map -o-
			self.conf['stdout'] = 1
		if self.conf.get('headers') == None:
			self.conf['headers'] = 1       # map default

		action1 = _("Enter the source file location:")
		action2 = _("Choose the target document type:")
		action3 = _("Some options you may check:")
		action4 = _("Some extra options:")
		# checkbox labels, indexed by the names listed on self.checks
		checks_txt = {
		  'headers'   : _("Include headers on output"),
		  'enum-title': _("Number titles (1, 1.1, 1.1.1, etc)"),
		  'toc'       : _("Do TOC also (Table of Contents)"),
		  'mask-email': _("Hide e-mails from SPAM robots"),

		  'toc-only'  : _("Just do TOC, nothing more"),
		  'stdout'    : _("Dump to screen (Don't save target file)")
		}
		# the menu shows the target names, not the target keys
		targets_menu = map(lambda x: TARGET_NAMES[x], TARGETS)

		# header
		self.label("%s %s"%(string.upper(my_name), my_version),
		     bg=self.bg2, fg=self.fg2).grid(columnspan=2, ipadx=10)
		self.label(_("ONE source, MULTI targets")+'\n%s\n'%my_url,
		     bg=self.bg1, fg=self.fg1).grid(columnspan=2)
		# presumably rows 0 and 1 were taken by the two header labels
		# (they are gridded with no explicit row) -- TODO confirm
		self.row = 2
		# choose input file
		self.action(action1) ; self.frame_open()
		e_infile = self.entry(textvariable=self.infile,width=25)
		e_infile.grid(row=self.row, column=0, sticky='e')
		if not self.infile.get(): e_infile.focus_set()
		self.button(_("Browse"), self.askfile).grid(
		    row=self.row, column=1, sticky='w', padx=10)
		# show outfile name, style and encoding (if any)
		txt = ''
		if outfile:
			txt = outfile
			if outfile == STDOUT: txt = _('<screen>')
			l_output = self.label(_('Output: ')+txt,
			                fg=self.fg2,bg=self.bg2)
			l_output.grid(columnspan=2, sticky='w')
		for setting in ['style','encoding']:
			if self.conf.get(setting):
				name = string.capitalize(setting)
				val  = self.conf[setting]
				self.label('%s: %s'%(name, val),
				     fg=self.fg2, bg=self.bg2).grid(
				     columnspan=2, sticky='w')
		# choose target
		self.frame_close() ; self.action(action2)
		self.frame_open()
		# the menu is bound to the target name variable
		self.target_key2name()
		self.menu(self.target_name, targets_menu).grid(
		     columnspan=2, sticky='w')
		# options checkboxes label
		self.frame_close() ; self.action(action3)
		self.frame_open()
		# compose options check boxes, example:
		# self.check(checks_txt['toc'],1,variable=self.f_toc)
		for check in self.checks:
			# extra options label
			# ('toc-only' is the first check of the "extra" group,
			# so the previous frame is closed and a new one opened)
			if check == 'toc-only':
				self.frame_close() ; self.action(action4)
				self.frame_open()
			txt = checks_txt[check]
			var = getattr(self, 'f_'+check)
			checked = self.conf.get(check)
			self.check(txt,checked,variable=var)
		self.frame_close()
		# spacer and buttons
		self.label('').grid() ; self.row = self.row + 1
		b_quit = self.button(_("Quit"), self.exit)
		b_quit.grid(row=self.row, column=0, sticky='w', padx=30)
		b_conv = self.button(_("Convert!"), self.runprogram)
		b_conv.grid(row=self.row, column=1, sticky='e', padx=30)
		# NOTE(review): self.target was set above to the config target
		# or to the '-- select one --' text, so this test seems to
		# depend on the input file only -- confirm
		if self.target.get() and self.infile.get():
			b_conv.focus_set()

		# as documentation told me
		if sys.platform[:3] == 'win':
			self.root.iconify()
			self.root.update()
			self.root.deiconify()

		self.root.mainloop()
4509
4510
4511##############################################################################
4512##############################################################################
4513
def exec_command_line(user_cmdline=[]):
	"""Run the program: parse the command line, then start the GUI or convert.

	user_cmdline -- list of command line arguments to use; when empty,
	                sys.argv[1:] is used instead

	The globals CMDLINE_RAW, DEBUG, VERBOSE, QUIET and GUI are always
	set here; RC_RAW is set when the user RC file is loaded, and Error
	is redefined when the GUI is used. With --gui, or with no input
	file at all, the GUI is tried; otherwise (or when the GUI can't be
	loaded and was not explicitly requested) the input files are
	converted on console mode.
	"""
	# Error is listed because the GUI branch below redefines it
	global CMDLINE_RAW, RC_RAW, DEBUG, VERBOSE, QUIET, GUI, Error

	# extract command line data
	# (the empty default list is never handed to anyone: being false,
	# it is replaced by sys.argv[1:])
	cmdline_data = user_cmdline or sys.argv[1:]
	CMDLINE_RAW = CommandLine().get_raw_config(cmdline_data, relative=1)
	cmdline_parsed = ConfigMaster(CMDLINE_RAW).parse()
	DEBUG   = cmdline_parsed.get('debug'  ) or 0
	VERBOSE = cmdline_parsed.get('verbose') or 0
	QUIET   = cmdline_parsed.get('quiet'  ) or 0
	GUI     = cmdline_parsed.get('gui'    ) or 0
	infiles = cmdline_parsed.get('infile' ) or []

	Message(_("Txt2tags %s processing begins")%my_version,1)

	# the easy ones
	if cmdline_parsed.get('help'   ): Quit(USAGE)
	if cmdline_parsed.get('version'): Quit(VERSIONSTR)

	# multifile haters
	if len(infiles) > 1:
		errmsg=_("Option --%s can't be used with multiple input files")
		for option in NO_MULTI_INPUT:
			if cmdline_parsed.get(option):
				Error(errmsg%option)

	Debug("system platform: %s"%sys.platform)
	Debug("python version: %s"%(string.split(sys.version,'(')[0]))
	Debug("line break char: %s"%repr(LB))
	# NOTE: this always shows sys.argv, even when user_cmdline was given
	Debug("command line: %s"%sys.argv)
	Debug("command line raw config: %s"%CMDLINE_RAW,1)

	# extract RC file config
	# (only an explicit 0 skips the RC file; when 'rc' is not on the
	# parsed command line, get() gives None and the file is loaded)
	if cmdline_parsed.get('rc') == 0:
		Message(_("Ignoring user configuration file"),1)
	else:
		rc_file = get_rc_path()
		if os.path.isfile(rc_file):
			Message(_("Loading user configuration file"),1)
			RC_RAW = ConfigLines(file=rc_file).get_raw_config()

		Debug("rc file: %s"%rc_file)
		Debug("rc file raw config: %s"%RC_RAW,1)

	# get all infiles config (if any)
	infiles_config = get_infiles_config(infiles)

	# is GUI available?
	# try to load and start GUI interface for --gui
	# if program was called with no arguments, try GUI also
	if GUI or not infiles:
		try:
			load_GUI_resources()
			Debug("GUI resources OK (Tk module is installed)")
			winbox = Gui()
			Debug("GUI display OK")
			GUI = 1
		# any failure at all means "no GUI available"
		except:
			Debug("GUI Error: no Tk module or no DISPLAY")
			GUI = 0

	# user forced --gui, but it's not available
	if cmdline_parsed.get('gui') and not GUI:
		# presumably the traceback of the GUI failure caught above
		# -- TODO confirm
		print getTraceback(); print
		Error("Sorry, I can't run my Graphical Interface - GUI\n"
		      "- Check if Python Tcl/Tk module is installed (Tkinter)\n"
		      "- Make sure you are in a graphical environment (like X)")

	# Okay, we will use GUI
	if GUI:
		Message(_("We are on GUI interface"),1)

		# redefine Error function to raise exception instead sys.exit()
		# (Error is declared global above, so this def replaces the
		# module level function)
		def Error(msg):
			showerror(_('txt2tags ERROR!'), msg)
			raise error

		# if no input file, get RC+cmdline config, else full config
		if not infiles:
			gui_conf = ConfigMaster(RC_RAW+CMDLINE_RAW).parse()
		else:
			# only the first input file is used to populate the GUI
			try   : gui_conf = infiles_config[0][0]
			except: gui_conf = {}

		# sanity is needed to set outfile and other things
		gui_conf = ConfigMaster().sanity(gui_conf, gui=1)
		Debug("GUI config: %s"%gui_conf,5)

		# insert config and populate the nice window!
		winbox.load_config(gui_conf)
		winbox.mainwindow()

	# console mode rocks forever!
	else:
		Message(_("We are on Command Line interface"),1)

		# called with no arguments, show error
		if not infiles: Error(_('Missing input file (try --help)'))

		convert_this_files(infiles_config)

	Message(_("Txt2tags finished sucessfuly"),1)
4616
# when called as a script: run, report errors, then quit
if __name__ == '__main__':
	try:
		exec_command_line()
	# the program's own error: show the message on STDERR and leave
	# with exit code 1 (the Quit() call below is not reached)
	except error, msg:
		sys.stderr.write("%s\n"%msg)
		sys.stderr.flush()
		sys.exit(1)
	# an exit request made during the run: just go on to Quit()
	except SystemExit:
		pass
	# anything else is unexpected: print the unknown error report
	except:
		print getUnknownErrorMessage()
	Quit()
4629
4630
4631# vim: ts=8
4632