#!/usr/bin/env python
# txt2tags - generic text conversion tool
# http://txt2tags.sf.net
#
# Copyright 2001, 2002, 2003, 2004, 2005 Aurelio Marinho Jargas
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 2.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You have received a copy of the GNU General Public License along
# with this program, on the COPYING file.
#
#
#
# +-------------------------------------------------------------+
# | IMPORTANT MESSAGES, PLEASE READ                             |
# +-------------------------------------------------------------+
# |                                                             |
# |                                                             |
# |    v1.x COMPATIBILITY                                       |
# |    ------------------                                       |
# |                                                             |
# |    Due to the major syntax changes, the new 2.x series      |
# |    BREAKS backwards compatibility.                          |
# |                                                             |
# |    Use the 't2tconv' script to upgrade your existing        |
# |    v1.x files to conform to the new v2.x syntax.            |
# |                                                             |
# |    Do a visual inspection on the new converted file.        |
# |    Especially Pre & Post proc filters can break.            |
# |    Check them!                                              |
# |                                                             |
# |                                                             |
# +-------------------------------------------------------------+
#
#
########################################################################
#
#   BORING CODE EXPLANATION AHEAD
#
# Just read if you wish to understand how the txt2tags code works
#
########################################################################
#
# Version 2.0 was a complete rewrite for the program 'core'.
#
# Now the code that [1] parses the marked text is separated from the
# code that [2] inserts the target tags.
#
#   [1] made by: def convert()
#   [2] made by: class BlockMaster
#
# The structures of the marked text are identified and their contents are
# extracted into a data holder (Python lists and dictionaries).
#
# When parsing the source file, the blocks (para, lists, quote, table)
# are opened with BlockMaster, right when found. Then their contents,
# which span several lines, are fed into a special holder on the
# BlockMaster instance. Just when the block is closed, the target tags
# are inserted for the full block as a whole, in one pass. This way, we
# have better control over blocks. Much better than the previous line by
# line approach.
#
# In other words, whenever inside a block, the parser *holds* the tag
# insertion process, waiting until the full block is read. That was
# needed primarily to close paragraphs for the new XHTML target, but
# proved to be a very good addition, improving many other processings.
#
# -------------------------------------------------------------------
#
# There is also a brand new code for the Configuration schema, 100%
# rewritten. There are new classes, all self documented: CommandLine,
# SourceDocument, ConfigMaster and ConfigLines. In short, a new RAW
# Config format was created, and all kinds of configuration are first
# converted to this format, and then a generic method parses it.
#
# The init processing was also changed, and now the functions which
# get information about the input files are: get_infiles_config(),
# process_source_file() and convert_this_files()
#
# Other parts are untouched, and remain the same as in v1.7, such as the
# marks regexes, target Headers and target Tags&Rules.
#
########################################################################

# Now I think the code is nice, easier to read and understand

#XXX Python coding warning
# Avoid common mistakes:
#   - do NOT use newlist=list instead of newlist=list[:]
#   - do NOT use newdic=dic   instead of newdic=dic.copy()
#   - do NOT use dic[key]     instead of dic.get(key)
#   - do NOT use del dic[key] without has_key() before

#XXX Smart Image Align doesn't work if the image is a link
# Can't fix that because the image is expanded together with the
# link, at the linkbank filling moment. Only the image is passed
# to parse_images(), not the full line, so it is always 'middle'.

#XXX Paragraph separation not valid inside Quote
# Quote will not have <p></p> inside, instead will close and open
# again the <blockquote>. This really sux in CSS, when defining a
# different background color. Still don't know how to fix it.

#XXX TODO (maybe)
# New mark or macro which expands to an anchor full title.
# It is necessary to parse the full document in this order:
#  DONE  1st scan: HEAD: get all settings, including %!includeconf
#  DONE  2nd scan: BODY: expand includes & apply %!preproc
#        3rd scan: BODY: read titles and compose TOC info
#        4th scan: BODY: full parsing, expanding [#anchor] 1st
# Steps 2 and 3 can be made together, with no tag adding.
# Two complete body scans will be *slow*, don't know if it is worth it.
# One solution may be to add the titles as postproc rules


##############################################################################

# User config (1=ON, 0=OFF)

USE_I18N    = 1   # use gettext for i18ned messages?        (default is 1)
COLOR_DEBUG = 1   # show debug messages in colors?          (default is 1)
BG_LIGHT    = 0   # your terminal background color is light (default is 0)
HTML_LOWER  = 0   # use lowercased HTML tags instead upper? (default is 0)

##############################################################################


# these are all the core Python modules used by txt2tags (KISS!)
import re, string, os, sys, time, getopt

# program information
my_url = 'http://txt2tags.sf.net'
my_name = 'txt2tags'
my_email = 'verde@aurelio.net'
my_version = '2.3'

# i18n - just use if available
# `_` is the message translation function used all over the program.
# When i18n is turned off, or anything goes wrong while setting it up,
# `_` is an identity function and the messages are shown untranslated.
if USE_I18N:
    try:
        import gettext
        # if your locale dir is different, change it here
        cat = gettext.Catalog('txt2tags',localedir='/usr/share/locale/')
        _ = cat.gettext
    except Exception:
        # best effort: any setup failure means "no translation".
        # Not a bare except, so Ctrl-C and sys.exit() are not swallowed.
        _ = lambda x:x
else:
    _ = lambda x:x

# FLAGS   : the conversion related flags  , may be used in %!options
# OPTIONS : the conversion related options, may be used in %!options
# ACTIONS : the other behaviour modifiers, valid on command line only
# MACROS  : the valid macros with their default values for formatting
# SETTINGS: global miscellaneous settings, valid on RC file only
# NO_TARGET: actions that don't require a target specification
# NO_MULTI_INPUT: actions that don't accept more than one input file
# CONFIG_KEYWORDS: the valid %!key:val keywords
#
# FLAGS and OPTIONS are configs that affect the converted document.
# They usually have also a --no-<option> to turn them OFF.
# ACTIONS are needed because when doing multiple input files, strange
# behaviour would be found, as use command line interface for the
# first file and gui for the second. There is no --no-<action>.
# --version and --help inside %!options are also odd
#
TARGETS  = ['html', 'xhtml', 'sgml', 'tex', 'lout', 'man', 'mgp',
            'moin', 'pm6' , 'txt']
# NOTE(review): 'css-suggar' looks like a misspelled alias of 'css-sugar',
# presumably kept for backwards compatibility -- confirm before removing.
FLAGS    = {'headers'    :1 , 'enum-title' :0 , 'mask-email' :0 ,
            'toc-only'   :0 , 'toc'        :0 , 'rc'         :1 ,
            'css-sugar'  :0 , 'css-suggar' :0 , 'css-inside' :0 ,
            'quiet'      :0 }
OPTIONS  = {'target'     :'', 'toc-level'  :3 , 'style'      :'',
            'infile'     :'', 'outfile'    :'', 'encoding'   :'',
            'config-file':'', 'split'      :0 , 'lang'       :''}
ACTIONS  = {'help'       :0 , 'version'    :0 , 'gui'        :0 ,
            'verbose'    :0 , 'debug'      :0 , 'dump-config':0 ,
            'dump-source':0 }
MACROS   = {'date' : '%Y%m%d',  'infile': '%f',
            'mtime': '%Y%m%d', 'outfile': '%f'}
SETTINGS = {}         # for future use
NO_TARGET = ['help', 'version', 'gui', 'toc-only', 'dump-config', 'dump-source']
NO_MULTI_INPUT = ['gui','dump-config','dump-source']
CONFIG_KEYWORDS = [
            'target', 'encoding', 'style', 'options', 'preproc','postproc',
            'guicolors']
# human readable (and translatable) description of each target
TARGET_NAMES = {
  'html' : _('HTML page'),
  'xhtml': _('XHTML page'),
  'sgml' : _('SGML document'),
  'tex'  : _('LaTeX document'),
  'lout' : _('Lout document'),
  'man'  : _('UNIX Manual page'),
  'mgp'  : _('Magic Point presentation'),
  'moin' : _('MoinMoin page'),
  'pm6'  : _('PageMaker 6.0 document'),
  'txt'  : _('Plain Text'),
}

DEBUG   = 0     # do not edit here, please use --debug
VERBOSE = 0     # do not edit here, please use -v, -vv or -vvv
QUIET   = 0     # do not edit here, please use --quiet
GUI     = 0     # do not edit here, please use --gui
AUTOTOC = 1     # do not edit here, please use --no-toc or %%toc

RC_RAW      = []
CMDLINE_RAW = []
CONF   = {}
BLOCK  = None
regex  = {}
TAGS   = {}
rules  = {}

lang = 'english'
TARGET = ''

STDIN = STDOUT = '-'
MODULEIN = MODULEOUT = '-module-'
ESCCHAR   = '\x00'
SEPARATOR = '\x01'
LISTNAMES = {'-':'list', '+':'numlist', ':':'deflist'}
LINEBREAK = {'default':'\n', 'win':'\r\n', 'mac':'\r'}
228 229# plataform specific settings 230LB = LINEBREAK.get(sys.platform[:3]) or LINEBREAK['default'] 231 232# identify a development version 233#dev_suffix = '-dev'+time.strftime('%m%d',time.localtime(time.time())) 234#my_version = my_version + dev_suffix 235 236VERSIONSTR = _("%s version %s <%s>")%(my_name,my_version,my_url) 237 238USAGE = string.join([ 239'', 240_("Usage: %s [OPTIONS] [infile.t2t ...]") % my_name, 241'', 242_(" -t, --target=TYPE set target document type. currently supported:"), 243' %s' % re.sub(r"[]'[]",'',repr(TARGETS)), 244_(" -i, --infile=FILE set FILE as the input file name ('-' for STDIN)"), 245_(" -o, --outfile=FILE set FILE as the output file name ('-' for STDOUT)"), 246_(" -n, --enum-title enumerate all title lines as 1, 1.1, 1.1.1, etc"), 247_(" -H, --no-headers suppress header, title and footer contents"), 248_(" --headers show header, title and footer contents (default ON)"), 249_(" --encoding=ENC set target file encoding (utf-8, iso-8859-1, etc)"), 250_(" --style=FILE use FILE as the document style (like HTML CSS)"), 251_(" --css-sugar insert CSS-friendly tags for HTML and XHTML targets"), 252_(" --css-inside insert CSS file contents inside HTML/XHTML headers"), 253_(" --mask-email hide email from spam robots. 
x@y.z turns <x (a) y z>"), 254_(" --toc add TOC (Table of Contents) to target document"), 255_(" --toc-only print document TOC and exit"), 256_(" --toc-level=N set maximum TOC level (depth) to N"), 257_(" -C, --config-file=F read config from file F"), 258_(" --rc read user config file ~/.txt2tagsrc (default ON)"), 259_(" --gui invoke Graphical Tk Interface"), 260_(" -q, --quiet quiet mode, suppress all output (except errors)"), 261_(" -v, --verbose print informative messages during conversion"), 262_(" -h, --help print this help information and exit"), 263_(" -V, --version print program version and exit"), 264_(" --dump-config print all the config found and exit"), 265_(" --dump-source print the document source, with includes expanded"), 266'', 267_("Turn OFF options:"), 268" --no-outfile, --no-infile, --no-style, --no-encoding, --no-headers", 269" --no-toc, --no-toc-only, --no-mask-email, --no-enum-title, --no-rc", 270" --no-css-sugar, --no-css-inside, --no-quiet, --no-dump-config", 271" --no-dump-source", 272'', 273_("Example:\n %s -t html --toc myfile.t2t") % my_name, 274'', 275_("By default, converted output is saved to 'infile.<target>'."), 276_("Use --outfile to force an output file name."), 277_("If input file is '-', reads from STDIN."), 278_("If output file is '-', dumps output to STDOUT."), 279'' 280], '\n') 281 282 283############################################################################## 284 285 286# here is all the target's templates 287# you may edit them to fit your needs 288# - the %(HEADERn)s strings represent the Header lines 289# - the %(STYLE)s string is changed by --style contents 290# - the %(ENCODING)s string is changed by --encoding contents 291# - if any of the above is empty, the full line is removed 292# - use %% to represent a literal % 293# 294HEADER_TEMPLATE = { 295 'txt': """\ 296%(HEADER1)s 297%(HEADER2)s 298%(HEADER3)s 299""", 300 301 'sgml': """\ 302<!doctype linuxdoc system> 303<article> 304<title>%(HEADER1)s 
305<author>%(HEADER2)s 306<date>%(HEADER3)s 307""", 308 309 'html': """\ 310<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> 311<HTML> 312<HEAD> 313<META NAME="generator" CONTENT="http://txt2tags.sf.net"> 314<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s"> 315<LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s"> 316<TITLE>%(HEADER1)s</TITLE> 317</HEAD><BODY BGCOLOR="white" TEXT="black"> 318<P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1> 319<FONT SIZE="4"> 320<I>%(HEADER2)s</I><BR> 321%(HEADER3)s 322</FONT></CENTER> 323""", 324 325 'htmlcss': """\ 326<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> 327<HTML> 328<HEAD> 329<META NAME="generator" CONTENT="http://txt2tags.sf.net"> 330<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s"> 331<LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s"> 332<TITLE>%(HEADER1)s</TITLE> 333</HEAD> 334<BODY> 335 336<DIV CLASS="header" ID="header"> 337<H1>%(HEADER1)s</H1> 338<H2>%(HEADER2)s</H2> 339<H3>%(HEADER3)s</H3> 340</DIV> 341""", 342 343 'xhtml': """\ 344<?xml version="1.0" 345 encoding="%(ENCODING)s" 346?> 347<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\ 348 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 349<html xmlns="http://www.w3.org/1999/xhtml"> 350<head> 351<title>%(HEADER1)s</title> 352<meta name="generator" content="http://txt2tags.sf.net" /> 353<link rel="stylesheet" type="text/css" href="%(STYLE)s" /> 354</head> 355<body bgcolor="white" text="black"> 356<div align="center"> 357<h1>%(HEADER1)s</h1> 358<h2>%(HEADER2)s</h2> 359<h3>%(HEADER3)s</h3> 360</div> 361""", 362 363 'xhtmlcss': """\ 364<?xml version="1.0"?> 365<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\ 366 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 367<html xmlns="http://www.w3.org/1999/xhtml"> 368<head> 369<title>%(HEADER1)s</title> 370<meta name="generator" content="http://txt2tags.sf.net" /> 371<meta 
http-equiv="Content-Type" content="text/html; charset=%(ENCODING)s" /> 372<link rel="stylesheet" type="text/css" href="%(STYLE)s" /> 373</head> 374<body> 375 376<div class="header" id="header"> 377<h1>%(HEADER1)s</h1> 378<h2>%(HEADER2)s</h2> 379<h3>%(HEADER3)s</h3> 380</div> 381""", 382 383 'man': """\ 384.TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s" 385""", 386 387# TODO style to <HR> 388 'pm6': """\ 389<PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0) 390><@Normal= 391 <FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11> 392 <HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3> 393 <C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05> 394 <GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0> 395 <GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH"> 396 <GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $> 397 <GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25> 398><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light"> 399 <GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")> 400><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0> 401 <GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0> 402><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B> 403 <GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left"> 404><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6> 405><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2> 406><@Title4=<@-PARENT "Title3"> 407><@Title5=<@-PARENT "Title3"> 408><@Quote=<@-PARENT "Normal"><SIZE 10><I>> 409 410%(HEADER1)s 411%(HEADER2)s 412%(HEADER3)s 413""", 414 415 'mgp': """\ 416#!/usr/X11R6/bin/mgp -t 90 417%%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1" 418%%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1" 419%%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1" 420%%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1" 421%%deffont "mono" xfont 
"courier-medium-r", charset "iso8859-1" 422%%default 1 size 5 423%%default 2 size 8, fore "yellow", font "normal-b", center 424%%default 3 size 5, fore "white", font "normal", left, prefix " " 425%%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill 426%%tab 2 prefix " ", icon arc "orange" 40, leftfill 427%%tab 3 prefix " ", icon arc "brown" 40, leftfill 428%%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill 429%%tab 5 prefix " ", icon arc "magenta" 40, leftfill 430%%%%------------------------- end of headers ----------------------------- 431%%page 432 433 434 435 436 437%%size 10, center, fore "yellow" 438%(HEADER1)s 439 440%%font "normal-i", size 6, fore "white", center 441%(HEADER2)s 442 443%%font "mono", size 7, center 444%(HEADER3)s 445""", 446 447 'moin': """\ 448'''%(HEADER1)s''' 449 450''%(HEADER2)s'' 451 452%(HEADER3)s 453""", 454 455 'tex': \ 456r"""\documentclass[11pt,a4paper]{scrbook} 457\usepackage{amsfonts,graphicx} 458\usepackage[pdfstartview=FitH,urlcolor=blue,colorlinks=true,bookmarks=true]{hyperref} 459\usepackage[%(ENCODING)s]{inputenc} %% char encoding 460\usepackage{%(STYLE)s} %% user defined package 461\tolerance=10000 462\usepackage{scrpage2} 463\pagestyle{scrheadings} 464\refoot{%(HEADER1)s} 465 466\title{%(HEADER1)s} 467\author{%(HEADER2)s} 468\begin{document} 469\date{%(HEADER3)s} 470\maketitle 471\clearpage 472""", 473 474 'lout': """\ 475@SysInclude { doc } 476@Document 477 @InitialFont { Times Base 12p } # Times, Courier, Helvetica, ... 478 @PageOrientation { Portrait } # Portrait, Landscape 479 @ColumnNumber { 1 } # Number of columns (2, 3, ...) 480 @PageHeaders { Simple } # None, Simple, Titles, NoTitles 481 @InitialLanguage { English } # German, French, Portuguese, ... 
482 @OptimizePages { Yes } # Yes/No smart page break feature 483// 484@Text @Begin 485@Display @Heading { %(HEADER1)s } 486@Display @I { %(HEADER2)s } 487@Display { %(HEADER3)s } 488#@NP # Break page after Headers 489""" 490# @SysInclude { tbl } # Tables support 491# setup: @MakeContents { Yes } # show TOC 492# setup: @SectionGap # break page at each section 493} 494 495 496############################################################################## 497 498 499def getTags(config): 500 "Returns all the known tags for the specified target" 501 502 keys = [ 503 'paragraphOpen','paragraphClose', 504 'title1','title2','title3','title4','title5', 505 'title1Open','title1Close','title2Open','title2Close', 506 'blocktitle1Open','title1Close','title2Open','title2Close', 507 'title3Open','title3Close','title4Open','title4Close', 508 'title5Open','title5Close', 509 'numtitle1','numtitle2','numtitle3','numtitle4','numtitle5', 510 'blockVerbOpen','blockVerbClose', 511 'blockQuoteOpen','blockQuoteClose','blockQuoteLine', 512 'fontMonoOpen','fontMonoClose', 513 'fontBoldOpen','fontBoldClose', 514 'fontItalicOpen','fontItalicClose', 515 'fontUnderlineOpen','fontUnderlineClose', 516 'listOpen','listClose', 517 'listItemOpen','listItemClose','listItemLine', 518 'numlistOpen','numlistClose', 519 'numlistItemOpen','numlistItemClose','numlistItemLine', 520 'deflistOpen','deflistClose', 521 'deflistItem1Open','deflistItem1Close', 522 'deflistItem2Open','deflistItem2Close', 523 'bar1','bar2', 524 'url','urlMark','email','emailMark', 525 'img','imgAlignLeft','imgAlignRight','imgAlignCenter', 526 'tableOpen','tableClose', 527 'tableRowOpen','tableRowClose','tableRowSep', 528 'tableCellOpen','tableCellClose','tableCellSep', 529 'tableTitleCellOpen','tableTitleCellClose','tableTitleCellSep', 530 'tableTitleRowOpen','tableTitleRowClose', 531 'tableBorder', 'tableAlignLeft', 'tableAlignCenter', 532 'tableCellAlignLeft','tableCellAlignRight','tableCellAlignCenter', 533 
'tableColAlignLeft','tableColAlignRight','tableColAlignCenter', 534 'tableColAlignSep', 'tableCellColSpan', 535 'anchor','comment','pageBreak', 536 'TOC','tocOpen','tocClose', 537 'cssOpen', 'cssClose', 538 'bodyOpen','bodyClose', 539 'EOD' 540 ] 541 542 # TIP: \a represents the current text on the mark 543 # TIP: ~A~, ~B~ and ~C~ are expanded to other tags parts 544 545 alltags = { 546 547 'txt': { 548 'title1' : ' \a' , 549 'title2' : '\t\a' , 550 'title3' : '\t\t\a' , 551 'title4' : '\t\t\t\a' , 552 'title5' : '\t\t\t\t\a', 553 'blockQuoteLine' : '\t' , 554 'listItemOpen' : '- ' , 555 'numlistItemOpen' : '\a. ' , 556 'bar1' : '\a' , 557 'url' : '\a' , 558 'urlMark' : '\a (\a)' , 559 'email' : '\a' , 560 'emailMark' : '\a (\a)' , 561 'img' : '[\a]' , 562 }, 563 564 'html': { 565 'paragraphOpen' : '<P>' , 566 'paragraphClose' : '</P>' , 567 'title1' : '<H1>\a~A~</H1>' , 568 'title2' : '<H2>\a~A~</H2>' , 569 'title3' : '<H3>\a~A~</H3>' , 570 'title4' : '<H4>\a~A~</H4>' , 571 'title5' : '<H5>\a~A~</H5>' , 572 'blockVerbOpen' : '<PRE>' , 573 'blockVerbClose' : '</PRE>' , 574 'blockQuoteOpen' : '<BLOCKQUOTE>' , 575 'blockQuoteClose' : '</BLOCKQUOTE>' , 576 'fontMonoOpen' : '<CODE>' , 577 'fontMonoClose' : '</CODE>' , 578 'fontBoldOpen' : '<B>' , 579 'fontBoldClose' : '</B>' , 580 'fontItalicOpen' : '<EM>' , 581 'fontItalicClose' : '</EM>' , 582 'fontUnderlineOpen' : '<U>' , 583 'fontUnderlineClose' : '</U>' , 584 'listOpen' : '<UL>' , 585 'listClose' : '</UL>' , 586 'listItemOpen' : '<LI>' , 587 'numlistOpen' : '<OL>' , 588 'numlistClose' : '</OL>' , 589 'numlistItemOpen' : '<LI>' , 590 'deflistOpen' : '<DL>' , 591 'deflistClose' : '</DL>' , 592 'deflistItem1Open' : '<DT>' , 593 'deflistItem1Close' : '</DT>' , 594 'deflistItem2Open' : '<DD>' , 595 'bar1' : '<HR NOSHADE SIZE=1>' , 596 'bar2' : '<HR NOSHADE SIZE=5>' , 597 'url' : '<A HREF="#\a">\a</A>' , 598 'urlMark' : '<A HREF="#\a">\a</A>' , 599 'email' : '<A HREF="mailto:\a">\a</A>' , 600 'emailMark' : '<A 
HREF="mailto:\a">\a</A>' , 601 'img' : '<IMG~A~ SRC="\a" BORDER="0" ALT="">', 602 'imgAlignLeft' : ' ALIGN="left"' , 603 'imgAlignCenter' : ' ALIGN="middle"', 604 'imgAlignRight' : ' ALIGN="right"' , 605 'tableOpen' : '<TABLE~A~ CELLPADDING="4"~B~>', 606 'tableClose' : '</TABLE>' , 607 'tableRowOpen' : '<TR>' , 608 'tableRowClose' : '</TR>' , 609 'tableCellOpen' : '<TD~A~~S~>' , 610 'tableCellClose' : '</TD>' , 611 'tableTitleCellOpen' : '<TH~S~>' , 612 'tableTitleCellClose' : '</TH>' , 613 'tableBorder' : ' BORDER="1"' , 614 'tableAlignCenter' : ' ALIGN="center"', 615 'tableCellAlignRight' : ' ALIGN="right"' , 616 'tableCellAlignCenter': ' ALIGN="center"', 617 'tableCellColSpan' : ' COLSPAN="\a"' , 618 'anchor' : '<A NAME="\a"></A>\n', 619 'cssOpen' : '<STYLE TYPE="text/css">', 620 'cssClose' : '</STYLE>' , 621 'comment' : '<!-- \a -->' , 622 'EOD' : '</BODY></HTML>' 623 }, 624 625 #TIP xhtml inherits all HTML definitions (lowercased) 626 #TIP http://www.w3.org/TR/xhtml1/#guidelines 627 #TIP http://www.htmlref.com/samples/Chapt17/17_08.htm 628 'xhtml': { 629 'listItemClose' : '</li>' , 630 'numlistItemClose' : '</li>' , 631 'deflistItem2Close' : '</dd>' , 632 'bar1' : '<hr class="light" />', 633 'bar2' : '<hr class="heavy" />', 634 'anchor' : '<a id="\a" name="\a"></a>\n', 635 'img' : '<img~A~ src="\a" border="0" alt=""/>', 636 }, 637 638 'sgml': { 639 'paragraphOpen' : '<p>' , 640 'title1' : '<sect>\a~A~<p>' , 641 'title2' : '<sect1>\a~A~<p>' , 642 'title3' : '<sect2>\a~A~<p>' , 643 'title4' : '<sect3>\a~A~<p>' , 644 'title5' : '<sect4>\a~A~<p>' , 645 'blockVerbOpen' : '<tscreen><verb>' , 646 'blockVerbClose' : '</verb></tscreen>' , 647 'blockQuoteOpen' : '<quote>' , 648 'blockQuoteClose' : '</quote>' , 649 'fontMonoOpen' : '<tt>' , 650 'fontMonoClose' : '</tt>' , 651 'fontBoldOpen' : '<bf>' , 652 'fontBoldClose' : '</bf>' , 653 'fontItalicOpen' : '<em>' , 654 'fontItalicClose' : '</em>' , 655 'fontUnderlineOpen' : '<bf><em>' , 656 'fontUnderlineClose' : 
'</em></bf>' , 657 'listOpen' : '<itemize>' , 658 'listClose' : '</itemize>' , 659 'listItemOpen' : '<item>' , 660 'numlistOpen' : '<enum>' , 661 'numlistClose' : '</enum>' , 662 'numlistItemOpen' : '<item>' , 663 'deflistOpen' : '<descrip>' , 664 'deflistClose' : '</descrip>' , 665 'deflistItem1Open' : '<tag>' , 666 'deflistItem1Close' : '</tag>' , 667 'bar1' : '<!-- \a -->' , 668 'url' : '<htmlurl url="\a" name="\a">' , 669 'urlMark' : '<htmlurl url="\a" name="\a">' , 670 'email' : '<htmlurl url="mailto:\a" name="\a">' , 671 'emailMark' : '<htmlurl url="mailto:\a" name="\a">' , 672 'img' : '<figure><ph vspace=""><img src="\a">'+\ 673 '</figure>' , 674 'tableOpen' : '<table><tabular ca="~C~">' , 675 'tableClose' : '</tabular></table>' , 676 'tableRowSep' : '<rowsep>' , 677 'tableCellSep' : '<colsep>' , 678 'tableColAlignLeft' : 'l' , 679 'tableColAlignRight' : 'r' , 680 'tableColAlignCenter' : 'c' , 681 'comment' : '<!-- \a -->' , 682 'anchor' : '<label id="\a">' , 683 'TOC' : '<toc>' , 684 'EOD' : '</article>' 685 }, 686 687 'tex': { 688 'title1' : '\n\chapter*{\a}~A~' , 689 'title2' : '\n\section*{\a}~A~' , 690 'title3' : '\\subsection*{\a}~A~' , 691 'title4' : '\\subsubsection*{\a}~A~', 692 # title 4/5: DIRTY: para+BF+\\+\n 693 'title5' : '\\paragraph{}\\textbf{\a}~A~\\\\\n', 694 'title6' : '\\paragraph{}\\textbf{\a}~A~\\\\\n', 695 'numtitle1' : '\n\chapter{\a}~A~' , 696 'numtitle2' : '\n\section{\a}~A~' , 697 'numtitle3' : '\\subsection{\a}~A~' , 698 'numtitle4' : '\\subsubsection{\a}~A~' , 699 'blockVerbOpen' : '\\begin{verbatim}' , 700 'blockVerbClose' : '\\end{verbatim}' , 701 'blockQuoteOpen' : '\\begin{quotation}' , 702 'blockQuoteClose' : '\\end{quotation}' , 703 'fontMonoOpen' : '\\texttt{' , 704 'fontMonoClose' : '}' , 705 'fontBoldOpen' : '\\textbf{' , 706 'fontBoldClose' : '}' , 707 'fontItalicOpen' : '\\textit{' , 708 'fontItalicClose' : '}' , 709 'fontUnderlineOpen' : '\\underline{' , 710 'fontUnderlineClose' : '}' , 711 'listOpen' : 
'\\begin{itemize}' , 712 'listClose' : '\\end{itemize}' , 713 'listItemOpen' : '\\item ' , 714 'numlistOpen' : '\\begin{enumerate}' , 715 'numlistClose' : '\\end{enumerate}' , 716 'numlistItemOpen' : '\\item ' , 717 'deflistOpen' : '\\begin{description}', 718 'deflistClose' : '\\end{description}' , 719 'deflistItem1Open' : '\\item[' , 720 'deflistItem1Close' : ']' , 721 'bar1' : '\n\\hrulefill{}\n' , 722 'bar2' : '\n\\rule{\linewidth}{1mm}\n', 723 'url' : '\\htmladdnormallink{\a}{\a}', 724 'urlMark' : '\a (\\ref{\a})', 725 'email' : '\\htmladdnormallink{\a}{mailto:\a}', 726 'emailMark' : '\\htmladdnormallink{\a}{mailto:\a}', 727 'img' : '\\includegraphics{\a}', 728 'tableOpen' : '\\begin{center}\\begin{tabular}{|~C~|}', 729 'tableClose' : '\\end{tabular}\\end{center}', 730 'tableRowOpen' : '\\hline ' , 731 'tableRowClose' : ' \\\\' , 732 'tableCellSep' : ' & ' , 733 'tableColAlignLeft' : 'l' , 734 'tableColAlignRight' : 'r' , 735 'tableColAlignCenter' : 'c' , 736 'tableColAlignSep' : '|' , 737 'comment' : '% \a' , 738 'anchor' : '\\label{\a}', 739 'TOC' : '\\tableofcontents', 740 'pageBreak' : '\\clearpage', 741 'EOD' : '\\end{document}' 742 }, 743 744 'lout': { 745 'paragraphOpen' : '@LP' , 746 'blockTitle1Open' : '@BeginSections' , 747 'blockTitle1Close' : '@EndSections' , 748 'blockTitle2Open' : ' @BeginSubSections' , 749 'blockTitle2Close' : ' @EndSubSections' , 750 'blockTitle3Open' : ' @BeginSubSubSections' , 751 'blockTitle3Close' : ' @EndSubSubSections' , 752 'title1Open' : '\n@Section @Title { \a } @Begin', 753 'title1Close' : '@End @Section' , 754 'title2Open' : '\n @SubSection @Title { \a } @Begin', 755 'title2Close' : ' @End @SubSection' , 756 'title3Open' : '\n @SubSubSection @Title { \a } @Begin', 757 'title3Close' : ' @End @SubSubSection' , 758 'title4Open' : '\n@LP @LeftDisplay @B { \a }', 759 'title5Open' : '\n@LP @LeftDisplay @B { \a }', 760 'anchor' : '@Tag { \a }' , 761 'blockVerbOpen' : '@LP @ID @F @RawVerbatim @Begin', 762 'blockVerbClose' : 
'@End @RawVerbatim' , 763 'blockQuoteOpen' : '@QD {' , 764 'blockQuoteClose' : '}' , 765 # enclosed inside {} to deal with joined**words** 766 'fontMonoOpen' : '{@F {' , 767 'fontMonoClose' : '}}' , 768 'fontBoldOpen' : '{@B {' , 769 'fontBoldClose' : '}}' , 770 'fontItalicOpen' : '{@II {' , 771 'fontItalicClose' : '}}' , 772 'fontUnderlineOpen' : '{@Underline{' , 773 'fontUnderlineClose' : '}}' , 774 # the full form is more readable, but could be BL EL LI NL TL DTI 775 'listOpen' : '@BulletList' , 776 'listClose' : '@EndList' , 777 'listItemOpen' : '@ListItem{' , 778 'listItemClose' : '}' , 779 'numlistOpen' : '@NumberedList' , 780 'numlistClose' : '@EndList' , 781 'numlistItemOpen' : '@ListItem{' , 782 'numlistItemClose' : '}' , 783 'deflistOpen' : '@TaggedList' , 784 'deflistClose' : '@EndList' , 785 'deflistItem1Open' : '@DropTagItem {' , 786 'deflistItem1Close' : '}' , 787 'deflistItem2Open' : '{' , 788 'deflistItem2Close' : '}' , 789 'bar1' : '\n@DP @FullWidthRule\n' , 790 'url' : '{blue @Colour { \a }}' , 791 'urlMark' : '\a ({blue @Colour { \a }})' , 792 'email' : '{blue @Colour { \a }}' , 793 'emailMark' : '\a ({blue Colour{ \a }})' , 794 'img' : '~A~@IncludeGraphic { \a }' , # eps only! 795 'imgAlignLeft' : '@LeftDisplay ' , 796 'imgAlignRight' : '@RightDisplay ' , 797 'imgAlignCenter' : '@CentredDisplay ' , 798 # lout tables are *way* complicated, no support for now 799 #'tableOpen' : '~A~@Tbl~B~\naformat{ @Cell A | @Cell B } {', 800 #'tableClose' : '}' , 801 #'tableRowOpen' : '@Rowa\n' , 802 #'tableTitleRowOpen' : '@HeaderRowa' , 803 #'tableCenterAlign' : '@CentredDisplay ' , 804 #'tableCellOpen' : '\a {' , # A, B, ... 
805 #'tableCellClose' : '}' , 806 #'tableBorder' : '\nrule {yes}' , 807 'comment' : '# \a' , 808 # @MakeContents must be on the config file 809 'TOC' : '@DP @ContentsGoesHere @DP', 810 'pageBreak' : '\n@NP\n' , 811 'EOD' : '@End @Text' 812 }, 813 814 'moin': { 815 'title1' : '= \a =' , 816 'title2' : '== \a ==' , 817 'title3' : '=== \a ===' , 818 'title4' : '==== \a ====' , 819 'title5' : '===== \a =====', 820 'blockVerbOpen' : '{{{' , 821 'blockVerbClose' : '}}}' , 822 'blockQuoteLine' : ' ' , 823 'fontMonoOpen' : '{{{' , 824 'fontMonoClose' : '}}}' , 825 'fontBoldOpen' : "'''" , 826 'fontBoldClose' : "'''" , 827 'fontItalicOpen' : "''" , 828 'fontItalicClose' : "''" , 829 'fontUnderlineOpen' : "__" , 830 'fontUnderlineClose' : "__" , 831 'listItemOpen' : ' * ' , 832 'numlistItemOpen' : ' \a. ' , 833 'bar1' : '----' , 834 'url' : '[\a]' , 835 'urlMark' : '[\a \a]' , 836 'email' : '[\a]' , 837 'emailMark' : '[\a \a]' , 838 'img' : '[\a]' , 839 'tableRowOpen' : '||' , 840 'tableCellOpen' : '~A~' , 841 'tableCellClose' : '||' , 842 'tableTitleCellClose' : '||' , 843 'tableCellAlignRight' : '<)>' , 844 'tableCellAlignCenter': '<:>' , 845 'comment' : '## \a' , 846 'TOC' : '[[TableOfContents]]' 847 }, 848 849 'mgp': { 850 'paragraphOpen' : '%font "normal", size 5' , 851 'title1' : '%page\n\n\a\n' , 852 'title2' : '%page\n\n\a\n' , 853 'title3' : '%page\n\n\a\n' , 854 'title4' : '%page\n\n\a\n' , 855 'title5' : '%page\n\n\a\n' , 856 'blockVerbOpen' : '%font "mono"' , 857 'blockVerbClose' : '%font "normal"' , 858 'blockQuoteOpen' : '%prefix " "' , 859 'blockQuoteClose' : '%prefix " "' , 860 'fontMonoOpen' : '\n%cont, font "mono"\n' , 861 'fontMonoClose' : '\n%cont, font "normal"\n' , 862 'fontBoldOpen' : '\n%cont, font "normal-b"\n' , 863 'fontBoldClose' : '\n%cont, font "normal"\n' , 864 'fontItalicOpen' : '\n%cont, font "normal-i"\n' , 865 'fontItalicClose' : '\n%cont, font "normal"\n' , 866 'fontUnderlineOpen' : '\n%cont, fore "cyan"\n' , 867 'fontUnderlineClose' : 
'\n%cont, fore "white"\n' , 868 'listItemLine' : '\t' , 869 'numlistItemLine' : '\t' , 870 'deflistItem1Open' : '\t\n%cont, font "normal-b"\n', 871 'deflistItem1Close' : '\n%cont, font "normal"\n' , 872 'bar1' : '%bar "white" 5' , 873 'bar2' : '%pause' , 874 'url' : '\n%cont, fore "cyan"\n\a' +\ 875 '\n%cont, fore "white"\n' , 876 'urlMark' : '\a \n%cont, fore "cyan"\n\a'+\ 877 '\n%cont, fore "white"\n' , 878 'email' : '\n%cont, fore "cyan"\n\a' +\ 879 '\n%cont, fore "white"\n' , 880 'emailMark' : '\a \n%cont, fore "cyan"\n\a'+\ 881 '\n%cont, fore "white"\n' , 882 'img' : '~A~\n%newimage "\a"\n%left\n', 883 'imgAlignLeft' : '\n%left' , 884 'imgAlignRight' : '\n%right' , 885 'imgAlignCenter' : '\n%center' , 886 'comment' : '%% \a' , 887 'pageBreak' : '%page\n\n\n' , 888 'EOD' : '%%EOD' 889 }, 890 891 # man groff_man ; man 7 groff 892 'man': { 893 'paragraphOpen' : '.P' , 894 'title1' : '.SH \a' , 895 'title2' : '.SS \a' , 896 'title3' : '.SS \a' , 897 'title4' : '.SS \a' , 898 'title5' : '.SS \a' , 899 'blockVerbOpen' : '.nf' , 900 'blockVerbClose' : '.fi\n' , 901 'blockQuoteOpen' : '.RS' , 902 'blockQuoteClose' : '.RE' , 903 'fontBoldOpen' : '\\fB' , 904 'fontBoldClose' : '\\fR' , 905 'fontItalicOpen' : '\\fI' , 906 'fontItalicClose' : '\\fR' , 907 'listOpen' : '.RS' , 908 'listItemOpen' : '.IP \(bu 3\n', 909 'listClose' : '.RE' , 910 'numlistOpen' : '.RS' , 911 'numlistItemOpen' : '.IP \a. 
3\n', 912 'numlistClose' : '.RE' , 913 'deflistItem1Open' : '.TP\n' , 914 'bar1' : '\n\n' , 915 'url' : '\a' , 916 'urlMark' : '\a (\a)', 917 'email' : '\a' , 918 'emailMark' : '\a (\a)', 919 'img' : '\a' , 920 'tableOpen' : '.TS\n~A~~B~tab(^); ~C~.', 921 'tableClose' : '.TE' , 922 'tableRowOpen' : ' ' , 923 'tableCellSep' : '^' , 924 'tableAlignCenter' : 'center, ', 925 'tableBorder' : 'allbox, ', 926 'tableColAlignLeft' : 'l' , 927 'tableColAlignRight' : 'r' , 928 'tableColAlignCenter' : 'c' , 929 'comment' : '.\\" \a' 930 }, 931 932 'pm6': { 933 'paragraphOpen' : '<@Normal:>' , 934 'title1' : '\n<@Title1:>\a', 935 'title2' : '\n<@Title2:>\a', 936 'title3' : '\n<@Title3:>\a', 937 'title4' : '\n<@Title4:>\a', 938 'title5' : '\n<@Title5:>\a', 939 'blockVerbOpen' : '<@PreFormat:>' , 940 'blockQuoteLine' : '<@Quote:>' , 941 'fontMonoOpen' : '<FONT "Lucida Console"><SIZE 9>' , 942 'fontMonoClose' : '<SIZE$><FONT$>', 943 'fontBoldOpen' : '<B>' , 944 'fontBoldClose' : '<P>' , 945 'fontItalicOpen' : '<I>' , 946 'fontItalicClose' : '<P>' , 947 'fontUnderlineOpen' : '<U>' , 948 'fontUnderlineClose' : '<P>' , 949 'listOpen' : '<@Bullet:>' , 950 'listItemOpen' : '\x95\t' , # \x95 == ~U 951 'numlistOpen' : '<@Bullet:>' , 952 'numlistItemOpen' : '\x95\t' , 953 'bar1' : '\a' , 954 'url' : '<U>\a<P>' , # underline 955 'urlMark' : '\a <U>\a<P>' , 956 'email' : '\a' , 957 'emailMark' : '\a \a' , 958 'img' : '\a' 959 } 960 } 961 962 # exceptions for --css-sugar 963 if config['css-sugar'] and config['target'] in ('html','xhtml'): 964 # change just HTML because XHTML inherits it 965 htmltags = alltags['html'] 966 # table with no cellpadding 967 htmltags['tableOpen'] = string.replace( 968 htmltags['tableOpen'], ' CELLPADDING="4"', '') 969 # DIVs 970 htmltags['tocOpen' ] = '<DIV CLASS="toc" ID="toc">' 971 htmltags['tocClose'] = '</DIV>' 972 htmltags['bodyOpen'] = '<DIV CLASS="body" ID="body">' 973 htmltags['bodyClose']= '</DIV>' 974 975 # make the HTML -> XHTML inheritance 976 xhtml = 
alltags['html'].copy()
    for key in xhtml.keys(): xhtml[key] = string.lower(xhtml[key])
    # some like HTML tags as lowercase, some don't... (headers out)
    if HTML_LOWER: alltags['html'] = xhtml.copy()
    xhtml.update(alltags['xhtml'])
    alltags['xhtml'] = xhtml.copy()

    # compose the target tags dictionary
    tags = {}
    target_tags = alltags[config['target']].copy()

    for key in keys: tags[key] = ''     # create empty keys
    for key in target_tags.keys():
        tags[key] = maskEscapeChar(target_tags[key]) # populate

    # map strong line to separator if not defined
    if not tags['bar2'] and tags['bar1']:
        tags['bar2'] = tags['bar1']

    return tags


##############################################################################


def getRules(config):
    """Returns all the target-specific syntax rules.

    Reads config['target'] and config['css-sugar'].

    The returned dictionary has one entry for every name listed in
    'allrules'. Each entry starts as 0 and is then overwritten by the
    values found on 'rules_bank' for the requested target, so a rule
    not listed for a target is always present and OFF (0).

    The 'xhtml' target starts from a copy of the 'html' rules and then
    applies its own (currently empty) table on top of it.

    A target with no entry on 'rules_bank' makes the dictionary lookup
    fail with KeyError.
    """

    ret = {}
    # Every known rule name. All of them are reset to 0 before the
    # target rules are applied (see the end of this function).
    allrules = [

    # target rules (ON/OFF)
      'linkable',             # target supports external links
      'tableable',            # target supports tables
      'imglinkable',          # target supports images as links
      'imgalignable',         # target supports image alignment
      'imgasdefterm',         # target supports image as definition term
      'autonumberlist',       # target supports numbered lists natively
      'autonumbertitle',      # target supports numbered titles natively
      'parainsidelist',       # lists items supports paragraph
      'spacedlistitem',       # lists support blank lines between items
      'listnotnested',        # lists cannot be nested
      'quotenotnested',       # quotes cannot be nested
      'verbblocknotescaped',  # don't escape specials in verb block
      'verbblockfinalescape', # do final escapes in verb block
      'escapeurl',            # escape special in link URL
      'onelinepara',          # dump paragraph as a single long line
      'tabletitlerowinbold',  # manually bold any cell on table titles
      'tablecellstrip',       # strip extra spaces from each table cell
      'tablecellspannable',   # the table cells can have span attribute
      'barinsidequote',       # bars are allowed inside quote blocks
      'finalescapetitle',     # perform final escapes on title lines
      'autotocnewpagebefore', # break page before automatic TOC
      'autotocnewpageafter',  # break page after automatic TOC
      'autotocwithbars',      # automatic TOC surrounded by bars
      'mapbar2pagebreak',     # map the strong bar to a page break
      'titleblocks',          # titles must be on open/close section blocks

    # target code beautify (ON/OFF)
      'indentverbblock',      # add leading spaces to verb block lines
      'breaktablecell',       # break lines after any table cell
      'breaktablelineopen',   # break line after opening table line
      'notbreaklistopen',     # don't break line after opening a new list
      'notbreakparaopen',     # don't break line after opening a new para
      'keepquoteindent',      # don't remove the leading TABs on quotes
      'keeplistindent',       # don't remove the leading spaces on lists
      'blankendmotherlist',   # append a blank line at the mother list end
      'blankendtable',        # append a blank line at the table end
      'blankendautotoc',      # append a blank line at the auto TOC end
      'tagnotindentable',     # tags must be placed at the line beginning

    # value settings
      'listmaxdepth',         # maximum depth for lists
      'tablecellaligntype'    # type of table cell align: cell, column
    ]

    # Per-target rule values. Only the rules listed here differ from
    # the 0 default; most are ON/OFF switches (1/0), while
    # 'listmaxdepth' and 'tablecellaligntype' carry a real value.
    rules_bank = {
      'txt' : {
        'indentverbblock':1,
        'spacedlistitem':1,
        'parainsidelist':1,
        'keeplistindent':1,
        'barinsidequote':1,
        'autotocwithbars':1,
        'blankendmotherlist':1
        },
      'html': {
        'indentverbblock':1,
        'linkable':1,
        'escapeurl':1,
        'imglinkable':1,
        'imgalignable':1,
        'imgasdefterm':1,
        'autonumberlist':1,
        'spacedlistitem':1,
        'parainsidelist':1,
        'blankendmotherlist':1,
        'tableable':1,
        'tablecellstrip':1,
        'blankendtable':1,
        'breaktablecell':1,
        'breaktablelineopen':1,
        'keeplistindent':1,
        'keepquoteindent':1,
        'barinsidequote':1,
        'autotocwithbars':1,
        'tablecellspannable':1,
        'tablecellaligntype':'cell'
        },
      #TIP xhtml inherits all HTML rules
      'xhtml': {
        },
      'sgml': {
        'linkable':1,
        'escapeurl':1,
        'autonumberlist':1,
        'spacedlistitem':1,
        'blankendmotherlist':1,
        'tableable':1,
        'tablecellstrip':1,
        'blankendtable':1,
        'blankendautotoc':1,
        'quotenotnested':1,
        'keeplistindent':1,
        'keepquoteindent':1,
        'barinsidequote':1,
        'finalescapetitle':1,
        'tablecellaligntype':'column'
        },
      'mgp' : {
        'blankendmotherlist':1,
        'tagnotindentable':1,
        'spacedlistitem':1,
        'imgalignable':1,
        'autotocnewpagebefore':1,
        },
      'tex' : {
        'imgasdefterm':1,
        'autonumberlist':1,
        'autonumbertitle':1,
        'spacedlistitem':1,
        'blankendmotherlist':1,
        'tableable':1,
        'tablecellstrip':1,
        'tabletitlerowinbold':1,
        'blankendtable':1,
        'verbblocknotescaped':1,
        'keeplistindent':1,
        'listmaxdepth':4,
        'barinsidequote':1,
        'finalescapetitle':1,
        'autotocnewpageafter':1,
        'mapbar2pagebreak':1,
        'tablecellaligntype':'column'
        },
      'lout': {
        'keepquoteindent':1,
        'escapeurl':1,
        'verbblocknotescaped':1,
        'tableable':0,
        'imgalignable':1,
        'mapbar2pagebreak':1,
        'titleblocks':1,
        'notbreakparaopen':1
        },
      'moin': {
        'spacedlistitem':1,
        'linkable':1,
        'blankendmotherlist':1,
        'keeplistindent':1,
        'tableable':1,
        'barinsidequote':1,
        'blankendtable':1,
        'tabletitlerowinbold':1,
        'tablecellstrip':1,
        'autotocwithbars':1,
        'tablecellaligntype':'cell'
        },
      'man' : {
        'spacedlistitem':1,
        'indentverbblock':1,
        'blankendmotherlist':1,
        'tagnotindentable':1,
        'tableable':1,
        'tablecellaligntype':'column',
        'tabletitlerowinbold':1,
        'tablecellstrip':1,
        'blankendtable':1,
        'keeplistindent':0,
        'barinsidequote':1,
        'parainsidelist':0,
        },
      'pm6' : {
        'keeplistindent':1,
        'verbblockfinalescape':1,
        #TODO add support for these - maybe set a JOINNEXT char and
        #     do it on addLineBreaks()
        'notbreaklistopen':1,
        'notbreakparaopen':1,
        'barinsidequote':1,
        'autotocwithbars':1,
        'onelinepara':1,
        }
    }

    # exceptions for --css-sugar
    # (changing the 'html' table is enough, because the 'xhtml' rules
    # are composed from it just below)
    if config['css-sugar'] and config['target'] in ('html','xhtml'):
        rules_bank['html']['indentverbblock'] = 0
        rules_bank['html']['autotocwithbars'] = 0

    # get the target specific rules
    if config['target'] == 'xhtml':
        myrules = rules_bank['html'].copy()   # inheritance
        myrules.update(rules_bank['xhtml'])   # get XHTML specific
    else:
        myrules = rules_bank[config['target']].copy()

    # populate return dictionary
    for key in allrules: ret[key] = 0        # reset all
    ret.update(myrules)                      # get rules

    return ret


##############################################################################


def getRegexes():
    """Returns all the regexes used to find the t2t marks.

    The result is a dictionary ('bank') which maps a mark name to its
    compiled regex object. One entry is not a regex: '_urlskel' holds
    the plain dictionary with the URL pattern pieces used to compose
    the link regexes.

    The 'macros' regex is composed from the keys of the global MACROS
    dictionary, so MACROS must be already defined when this function
    is called.
    """

    bank = {
    'blockVerbOpen':
        re.compile(r'^```\s*$'),
    'blockVerbClose':
        re.compile(r'^```\s*$'),
    'blockRawOpen':
        re.compile(r'^"""\s*$'),
    'blockRawClose':
        re.compile(r'^"""\s*$'),
    'quote':
        re.compile(r'^\t+'),
    '1lineVerb':
        re.compile(r'^``` (?=.)'),
    '1lineRaw':
        re.compile(r'^""" (?=.)'),
    # mono, raw, bold, italic, underline:
    # - marks must be glued with the contents, no boundary spaces
    # - they are greedy, so in ****bold****, turns to <b>**bold**</b>
    'fontMono':
        re.compile(  r'``([^\s](|.*?[^\s])`*)``'),
    'raw':
        re.compile(  r'""([^\s](|.*?[^\s])"*)""'),
    'fontBold':
        re.compile(r'\*\*([^\s](|.*?[^\s])\**)\*\*'),
    'fontItalic':
        re.compile(  r'//([^\s](|.*?[^\s])/*)//'),
    'fontUnderline':
        re.compile(  r'__([^\s](|.*?[^\s])_*)__'),
    'list':
        re.compile(r'^( *)(-) (?=[^ ])'),
    'numlist':
        re.compile(r'^( *)(\+) (?=[^ ])'),
    'deflist':
        re.compile(r'^( *)(:) (.*)$'),
    'listclose':
        re.compile(r'^( *)([-+:])\s*$'),
    'bar':
        re.compile(r'^(\s*)([_=-]{20,})\s*$'),
    'table':
        re.compile(r'^ *\|\|? '),
    'blankline':
        re.compile(r'^\s*$'),
    'comment':
        re.compile(r'^%'),

    # auxiliary tag regexes
    # (they match the ~X~ placeholders which appear on the tag strings)
    '_imgAlign'        : re.compile(r'~A~', re.I),
    '_tableAlign'      : re.compile(r'~A~', re.I),
    '_anchor'          : re.compile(r'~A~', re.I),
    '_tableBorder'     : re.compile(r'~B~', re.I),
    '_tableColAlign'   : re.compile(r'~C~', re.I),
    '_tableCellColSpan': re.compile(r'~S~', re.I),
    '_tableCellAlign'  : re.compile(r'~A~', re.I),
    }

    # special char to place data on TAGs contents (\a == bell)
    bank['x'] = re.compile('\a')

    # %%macroname [ (formatting) ]
    # (the %%%% is just an escaped %% for the string formatting)
    bank['macros'] = re.compile(r'%%%%(?P<name>%s)\b(\((?P<fmt>.*?)\))?'%(
        string.join(MACROS.keys(), '|')), re.I)

    # %%TOC special macro for TOC positioning
    bank['toc'] = re.compile(r'^ *%%toc\s*$', re.I)

    # almost complicated title regexes ;)
    # The skeleton takes two pieces: the title mark (id) and the title
    # text. The \1 forces the closing mark to be equal to the opening.
    titskel = r'^ *(?P<id>%s)(?P<txt>%s)\1(\[(?P<label>[\w-]*)\])?\s*$'
    bank[   'title'] = re.compile(titskel%('[=]{1,5}','[^=](|.*[^=])'))
    bank['numtitle'] = re.compile(titskel%('[+]{1,5}','[^+](|.*[^+])'))

    ### complicated regexes begin here ;)
    #
    # textual descriptions on --help's style: [...] is optional, | is OR


    ### first, some auxiliary variables
    #

    # [image.EXT]
    patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'

    # link things
    # ('login', 'chars', 'anchor', 'form' and 'punct' are bare character
    # lists, to be placed inside [...] by the patterns below)
    urlskel = {
      'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
      'guess' : r'(www[23]?|ftp)\.',         # w/out proto, try to guess
      'login' : r'A-Za-z0-9_.-',             # for ftp://login@domain.com
      'pass'  : r'[^ @]*',                   # for ftp://login:pass@dom.com
      'chars' : r'A-Za-z0-9%._/~:,=$@&+-',   # %20(space), :80(port), D&D
      'anchor': r'A-Za-z0-9%._-',            # %nn(encoded)
      'form'  : r'A-Za-z0-9/%&=+;.,$@*_-',   # .,@*_-(as is)
      'punct' : r'.,;:!?'
    }

    # username [ :password ] @
    patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])

    # [ http:// ] [ username:password@ ] domain.com [ / ]
    #     [ #anchor | ?form=data ]
    retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]+)?'%(
        urlskel['proto'],patt_url_login, urlskel['guess'],
        urlskel['chars'],urlskel['form'],urlskel['anchor'])

    # filename | [ filename ] #anchor
    retxt_url_local = r'[%s]+|[%s]*(#[%s]+)'%(
        urlskel['chars'],urlskel['chars'],urlskel['anchor'])

    # user@domain [ ?form=data ]
    patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
        urlskel['login'],urlskel['form'])

    # saving for future use
    bank['_urlskel'] = urlskel

    ### and now the real regexes
    #

    bank['email'] = re.compile(patt_email,re.I)

    # email | url
    bank['link'] = re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)

    # \[ label | imagetag    url | email | filename \]
    bank['linkmark'] = re.compile(
        r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
            patt_img, retxt_url, patt_email, retxt_url_local),
        re.L+re.I)

    # image
    bank['img'] = re.compile(patt_img, re.L+re.I)

    # special things
    bank['special'] = re.compile(r'^%!\s*')
    return bank
### END
OF regex nightmares 1340 1341 1342############################################################################## 1343 1344class error(Exception): 1345 pass 1346def echo(msg): # for quick debug 1347 print '\033[32;1m%s\033[m'%msg 1348def Quit(msg=''): 1349 if msg: print msg 1350 sys.exit(0) 1351def Error(msg): 1352 msg = _("%s: Error: ")%my_name + msg 1353 raise error, msg 1354def getTraceback(): 1355 try: 1356 from traceback import format_exception 1357 etype, value, tb = sys.exc_info() 1358 return string.join(format_exception(etype, value, tb), '') 1359 except: pass 1360def getUnknownErrorMessage(): 1361 msg = '%s\n%s (%s):\n\n%s'%( 1362 _('Sorry! Txt2tags aborted by an unknow error.'), 1363 _('Please send the following Error Traceback to the author'), 1364 my_email, getTraceback()) 1365 return msg 1366def Message(msg,level): 1367 if level <= VERBOSE and not QUIET: 1368 prefix = '-'*5 1369 print "%s %s"%(prefix*level, msg) 1370def Debug(msg,id=0,linenr=None): 1371 "Show debug messages, categorized (colored or not)" 1372 if QUIET or not DEBUG: return 1373 if int(id) not in range(8): id = 0 1374 # 0:black 1:red 2:green 3:yellow 4:blue 5:pink 6:cyan 7:white ;1:light 1375 ids = ['INI','CFG','SRC','BLK','HLD','GUI','OUT','DET'] 1376 colors_bgdark = ['7;1','1;1','3;1','6;1','4;1','5;1','2;1','7;1'] 1377 colors_bglight = ['0' ,'1' ,'3' ,'6' ,'4' ,'5' ,'2' ,'0' ] 1378 if linenr is not None: msg = "LINE %04d: %s"%(linenr,msg) 1379 if COLOR_DEBUG: 1380 if BG_LIGHT: color = colors_bglight[id] 1381 else : color = colors_bgdark[id] 1382 msg = '\033[3%sm%s\033[m'%(color,msg) 1383 print "++ %s: %s"%(ids[id],msg) 1384def Readfile(file, remove_linebreaks=0, ignore_error=0): 1385 data = [] 1386 if file == '-': 1387 try: data = sys.stdin.readlines() 1388 except: 1389 if not ignore_error: 1390 Error(_('You must feed me with data on STDIN!')) 1391 else: 1392 try: f = open(file); data = f.readlines() ; f.close() 1393 except: 1394 if not ignore_error: 1395 Error(_("Cannot read file:")+" 
%s"%file) 1396 if remove_linebreaks: 1397 data = map(lambda x:re.sub('[\n\r]+$','',x), data) 1398 Message(_("Readed file (%d lines): %s")%(len(data),file),2) 1399 return data 1400def Savefile(file, contents): 1401 try: f = open(file, 'wb') 1402 except: Error(_("Cannot open file for writing:")+" %s"%file) 1403 if type(contents) == type([]): doit = f.writelines 1404 else: doit = f.write 1405 doit(contents) ; f.close() 1406 1407def showdic(dic): 1408 for k in dic.keys(): print "%15s : %s" % (k,dic[k]) 1409def dotted_spaces(txt=''): 1410 return string.replace(txt,' ','.') 1411 1412# TIP: win env vars http://www.winnetmag.com/Article/ArticleID/23873/23873.html 1413def get_rc_path(): 1414 "Return the full path for the users' RC file" 1415 # try to get the path from an env var. if yes, we're done 1416 user_defined = os.environ.get('T2TCONFIG') 1417 if user_defined: return user_defined 1418 # env var not found, so perform automatic path composing 1419 # set default filename according system platform 1420 rc_names = {'default':'.txt2tagsrc', 'win':'_t2trc'} 1421 rc_file = rc_names.get(sys.platform[:3]) or rc_names['default'] 1422 # the file must be on the user directory, but where is this dir? 
1423 rc_dir_search = ['HOME', 'HOMEPATH'] 1424 for var in rc_dir_search: 1425 rc_dir = os.environ.get(var) 1426 if rc_dir: break 1427 # rc dir found, now we must join dir+file to compose the full path 1428 if rc_dir: 1429 # compose path and return it if the file exists 1430 rc_path = os.path.join(rc_dir, rc_file) 1431 # on windows, prefix with the drive (%homedrive%: 2k/XP/NT) 1432 if sys.platform[:3] == 'win': 1433 rc_drive = os.environ.get('HOMEDRIVE') 1434 rc_path = os.path.join(rc_drive,rc_path) 1435 return rc_path 1436 # sorry, not found 1437 return '' 1438 1439 1440 1441############################################################################## 1442 1443class CommandLine: 1444 """ 1445 Command Line class - Masters command line 1446 1447 This class checks and extract data from the provided command line. 1448 The --long options and flags are taken from the global OPTIONS, 1449 FLAGS and ACTIONS dictionaries. The short options are registered 1450 here, and also their equivalence to the long ones. 1451 1452 METHODS: 1453 _compose_short_opts() -> str 1454 _compose_long_opts() -> list 1455 Compose the valid short and long options list, on the 1456 'getopt' format. 1457 1458 parse() -> (opts, args) 1459 Call getopt to check and parse the command line. 1460 It expects to receive the command line as a list, and 1461 without the program name (sys.argv[1:]). 1462 1463 get_raw_config() -> [RAW config] 1464 Scans command line and convert the data to the RAW config 1465 format. See ConfigMaster class to the RAW format description. 1466 Optional 'ignore' and 'filter' arguments are used to filter 1467 in or out specified keys. 1468 1469 compose_cmdline(dict) -> [Command line] 1470 Compose a command line list from an already parsed config 1471 dictionary, generated from RAW by ConfigMaster(). Use 1472 this to compose an optimal command line for a group of 1473 options. 
1474 1475 The get_raw_config() calls parse(), so the tipical use of this 1476 class is: 1477 1478 raw = CommandLine().get_raw_config(sys.argv[1:]) 1479 """ 1480 def __init__(self): 1481 self.all_options = OPTIONS.keys() 1482 self.all_flags = FLAGS.keys() 1483 self.all_actions = ACTIONS.keys() 1484 1485 # short:long options equivalence 1486 self.short_long = { 1487 'h':'help' , 'V':'version', 1488 'n':'enum-title', 'i':'infile' , 1489 'H':'no-headers', 'o':'outfile', 1490 'v':'verbose' , 't':'target' , 1491 'q':'quiet' , 'C':'config-file' 1492 } 1493 1494 # compose valid short and long options data for getopt 1495 self.short_opts = self._compose_short_opts() 1496 self.long_opts = self._compose_long_opts() 1497 1498 def _compose_short_opts(self): 1499 "Returns a string like 'hVt:o' with all short options/flags" 1500 ret = [] 1501 for opt in self.short_long.keys(): 1502 long = self.short_long[opt] 1503 if long in self.all_options: # is flag or option? 1504 opt = opt+':' # option: have param 1505 ret.append(opt) 1506 #Debug('Valid SHORT options: %s'%ret) 1507 return string.join(ret, '') 1508 1509 def _compose_long_opts(self): 1510 "Returns a list with all the valid long options/flags" 1511 ret = map(lambda x:x+'=', self.all_options) # add = 1512 ret.extend(self.all_flags) # flag ON 1513 ret.extend(self.all_actions) # acts 1514 ret.extend(map(lambda x:'no-'+x, self.all_flags)) # add no-* 1515 ret.extend(['no-style','no-encoding']) # turn OFF 1516 ret.extend(['no-outfile','no-infile']) # turn OFF 1517 ret.extend(['no-dump-config', 'no-dump-source']) # turn OFF 1518 #Debug('Valid LONG options: %s'%ret) 1519 return ret 1520 1521 def _tokenize(self, cmd_string=''): 1522 "Convert a command line string to a list" 1523 #TODO protect quotes contents 1524 return string.split(cmd_string) 1525 1526 def parse(self, cmdline=[]): 1527 "Check/Parse a command line list TIP: no program name!" 
1528 # get the valid options 1529 short, long = self.short_opts, self.long_opts 1530 # parse it! 1531 try: 1532 opts, args = getopt.getopt(cmdline, short, long) 1533 except getopt.error, errmsg: 1534 Error(_("%s (try --help)")%errmsg) 1535 return (opts, args) 1536 1537 def get_raw_config(self, cmdline=[], ignore=[], filter=[], relative=0): 1538 "Returns the options/arguments found as RAW config" 1539 if not cmdline: return [] 1540 ret = [] 1541 # we need lists, not strings 1542 if type(cmdline) == type(''): cmdline = self._tokenize(cmdline) 1543 opts, args = self.parse(cmdline[:]) 1544 # parse all options 1545 for name,value in opts: 1546 # remove leading - and -- 1547 name = re.sub('^--?', '', name) 1548 # alias to old mispelled 'suGGar' 1549 if name == 'css-suggar': name = 'css-sugar' 1550 elif name == 'no-css-suggar': name = 'no-css-sugar' 1551 # translate short opt to long 1552 if len(name) == 1: name = self.short_long.get(name) 1553 # outfile exception: path relative to PWD 1554 if name == 'outfile' and relative \ 1555 and value not in [STDOUT, MODULEOUT]: 1556 value = os.path.abspath(value) 1557 # config-file inclusion, path relative to PWD 1558 if name == 'config-file': 1559 configs = ConfigLines().include_config_file( 1560 value) 1561 # remove the 'target' item of all configs 1562 configs = map(lambda c: [c[1],c[2]], configs) 1563 ret.extend(configs) 1564 continue 1565 # save it 1566 ret.append([name, value]) 1567 # get infile, if any 1568 while args: 1569 infile = args.pop(0) 1570 ret.append(['infile', infile]) 1571 # apply 'ignore' and 'filter' rules (filter is stronger) 1572 temp = ret[:] ; ret = [] 1573 for name,value in temp: 1574 if (not filter and not ignore) or \ 1575 (filter and name in filter) or \ 1576 (ignore and name not in ignore): 1577 ret.append( ['all', name, value] ) 1578 # add the original command line string as 'realcmdline' 1579 ret.append( ['all', 'realcmdline', cmdline] ) 1580 return ret 1581 1582 def compose_cmdline(self, conf={}, 
no_check=0): 1583 "compose a full (and diet) command line from CONF dict" 1584 if not conf: return [] 1585 args = [] 1586 dft_options = OPTIONS.copy() 1587 cfg = conf.copy() 1588 valid_opts = self.all_options + self.all_flags 1589 use_short = {'no-headers':'H', 'enum-title':'n'} 1590 # remove useless options 1591 if not no_check and cfg.get('toc-only'): 1592 if cfg.has_key('no-headers'): 1593 del cfg['no-headers'] 1594 if cfg.has_key('outfile'): 1595 del cfg['outfile'] # defaults to STDOUT 1596 if cfg.get('target') == 'txt': 1597 del cfg['target'] # already default 1598 args.append('--toc-only') # must be the first 1599 del cfg['toc-only'] 1600 # add target type 1601 if cfg.has_key('target'): 1602 args.append('-t '+cfg['target']) 1603 del cfg['target'] 1604 # add other options 1605 for key in cfg.keys(): 1606 if key not in valid_opts: continue # may be a %!setting 1607 if key in ['outfile','infile']: continue # later 1608 val = cfg[key] 1609 if not val: continue 1610 # default values are useless on cmdline 1611 if val == dft_options.get(key): continue 1612 # -short format 1613 if key in use_short.keys(): 1614 args.append('-'+use_short[key]) 1615 continue 1616 # --long format 1617 if key in self.all_flags: # add --option 1618 args.append('--'+key) 1619 else: # add --option=value 1620 args.append('--%s=%s'%(key,val)) 1621 # the outfile using -o 1622 if cfg.has_key('outfile') and \ 1623 cfg['outfile'] != dft_options.get('outfile'): 1624 args.append('-o '+cfg['outfile']) 1625 # place input file(s) always at the end 1626 if cfg.has_key('infile'): 1627 args.append(string.join(cfg['infile'],' ')) 1628 # return as a nice list 1629 Debug("Diet command line: %s"%string.join(args,' '), 1) 1630 return args 1631 1632############################################################################## 1633 1634class SourceDocument: 1635 """ 1636 SourceDocument class - scan document structure, extract data 1637 1638 It knows about full files. 
It reads a file and identify all 1639 the areas begining (Head,Conf,Body). With this info it can 1640 extract each area contents. 1641 Note: the original line break is removed. 1642 1643 DATA: 1644 self.arearef - Save Head, Conf, Body init line number 1645 self.areas - Store the area names which are not empty 1646 self.buffer - The full file contents (with NO \\r, \\n) 1647 1648 METHODS: 1649 get() - Access the contents of an Area. Example: 1650 config = SourceDocument(file).get('conf') 1651 1652 split() - Get all the document Areas at once. Example: 1653 head, conf, body = SourceDocument(file).split() 1654 1655 RULES: 1656 * The document parts are sequential: Head, Conf and Body. 1657 * One ends when the next begins. 1658 * The Conf Area is optional, so a document can have just 1659 Head and Body Areas. 1660 1661 These are the Areas limits: 1662 - Head Area: the first three lines 1663 - Body Area: from the first valid text line to the end 1664 - Conf Area: the comments between Head and Body Areas 1665 1666 Exception: If the first line is blank, this means no 1667 header info, so the Head Area is just the first line. 1668 """ 1669 def __init__(self, filename='', contents=[]): 1670 self.areas = ['head','conf','body'] 1671 self.arearef = [] 1672 self.areas_fancy = '' 1673 self.filename = filename 1674 self.buffer = [] 1675 if filename: 1676 self.scan_file(filename) 1677 elif contents: 1678 self.scan(contents) 1679 1680 def split(self): 1681 "Returns all document parts, splitted into lists." 
1682 return self.get('head'), self.get('conf'), self.get('body') 1683 1684 def get(self, areaname): 1685 "Returns head|conf|body contents from self.buffer" 1686 # sanity 1687 if areaname not in self.areas: return [] 1688 if not self.buffer : return [] 1689 # go get it 1690 bufini = 1 1691 bufend = len(self.buffer) 1692 if areaname == 'head': 1693 ini = bufini 1694 end = self.arearef[1] or self.arearef[2] or bufend 1695 elif areaname == 'conf': 1696 ini = self.arearef[1] 1697 end = self.arearef[2] or bufend 1698 elif areaname == 'body': 1699 ini = self.arearef[2] 1700 end = bufend 1701 else: 1702 Error("Unknown Area name '%s'"%areaname) 1703 lines = self.buffer[ini:end] 1704 # make sure head will always have 3 lines 1705 while areaname == 'head' and len(lines) < 3: 1706 lines.append('') 1707 return lines 1708 1709 def scan_file(self, filename): 1710 Debug("source file: %s"%filename) 1711 Message(_("Loading source document"),1) 1712 buf = Readfile(filename, remove_linebreaks=1) 1713 self.scan(buf) 1714 1715 def scan(self, lines): 1716 "Run through source file and identify head/conf/body areas" 1717 buf = lines 1718 if len(buf) == 0: 1719 Error(_('The input file is empty: %s')%self.filename) 1720 cfg_parser = ConfigLines().parse_line 1721 buf.insert(0, '') # text start at pos 1 1722 ref = [1,4,0] 1723 if not string.strip(buf[1]): # no header 1724 ref[0] = 0 ; ref[1] = 2 1725 rgx = getRegexes() 1726 for i in range(ref[1],len(buf)): # find body init: 1727 if string.strip(buf[i]) and ( # ... not blank and 1728 buf[i][0] != '%' or # ... not comment or 1729 rgx['macros'].match(buf[i]) or # ... %%macro 1730 rgx['toc'].match(buf[i]) or # ... %%toc 1731 cfg_parser(buf[i],'include')[1]): # ... 
%!include 1732 ref[2] = i ; break 1733 if ref[1] == ref[2]: ref[1] = 0 # no conf area 1734 for i in 0,1,2: # del !existent 1735 if ref[i] >= len(buf): ref[i] = 0 # title-only 1736 if not ref[i]: self.areas[i] = '' 1737 Debug('Head,Conf,Body start line: %s'%ref) 1738 self.arearef = ref # save results 1739 self.buffer = buf 1740 # fancyness sample: head conf body (1 4 8) 1741 self.areas_fancy = "%s (%s)"%( 1742 string.join(self.areas), 1743 string.join(map(str, map(lambda x:x or '', ref)))) 1744 Message(_("Areas found: %s")%self.areas_fancy, 2) 1745 1746 def get_raw_config(self): 1747 "Handy method to get the CONF area RAW config (if any)" 1748 if not self.areas.count('conf'): return [] 1749 Message(_("Scanning source document CONF area"),1) 1750 raw = ConfigLines( 1751 file=self.filename, lines=self.get('conf'), 1752 first_line=self.arearef[1]).get_raw_config() 1753 Debug("document raw config: %s"%raw, 1) 1754 return raw 1755 1756############################################################################## 1757 1758class ConfigMaster: 1759 """ 1760 ConfigMaster class - the configuration wizard 1761 1762 This class is the configuration master. It knows how to handle 1763 the RAW and PARSED config format. It also performs the sanity 1764 checkings for a given configuration. 1765 1766 DATA: 1767 self.raw - Stores the config on the RAW format 1768 self.parsed - Stores the config on the PARSED format 1769 self.defaults - Stores the default values for all keys 1770 self.off - Stores the OFF values for all keys 1771 self.multi - List of keys which can have multiple values 1772 self.numeric - List of keys which value must be a number 1773 self.incremental - List of keys which are incremental 1774 1775 RAW FORMAT: 1776 The RAW format is a list of lists, being each mother list item 1777 a full configuration entry. 
Any entry is a 3 item list, on 1778 the following format: [ TARGET, KEY, VALUE ] 1779 Being a list, the order is preserved, so it's easy to use 1780 different kinds of configs, as CONF area and command line, 1781 respecting the precedence. 1782 The special target 'all' is used when no specific target was 1783 defined on the original config. 1784 1785 PARSED FORMAT: 1786 The PARSED format is a dictionary, with all the 'key : value' 1787 found by reading the RAW config. The self.target contents 1788 matters, so this dictionary only contains the target's 1789 config. The configs of other targets are ignored. 1790 1791 The CommandLine and ConfigLines classes have the get_raw_config() 1792 method which convert the configuration found to the RAW format. 1793 Just feed it to parse() and get a brand-new ready-to-use config 1794 dictionary. Example: 1795 1796 >>> raw = CommandLine().get_raw_config(['-n', '-H']) 1797 >>> print raw 1798 [['all', 'enum-title', ''], ['all', 'no-headers', '']] 1799 >>> parsed = ConfigMaster(raw).parse() 1800 >>> print parsed 1801 {'enum-title': 1, 'headers': 0} 1802 """ 1803 def __init__(self, raw=[], target=''): 1804 self.raw = raw 1805 self.target = target 1806 self.parsed = {} 1807 self.dft_options = OPTIONS.copy() 1808 self.dft_flags = FLAGS.copy() 1809 self.dft_actions = ACTIONS.copy() 1810 self.dft_settings = SETTINGS.copy() 1811 self.defaults = self._get_defaults() 1812 self.off = self._get_off() 1813 self.multi = ['infile', 'options','preproc','postproc'] 1814 self.incremental = ['verbose'] 1815 self.numeric = ['toc-level','split'] 1816 1817 def _get_defaults(self): 1818 "Get the default values for all config/options/flags" 1819 empty = {} 1820 for kw in CONFIG_KEYWORDS: empty[kw] = '' 1821 empty.update(self.dft_options) 1822 empty.update(self.dft_flags) 1823 empty.update(self.dft_actions) 1824 empty.update(self.dft_settings) 1825 empty['realcmdline'] = '' # internal use only 1826 empty['sourcefile'] = '' # internal use only 1827 return 
empty 1828 1829 def _get_off(self): 1830 "Turns OFF all the config/options/flags" 1831 off = {} 1832 for key in self.defaults.keys(): 1833 kind = type(self.defaults[key]) 1834 if kind == type(9): 1835 off[key] = 0 1836 elif kind == type(''): 1837 off[key] = '' 1838 elif kind == type([]): 1839 off[key] = [] 1840 else: 1841 Error('ConfigMaster: %s: Unknown type'+key) 1842 return off 1843 1844 def _check_target(self): 1845 "Checks if the target is already defined. If not, do it" 1846 if not self.target: 1847 self.target = self.find_value('target') 1848 1849 def get_target_raw(self): 1850 "Returns the raw config for self.target or 'all'" 1851 ret = [] 1852 self._check_target() 1853 for entry in self.raw: 1854 if entry[0] in [self.target, 'all']: 1855 ret.append(entry) 1856 return ret 1857 1858 def add(self, key, val): 1859 "Adds the key:value pair to the config dictionary (if needed)" 1860 # %!options 1861 if key == 'options': 1862 ignoreme = self.dft_actions.keys() + ['target'] 1863 ignoreme.remove('dump-config') 1864 ignoreme.remove('dump-source') 1865 raw_opts = CommandLine().get_raw_config( 1866 val, ignore=ignoreme) 1867 for target, key, val in raw_opts: 1868 self.add(key, val) 1869 return 1870 # the no- prefix turns OFF this key 1871 if key[:3] == 'no-': 1872 key = key[3:] # remove prefix 1873 val = self.off.get(key) # turn key OFF 1874 # is this key valid? 1875 if key not in self.defaults.keys(): 1876 Debug('Bogus Config %s:%s'%(key,val),1) 1877 return 1878 # is this value the default one? 1879 if val == self.defaults.get(key): 1880 # if default value, remove previous key:val 1881 if self.parsed.has_key(key): 1882 del self.parsed[key] 1883 # nothing more to do 1884 return 1885 # flags ON comes empty. we'll add the 1 value now 1886 if val == '' and \ 1887 key in self.dft_flags.keys()+self.dft_actions.keys(): 1888 val = 1 1889 # multi value or single? 1890 if key in self.multi: 1891 # first one? 
start new list 1892 if not self.parsed.has_key(key): 1893 self.parsed[key] = [] 1894 self.parsed[key].append(val) 1895 # incremental value? so let's add it 1896 elif key in self.incremental: 1897 self.parsed[key] = (self.parsed.get(key) or 0) + val 1898 else: 1899 self.parsed[key] = val 1900 fancykey = dotted_spaces("%12s"%key) 1901 Message(_("Added config %s : %s")%(fancykey,val),3) 1902 1903 def get_outfile_name(self, config={}): 1904 "Dirname is the same for {in,out}file" 1905 infile, outfile = config['sourcefile'], config['outfile'] 1906 if outfile and outfile not in [STDOUT, MODULEOUT] \ 1907 and not os.path.isabs(outfile): 1908 outfile = os.path.join(os.path.dirname(infile), outfile) 1909 if infile == STDIN and not outfile: outfile = STDOUT 1910 if infile == MODULEIN and not outfile: outfile = MODULEOUT 1911 if not outfile and (infile and config.get('target')): 1912 basename = re.sub('\.(txt|t2t)$','',infile) 1913 outfile = "%s.%s"%(basename, config['target']) 1914 Debug(" infile: '%s'"%infile , 1) 1915 Debug("outfile: '%s'"%outfile, 1) 1916 return outfile 1917 1918 def sanity(self, config, gui=0): 1919 "Basic config sanity checkings" 1920 if not config: return {} 1921 target = config.get('target') 1922 # some actions don't require target specification 1923 if not target: 1924 for action in NO_TARGET: 1925 if config.get(action): 1926 target = 'txt' 1927 break 1928 # on GUI, some checkings are skipped 1929 if not gui: 1930 # we *need* a target 1931 if not target: 1932 Error(_('No target specified (try --help)')+\ 1933 '\n\n'+\ 1934 _('Maybe trying to convert an old v1.x file?')) 1935 # and of course, an infile also 1936 if not config.get('infile'): 1937 Error(_('Missing input file (try --help)')) 1938 # is the target valid? 
1939 if not TARGETS.count(target): 1940 Error(_("Invalid target '%s' (try --help)")%\ 1941 target) 1942 # ensure all keys are present 1943 empty = self.defaults.copy() ; empty.update(config) 1944 config = empty.copy() 1945 # check integers options 1946 for key in config.keys(): 1947 if key in self.numeric: 1948 try: config[key] = int(config[key]) 1949 except: Error(_('--%s value must be a number' 1950 )%key) 1951 # check split level value 1952 if config['split'] not in [0,1,2]: 1953 Error(_('Option --split must be 0, 1 or 2')) 1954 # --toc-only is stronger than others 1955 if config['toc-only']: 1956 config['headers'] = 0 1957 config['toc'] = 0 1958 config['split'] = 0 1959 config['gui'] = 0 1960 config['outfile'] = config['outfile'] or STDOUT 1961 # splitting is disable for now (future: HTML only, no STDOUT) 1962 config['split'] = 0 1963 # restore target 1964 config['target'] = target 1965 # set output file name 1966 config['outfile'] = self.get_outfile_name(config) 1967 # checking suicide 1968 if config['sourcefile'] == config['outfile'] and \ 1969 config['outfile'] not in [STDOUT,MODULEOUT] and not gui: 1970 Error(_("Input and Output files are the same: %s")%( 1971 config['outfile'])) 1972 return config 1973 1974 def parse(self): 1975 "Returns the parsed config for the current target" 1976 raw = self.get_target_raw() 1977 for target, key, value in raw: 1978 self.add(key, value) 1979 Message(_("Added the following keys: %s")%string.join( 1980 self.parsed.keys(),', '),2) 1981 return self.parsed.copy() 1982 1983 def find_value(self, key='', target=''): 1984 "Scans ALL raw config to find the desired key" 1985 ret = [] 1986 # scan and save all values found 1987 for targ, k, val in self.raw: 1988 if targ in [target, 'all'] and k == key: 1989 ret.append(val) 1990 if not ret: return '' 1991 # if not multi value, return only the last found 1992 if key in self.multi: return ret 1993 else : return ret[-1] 1994 
1995######################################################################## 1996 1997class ConfigLines: 1998 """ 1999 ConfigLines class - the config file data extractor 2000 2001 This class reads and parse the config lines on the %!key:val 2002 format, converting it to RAW config. It deals with user 2003 config file (RC file), source document CONF area and 2004 %!includeconf directives. 2005 2006 Call it passing a file name or feed the desired config lines. 2007 Then just call the get_raw_config() method and wait to 2008 receive the full config data on the RAW format. This method 2009 also follows the possible %!includeconf directives found on 2010 the config lines. Example: 2011 2012 raw = ConfigLines(file=".txt2tagsrc").get_raw_config() 2013 2014 The parse_line() method is also useful to be used alone, 2015 to identify and tokenize a single config line. For example, 2016 to get the %!include command components, on the source 2017 document BODY: 2018 2019 target, key, value = ConfigLines().parse_line(body_line) 2020 """ 2021 def __init__(self, file='', lines=[], first_line=1): 2022 self.file = file or 'NOFILE' 2023 self.lines = lines 2024 self.first_line = first_line 2025 2026 def load_lines(self): 2027 "Make sure we've loaded the file contents into buffer" 2028 if not self.lines and not self.file: 2029 Error("ConfigLines: No file or lines provided") 2030 if not self.lines: 2031 self.lines = self.read_config_file(self.file) 2032 2033 def read_config_file(self, filename=''): 2034 "Read a Config File contents, aborting on invalid line" 2035 if not filename: return [] 2036 errormsg = _("Invalid CONFIG line on %s")+"\n%03d:%s" 2037 lines = Readfile(filename, remove_linebreaks=1) 2038 # sanity: try to find invalid config lines 2039 for i in range(len(lines)): 2040 line = string.rstrip(lines[i]) 2041 if not line: continue # empty 2042 if line[0] != '%': Error(errormsg%(filename,i+1,line)) 2043 return lines 2044 2045 def include_config_file(self, file=''): 2046 "Perform 
the %!includeconf action, returning RAW config" 2047 if not file: return [] 2048 # current dir relative to the current file (self.file) 2049 current_dir = os.path.dirname(self.file) 2050 file = os.path.join(current_dir, file) 2051 # read and parse included config file contents 2052 lines = self.read_config_file(file) 2053 return ConfigLines(file=file, lines=lines).get_raw_config() 2054 2055 def get_raw_config(self): 2056 "Scan buffer and extract all config as RAW (including includes)" 2057 ret = [] 2058 self.load_lines() 2059 first = self.first_line 2060 for i in range(len(self.lines)): 2061 line = self.lines[i] 2062 Message(_("Processing line %03d: %s")%(first+i,line),2) 2063 target, key, val = self.parse_line(line) 2064 if not key: continue # no config on this line 2065 if key == 'includeconf': 2066 err = _('A file cannot include itself (loop!)') 2067 if val == self.file: 2068 Error("%s: %%!includeconf: %s"%( 2069 err, self.file)) 2070 more_raw = self.include_config_file(val) 2071 ret.extend(more_raw) 2072 Message(_("Finished Config file inclusion: %s" 2073 )%(val),2) 2074 else: 2075 ret.append([target, key, val]) 2076 Message(_("Added %s")%key,3) 2077 return ret 2078 2079 def parse_line(self, line='', keyname='', target=''): 2080 "Detects %!key:val config lines and extract data from it" 2081 empty = ['', '', ''] 2082 if not line: return empty 2083 no_target = ['target', 'includeconf'] 2084 re_name = keyname or '[a-z]+' 2085 re_target = target or '[a-z]*' 2086 cfgregex = re.compile(""" 2087 ^%%!\s* # leading id with opt spaces 2088 (?P<name>%s)\s* # config name 2089 (\((?P<target>%s)\))? # optional target spec inside () 2090 \s*:\s* # key:value delimiter with opt spaces 2091 (?P<value>\S.+?) 
# config value 2092 \s*$ # rstrip() spaces and hit EOL 2093 """%(re_name,re_target), re.I+re.VERBOSE) 2094 prepostregex = re.compile(""" 2095 # ---[ PATTERN ]--- 2096 ^( "([^"]*)" # "double quoted" or 2097 | '([^']*)' # 'single quoted' or 2098 | ([^\s]+) # single_word 2099 ) 2100 \s+ # separated by spaces 2101 2102 # ---[ REPLACE ]--- 2103 ( "([^"]*)" # "double quoted" or 2104 | '([^']*)' # 'single quoted' or 2105 | (.*) # anything 2106 ) 2107 \s*$ 2108 """, re.VERBOSE) 2109 guicolors = re.compile("^([^\s]+\s+){3}[^\s]+") # 4 tokens 2110 match = cfgregex.match(line) 2111 if not match: return empty 2112 2113 name = string.lower(match.group('name') or '') 2114 target = string.lower(match.group('target') or 'all') 2115 value = match.group('value') 2116 2117 # NO target keywords: force all targets 2118 if name in no_target: target = 'all' 2119 2120 # special config for GUI colors 2121 if name == 'guicolors': 2122 valmatch = guicolors.search(value) 2123 if not valmatch: return empty 2124 value = re.split('\s+', value) 2125 2126 # Special config with two quoted values (%!preproc: "foo" 'bar') 2127 if name in ['preproc','postproc']: 2128 valmatch = prepostregex.search(value) 2129 if not valmatch: return empty 2130 getval = valmatch.group 2131 patt = getval(2) or getval(3) or getval(4) or '' 2132 repl = getval(6) or getval(7) or getval(8) or '' 2133 value = (patt, repl) 2134 return [target, name, value] 2135 2136############################################################################## 2137 2138class MaskMaster: 2139 "(Un)Protect important structures from escaping and formatting" 2140 def __init__(self): 2141 self.linkmask = 'vvvLINKvvv' 2142 self.monomask = 'vvvMONOvvv' 2143 self.macromask = 'vvvMACROvvv' 2144 self.rawmask = 'vvvRAWvvv' 2145 self.tocmask = 'vvvTOCvvv' 2146 self.macroman = MacroMaster() 2147 self.reset() 2148 2149 def reset(self): 2150 self.linkbank = [] 2151 self.monobank = [] 2152 self.macrobank = [] 2153 self.rawbank = [] 2154 2155 def mask(self, 
line=''): 2156 global AUTOTOC 2157 2158 # protect raw text 2159 while regex['raw'].search(line): 2160 txt = regex['raw'].search(line).group(1) 2161 txt = doEscape(TARGET,txt) 2162 self.rawbank.append(txt) 2163 line = regex['raw'].sub(self.rawmask,line,1) 2164 2165 # protect pre-formatted font text 2166 while regex['fontMono'].search(line): 2167 txt = regex['fontMono'].search(line).group(1) 2168 txt = doEscape(TARGET,txt) 2169 self.monobank.append(txt) 2170 line = regex['fontMono'].sub(self.monomask,line,1) 2171 2172 # protect macros 2173 while regex['macros'].search(line): 2174 txt = regex['macros'].search(line).group() 2175 self.macrobank.append(txt) 2176 line = regex['macros'].sub(self.macromask,line,1) 2177 2178 # protect TOC location 2179 while regex['toc'].search(line): 2180 line = regex['toc'].sub(self.tocmask,line) 2181 AUTOTOC = 0 2182 2183 # protect URLs and emails 2184 while regex['linkmark'].search(line) or \ 2185 regex['link' ].search(line): 2186 2187 # try to match plain or named links 2188 match_link = regex['link'].search(line) 2189 match_named = regex['linkmark'].search(line) 2190 2191 # define the current match 2192 if match_link and match_named: 2193 # both types found, which is the first? 
2194 m = match_link 2195 if match_named.start() < match_link.start(): 2196 m = match_named 2197 else: 2198 # just one type found, we're fine 2199 m = match_link or match_named 2200 2201 # extract link data and apply mask 2202 if m == match_link: # plain link 2203 link = m.group() 2204 label = '' 2205 link_re = regex['link'] 2206 else: # named link 2207 link = m.group('link') 2208 label = string.rstrip(m.group('label')) 2209 link_re = regex['linkmark'] 2210 line = link_re.sub(self.linkmask,line,1) 2211 2212 # save link data to the link bank 2213 self.linkbank.append((label, link)) 2214 return line 2215 2216 def undo(self, line): 2217 2218 # url & email 2219 for label,url in self.linkbank: 2220 link = get_tagged_link(label, url) 2221 line = string.replace(line, self.linkmask, link, 1) 2222 2223 # expand macros 2224 for macro in self.macrobank: 2225 macro = self.macroman.expand(macro) 2226 line = string.replace(line, self.macromask, macro,1) 2227 2228 # expand verb 2229 for mono in self.monobank: 2230 open,close = TAGS['fontMonoOpen'],TAGS['fontMonoClose'] 2231 tagged = open+mono+close 2232 line = string.replace(line,self.monomask,tagged,1) 2233 2234 # expand raw 2235 for raw in self.rawbank: 2236 line = string.replace(line,self.rawmask,raw,1) 2237 2238 return line 2239 2240 2241############################################################################## 2242 2243 2244class TitleMaster: 2245 "Title things" 2246 def __init__(self): 2247 self.count = ['',0,0,0,0,0] 2248 self.toc = [] 2249 self.level = 0 2250 self.kind = '' 2251 self.txt = '' 2252 self.label = '' 2253 self.tag = '' 2254 self.tag_hold = [] 2255 self.last_level = 0 2256 self.count_id = '' 2257 self.user_labels = {} 2258 self.anchor_count = 0 2259 self.anchor_prefix = 'toc' 2260 2261 def _open_close_blocks(self): 2262 "Open new title blocks, closing the previous (if any)" 2263 if not rules['titleblocks']: return 2264 tag = '' 2265 last = self.last_level 2266 curr = self.level 2267 2268 # same level, just 
close the previous 2269 if curr == last: 2270 tag = TAGS.get('title%dClose'%last) 2271 if tag: self.tag_hold.append(tag) 2272 2273 # section -> subsection, more depth 2274 while curr > last: 2275 last = last + 1 2276 2277 # open the new block of subsections 2278 tag = TAGS.get('blockTitle%dOpen'%last) 2279 if tag: self.tag_hold.append(tag) 2280 2281 # jump from title1 to title3 or more 2282 # fill the gap with an empty section 2283 if curr - last > 0: 2284 tag = TAGS.get('title%dOpen'%last) 2285 tag = regex['x'].sub('', tag) # del \a 2286 if tag: self.tag_hold.append(tag) 2287 2288 # section <- subsection, less depth 2289 while curr < last: 2290 # close the current opened subsection 2291 tag = TAGS.get('title%dClose'%last) 2292 if tag: self.tag_hold.append(tag) 2293 2294 # close the current opened block of subsections 2295 tag = TAGS.get('blockTitle%dClose'%last) 2296 if tag: self.tag_hold.append(tag) 2297 2298 last = last - 1 2299 2300 # close the previous section of the same level 2301 # the subsections were under it 2302 if curr == last: 2303 tag = TAGS.get('title%dClose'%last) 2304 if tag: self.tag_hold.append(tag) 2305 2306 def add(self, line): 2307 "Parses a new title line." 2308 if not line: return 2309 self._set_prop(line) 2310 self._open_close_blocks() 2311 self._set_count_id() 2312 self._set_label() 2313 self._save_toc_info() 2314 2315 def close_all(self): 2316 "Closes all opened title blocks" 2317 ret = [] 2318 ret.extend(self.tag_hold) 2319 while self.level: 2320 tag = TAGS.get('title%dClose'%self.level) 2321 if tag: ret.append(tag) 2322 tag = TAGS.get('blockTitle%dClose'%self.level) 2323 if tag: ret.append(tag) 2324 self.level = self.level - 1 2325 return ret 2326 2327 def _save_toc_info(self): 2328 "Save TOC info, used by self.dump_marked_toc()" 2329 self.toc.append((self.level, self.count_id, 2330 self.txt , self.label )) 2331 2332 def _set_prop(self, line=''): 2333 "Extract info from original line and set data holders." 
2334 # detect title type (numbered or not) 2335 id = string.lstrip(line)[0] 2336 if id == '=': kind = 'title' 2337 elif id == '+': kind = 'numtitle' 2338 else: Error("Unknown Title ID '%s'"%id) 2339 # extract line info 2340 match = regex[kind].search(line) 2341 level = len(match.group('id')) 2342 txt = string.strip(match.group('txt')) 2343 label = match.group('label') 2344 # parse info & save 2345 if CONF['enum-title']: kind = 'numtitle' # force 2346 if rules['titleblocks']: 2347 self.tag = TAGS.get('%s%dOpen'%(kind,level)) or \ 2348 TAGS.get('title%dOpen'%level) 2349 else: 2350 self.tag = TAGS.get(kind+`level`) or \ 2351 TAGS.get('title'+`level`) 2352 self.last_level = self.level 2353 self.kind = kind 2354 self.level = level 2355 self.txt = txt 2356 self.label = label 2357 2358 def _set_count_id(self): 2359 "Compose and save the title count identifier (if needed)." 2360 count_id = '' 2361 if self.kind == 'numtitle' and not rules['autonumbertitle']: 2362 # manually increase title count 2363 self.count[self.level] = self.count[self.level] +1 2364 # reset sublevels count (if any) 2365 max_levels = len(self.count) 2366 if self.level < max_levels-1: 2367 for i in range(self.level+1, max_levels): 2368 self.count[i] = 0 2369 # compose count id from hierarchy 2370 for i in range(self.level): 2371 count_id= "%s%d."%(count_id, self.count[i+1]) 2372 self.count_id = count_id 2373 2374 def _set_label(self): 2375 "Compose and save title label, used by anchors." 2376 # remove invalid chars from label set by user 2377 self.label = re.sub('[^A-Za-z0-9_-]', '', self.label or '') 2378 # generate name as 15 first :alnum: chars 2379 #TODO how to translate safely accented chars to plain? 2380 #self.label = re.sub('[^A-Za-z0-9]', '', self.txt)[:15] 2381 # 'tocN' label - sequential count, ignoring 'toc-level' 2382 #self.label = self.anchor_prefix + str(len(self.toc)+1) 2383 2384 def _get_tagged_anchor(self): 2385 "Return anchor if user defined a label, or TOC is on." 
2386 ret = '' 2387 label = self.label 2388 if CONF['toc'] and self.level <= CONF['toc-level']: 2389 # this count is needed bcos self.toc stores all 2390 # titles, regardless of the 'toc-level' setting, 2391 # so we can't use self.toc lenght to number anchors 2392 self.anchor_count = self.anchor_count + 1 2393 # autonumber label (if needed) 2394 label = label or '%s%s'%( 2395 self.anchor_prefix, self.anchor_count) 2396 if label and TAGS['anchor']: 2397 ret = regex['x'].sub(label,TAGS['anchor']) 2398 return ret 2399 2400 def _get_full_title_text(self): 2401 "Returns the full title contents, already escaped." 2402 ret = self.txt 2403 # insert count_id (if any) before text 2404 if self.count_id: 2405 ret = '%s %s'%(self.count_id, ret) 2406 # escape specials 2407 ret = doEscape(TARGET, ret) 2408 # same targets needs final escapes on title lines 2409 # it's here because there is a 'continue' after title 2410 if rules['finalescapetitle']: 2411 ret = doFinalEscape(TARGET, ret) 2412 return ret 2413 2414 def get(self): 2415 "Returns the tagged title as a list." 2416 ret = [] 2417 2418 # maybe some anchoring before? 
2419 anchor = self._get_tagged_anchor() 2420 self.tag = regex['_anchor'].sub(anchor, self.tag) 2421 2422 ### compose & escape title text (TOC uses unescaped) 2423 full_title = self._get_full_title_text() 2424 2425 # close previous section area 2426 ret.extend(self.tag_hold) 2427 self.tag_hold = [] 2428 2429 # finish title, adding "underline" on TXT target 2430 tagged = regex['x'].sub(full_title, self.tag) 2431 2432 if TARGET == 'txt': 2433 ret.append('') # blank line before 2434 ret.append(tagged) 2435 ret.append(regex['x'].sub('='*len(full_title),self.tag)) 2436 ret.append('') # blank line after 2437 else: 2438 ret.append(tagged) 2439 2440 return ret 2441 2442 def dump_marked_toc(self, max_level=99): 2443 "Dumps all toc itens as a valid t2t markup list" 2444 #TODO maybe use quote+linebreaks instead lists 2445 ret = [] 2446 toc_count = 1 2447 for level, count_id, txt, label in self.toc: 2448 if level > max_level: continue # ignore 2449 indent = ' '*level 2450 id_txt = string.lstrip('%s %s'%(count_id, txt)) 2451 label = label or self.anchor_prefix+`toc_count` 2452 toc_count = toc_count + 1 2453 # TOC will have links 2454 if TAGS['anchor']: 2455 # TOC is more readable with master topics 2456 # not linked at number. This is a stoled 2457 # idea from Windows .CHM help files 2458 if CONF['enum-title'] and level == 1: 2459 tocitem = '%s+ [""%s"" #%s]'%( 2460 indent, txt, label) 2461 else: 2462 tocitem = '%s- [""%s"" #%s]'%( 2463 indent, id_txt, label) 2464 # no links on TOC, just text 2465 else: 2466 # man don't reformat TOC lines, cool! 
2467 if TARGET in ['txt', 'man']: 2468 tocitem = '%s""%s""' %( 2469 indent, id_txt) 2470 else: 2471 tocitem = '%s- ""%s""'%( 2472 indent, id_txt) 2473 ret.append(tocitem) 2474 return ret 2475 2476 2477############################################################################## 2478 2479#TODO check all this table mess 2480# trata linhas TABLE, com as prop do parse_row 2481# o metodo table() do BLOCK xunxa e troca as celulas pelas parseadas 2482class TableMaster: 2483 def __init__(self, line=''): 2484 self.rows = [] 2485 self.border = 0 2486 self.align = 'Left' 2487 self.cellalign = [] 2488 self.cellspan = [] 2489 if line: 2490 prop = self.parse_row(line) 2491 self.border = prop['border'] 2492 self.align = prop['align'] 2493 self.cellalign = prop['cellalign'] 2494 self.cellspan = prop['cellspan'] 2495 2496 def _get_open_tag(self): 2497 topen = TAGS['tableOpen'] 2498 tborder = TAGS['tableBorder'] 2499 talign = TAGS['tableAlign'+self.align] 2500 calignsep = TAGS['tableColAlignSep'] 2501 calign = '' 2502 2503 # the first line defines if table has border or not 2504 if not self.border: tborder = '' 2505 # set the columns alignment 2506 if rules['tablecellaligntype'] == 'column': 2507 calign = map(lambda x: TAGS['tableColAlign%s'%x], 2508 self.cellalign) 2509 calign = string.join(calign, calignsep) 2510 # align full table, set border and Column align (if any) 2511 topen = regex['_tableAlign' ].sub(talign , topen) 2512 topen = regex['_tableBorder' ].sub(tborder, topen) 2513 topen = regex['_tableColAlign'].sub(calign , topen) 2514 # tex table spec, border or not: {|l|c|r|} , {lcr} 2515 if calignsep and not self.border: 2516 # remove cell align separator 2517 topen = string.replace(topen, calignsep, '') 2518 return topen 2519 2520 def _get_cell_align(self, cells): 2521 ret = [] 2522 for cell in cells: 2523 align = 'Left' 2524 if string.strip(cell): 2525 if cell[0] == ' ' and cell[-1] == ' ': 2526 align = 'Center' 2527 elif cell[0] == ' ': 2528 align = 'Right' 2529 
ret.append(align) 2530 return ret 2531 2532 def _get_cell_span(self, cells): 2533 ret = [] 2534 for cell in cells: 2535 span = 0 2536 m = re.search('\a(\|+)$', cell) 2537 if m: span = len(m.group(1))+1 2538 ret.append(span) 2539 return ret 2540 2541 def _tag_cells(self, rowdata): 2542 row = [] 2543 cells = rowdata['cells'] 2544 open = TAGS['tableCellOpen'] 2545 close = TAGS['tableCellClose'] 2546 sep = TAGS['tableCellSep'] 2547 calign = map(lambda x: TAGS['tableCellAlign'+x], 2548 rowdata['cellalign']) 2549 # populate the span tag 2550 cspan = [] 2551 for i in rowdata['cellspan']: 2552 if i > 0: 2553 cspan.append(regex['x'].sub( 2554 str(i), TAGS['tableCellColSpan'])) 2555 else: 2556 cspan.append('') 2557 2558 # maybe is it a title row? 2559 if rowdata['title']: 2560 open = TAGS['tableTitleCellOpen'] or open 2561 close = TAGS['tableTitleCellClose'] or close 2562 sep = TAGS['tableTitleCellSep'] or sep 2563 2564 # should we break the line on *each* table cell? 2565 if rules['breaktablecell']: close = close+'\n' 2566 2567 # cells pre processing 2568 if rules['tablecellstrip']: 2569 cells = map(lambda x: string.strip(x), cells) 2570 if rowdata['title'] and rules['tabletitlerowinbold']: 2571 cells = map(lambda x: enclose_me('fontBold',x), cells) 2572 2573 # add cell BEGIN/END tags 2574 for cell in cells: 2575 copen = open 2576 # insert cell align into open tag (if cell is alignable) 2577 if rules['tablecellaligntype'] == 'cell': 2578 copen = regex['_tableCellAlign'].sub( 2579 calign.pop(0), copen) 2580 if rules['tablecellspannable']: 2581 copen = regex['_tableCellColSpan'].sub( 2582 cspan.pop(0), copen) 2583 row.append(copen + cell + close) 2584 2585 # maybe there are cell separators? 
2586 return string.join(row, sep) 2587 2588 def add_row(self, cells): 2589 self.rows.append(cells) 2590 2591 def parse_row(self, line): 2592 # default table proprierties 2593 ret = {'border':0,'title':0,'align':'Left', 2594 'cells':[],'cellalign':[], 'cellspan':[]} 2595 # detect table align (and remove spaces mark) 2596 if line[0] == ' ': ret['align'] = 'Center' 2597 line = string.lstrip(line) 2598 # detect title mark 2599 if line[1] == '|': ret['title'] = 1 2600 # detect border mark and normalize the EOL 2601 m = re.search(' (\|+) *$', line) 2602 if m: line = line+' ' ; ret['border'] = 1 2603 else: line = line+' | ' 2604 # delete table mark 2605 line = regex['table'].sub('', line) 2606 # detect colspan | foo | bar baz ||| 2607 line = re.sub(' (\|+)\| ', '\a\\1 | ', line) 2608 # split cells (the last is fake) 2609 ret['cells'] = string.split(line, ' | ')[:-1] 2610 # find cells span 2611 ret['cellspan'] = self._get_cell_span(ret['cells']) 2612 # remove span ID 2613 ret['cells'] = map(lambda x:re.sub('\a\|+$','',x),ret['cells']) 2614 # find cells align 2615 ret['cellalign'] = self._get_cell_align(ret['cells']) 2616 # hooray! 
2617 Debug('Table Prop: %s' % ret, 7) 2618 return ret 2619 2620 def dump(self): 2621 open = self._get_open_tag() 2622 rows = self.rows 2623 close = TAGS['tableClose'] 2624 2625 rowopen = TAGS['tableRowOpen'] 2626 rowclose = TAGS['tableRowClose'] 2627 rowsep = TAGS['tableRowSep'] 2628 titrowopen = TAGS['tableTitleRowOpen'] or rowopen 2629 titrowclose = TAGS['tableTitleRowClose'] or rowclose 2630 2631 if rules['breaktablelineopen']: 2632 rowopen = rowopen + '\n' 2633 titrowopen = titrowopen + '\n' 2634 2635 # tex gotchas 2636 if TARGET == 'tex': 2637 if not self.border: 2638 rowopen = titrowopen = '' 2639 else: 2640 close = rowopen + close 2641 2642 # now we tag all the table cells on each row 2643 #tagged_cells = map(lambda x: self._tag_cells(x), rows) #!py15 2644 tagged_cells = [] 2645 for cell in rows: tagged_cells.append(self._tag_cells(cell)) 2646 2647 # add row separator tags between lines 2648 tagged_rows = [] 2649 if rowsep: 2650 #!py15 2651 #tagged_rows = map(lambda x:x+rowsep, tagged_cells) 2652 for cell in tagged_cells: 2653 tagged_rows.append(cell+rowsep) 2654 # remove last rowsep, because the table is over 2655 tagged_rows[-1] = string.replace( 2656 tagged_rows[-1], rowsep, '') 2657 # add row BEGIN/END tags for each line 2658 else: 2659 for rowdata in rows: 2660 if rowdata['title']: 2661 o,c = titrowopen, titrowclose 2662 else: 2663 o,c = rowopen, rowclose 2664 row = tagged_cells.pop(0) 2665 tagged_rows.append(o + row + c) 2666 2667 fulltable = [open] + tagged_rows + [close] 2668 2669 if rules['blankendtable']: fulltable.append('') 2670 return fulltable 2671 2672 2673############################################################################## 2674 2675 2676class BlockMaster: 2677 "TIP: use blockin/out to add/del holders" 2678 def __init__(self): 2679 self.BLK = [] 2680 self.HLD = [] 2681 self.PRP = [] 2682 self.depth = 0 2683 self.last = '' 2684 self.tableparser = None 2685 self.contains = { 2686 'para' :['passthru','raw'], 2687 'verb' :[], 2688 
'table' :[], 2689 'raw' :[], 2690 'passthru':[], 2691 'quote' :['quote','passthru','raw'], 2692 'list' :['list' ,'numlist' ,'deflist','para','verb', 2693 'raw' ,'passthru'], 2694 'numlist' :['list' ,'numlist' ,'deflist','para','verb', 2695 'raw' ,'passthru'], 2696 'deflist' :['list' ,'numlist' ,'deflist','para','verb', 2697 'raw' ,'passthru'] 2698 } 2699 self.allblocks = self.contains.keys() 2700 2701 def block(self): 2702 if not self.BLK: return '' 2703 return self.BLK[-1] 2704 2705 def isblock(self, name=''): 2706 return self.block() == name 2707 2708 def prop(self, key): 2709 if not self.PRP: return '' 2710 return self.PRP[-1].get(key) or '' 2711 2712 def propset(self, key, val): 2713 self.PRP[-1][key] = val 2714 #Debug('BLOCK prop ++: %s->%s'%(key,repr(val)), 1) 2715 #Debug('BLOCK props: %s'%(repr(self.PRP)), 1) 2716 2717 def hold(self): 2718 if not self.HLD: return [] 2719 return self.HLD[-1] 2720 2721 def holdadd(self, line): 2722 if self.block()[-4:] == 'list': line = [line] 2723 self.HLD[-1].append(line) 2724 Debug('HOLD add: %s'%repr(line), 4) 2725 Debug('FULL HOLD: %s'%self.HLD, 4) 2726 2727 def holdaddsub(self, line): 2728 self.HLD[-1][-1].append(line) 2729 Debug('HOLD addsub: %s'%repr(line), 4) 2730 Debug('FULL HOLD: %s'%self.HLD, 4) 2731 2732 def holdextend(self, lines): 2733 if self.block()[-4:] == 'list': lines = [lines] 2734 self.HLD[-1].extend(lines) 2735 Debug('HOLD extend: %s'%repr(lines), 4) 2736 Debug('FULL HOLD: %s'%self.HLD, 4) 2737 2738 def blockin(self, block): 2739 ret = [] 2740 if block not in self.allblocks: 2741 Error("Invalid block '%s'"%block) 2742 # first, let's close other possible open blocks 2743 while self.block() and block not in self.contains[self.block()]: 2744 ret.extend(self.blockout()) 2745 # now we can gladly add this new one 2746 self.BLK.append(block) 2747 self.HLD.append([]) 2748 self.PRP.append({}) 2749 if block == 'table': self.tableparser = TableMaster() 2750 # deeper and deeper 2751 self.depth = len(self.BLK) 2752 
Debug('block ++ (%s): %s' % (block,self.BLK), 3) 2753 return ret 2754 2755 def blockout(self): 2756 if not self.BLK: Error('No block to pop') 2757 self.last = self.BLK.pop() 2758 tagged = getattr(self, self.last)() 2759 parsed = self.HLD.pop() 2760 self.PRP.pop() 2761 self.depth = len(self.BLK) 2762 if self.last == 'table': del self.tableparser 2763 # inserting a nested block into mother 2764 if self.block(): 2765 if self.block()[-4:] == 'list': 2766 self.HLD[-1][-1].append(tagged) 2767 else: 2768 self.HLD[-1].append(tagged) 2769 tagged = [] # reset. mother will have it all 2770 Debug('block -- (%s): %s' % (self.last,self.BLK), 3) 2771 Debug('RELEASED (%s): %s' % (self.last,parsed), 3) 2772 if tagged: Debug('BLOCK: %s'%tagged, 6) 2773 return tagged 2774 2775 def _last_escapes(self, line): 2776 return doFinalEscape(TARGET, line) 2777 2778 def _get_escaped_hold(self): 2779 ret = [] 2780 for line in self.hold(): 2781 linetype = type(line) 2782 if linetype == type(''): 2783 ret.append(self._last_escapes(line)) 2784 elif linetype == type([]): 2785 ret.extend(line) 2786 else: 2787 Error("BlockMaster: Unknown HOLD item type:" 2788 " %s"%linetype) 2789 return ret 2790 2791 def _remove_twoblanks(self, lastitem): 2792 if len(lastitem) > 1 and lastitem[-2:] == ['','']: 2793 return lastitem[:-2] 2794 return lastitem 2795 2796 def passthru(self): 2797 return self.hold() 2798 2799 def raw(self): 2800 lines = self.hold() 2801 return map(lambda x: doEscape(TARGET, x), lines) 2802 2803 def para(self): 2804 tagged = [] 2805 open = TAGS['paragraphOpen'] 2806 close = TAGS['paragraphClose'] 2807 lines = self._get_escaped_hold() 2808 # open (or not) paragraph 2809 if not open+close and self.last == 'para': 2810 pass # avoids multiple blank lines 2811 else: 2812 tagged.append(open) 2813 # pagemaker likes a paragraph as a single long line 2814 if rules['onelinepara']: 2815 tagged.append(string.join(lines,' ')) 2816 # others are normal :) 2817 else: 2818 tagged.extend(lines) 2819 
tagged.append(close) 2820 2821 # very very very very very very very very very UGLY fix 2822 # needed because <center> can't appear inside <p> 2823 try: 2824 if len(lines) == 1 and \ 2825 TARGET in ('html', 'xhtml') and \ 2826 re.match('^\s*<center>.*</center>\s*$', lines[0]): 2827 tagged = [lines[0]] 2828 except: pass 2829 2830 return tagged 2831 2832 def verb(self): 2833 "Verbatim lines are not masked, so there's no need to unmask" 2834 tagged = [] 2835 tagged.append(TAGS['blockVerbOpen']) 2836 for line in self.hold(): 2837 if self.prop('mapped') == 'table': 2838 line = MacroMaster().expand(line) 2839 if not rules['verbblocknotescaped']: 2840 line = doEscape(TARGET,line) 2841 if rules['indentverbblock']: 2842 line = ' '+line 2843 if rules['verbblockfinalescape']: 2844 line = doFinalEscape(TARGET, line) 2845 tagged.append(line) 2846 #TODO maybe use if not TAGS['blockVerbClose'] 2847 if TARGET != 'pm6': 2848 tagged.append(TAGS['blockVerbClose']) 2849 return tagged 2850 2851 def table(self): 2852 # rewrite all table cells by the unmasked and escaped data 2853 lines = self._get_escaped_hold() 2854 for i in range(len(lines)): 2855 cells = string.split(lines[i], SEPARATOR) 2856 self.tableparser.rows[i]['cells'] = cells 2857 2858 return self.tableparser.dump() 2859 2860 def quote(self): 2861 tagged = [] 2862 myre = regex['quote'] 2863 open = TAGS['blockQuoteOpen'] # block based 2864 close = TAGS['blockQuoteClose'] 2865 qline = TAGS['blockQuoteLine'] # line based 2866 indent = tagindent = '\t'*self.depth 2867 if rules['tagnotindentable']: tagindent = '' 2868 if not rules['keepquoteindent']: indent = '' 2869 2870 if open: tagged.append(tagindent+open) # open block 2871 for item in self.hold(): 2872 if type(item) == type([]): 2873 tagged.extend(item) # subquotes 2874 else: 2875 item = myre.sub('', item) # del TABs 2876 if rules['barinsidequote']: 2877 item = get_tagged_bar(item) 2878 item = self._last_escapes(item) 2879 item = qline*self.depth + item 2880 
def deflist(self):
    "Tag the held block as a definition list (delegates to list())."
    return self.list('deflist')


def numlist(self):
    "Tag the held block as a numbered list (delegates to list())."
    return self.list('numlist')


def list(self, name='list'):
    """Tag the held list items and return the target lines.

    name selects the TAGS family: 'list', 'numlist' or 'deflist'.
    Each held item is a list whose first element is the item text and
    whose remaining elements are continuation lines or already tagged
    sublists.
    """
    out = []
    items = self.hold()
    indent = self.prop('indent')
    tagindent = indent
    tag_open = TAGS.get(name + 'Open')
    tag_close = TAGS.get(name + 'Close')
    tag_line = TAGS.get(name + 'ItemLine')
    if rules['tagnotindentable']:
        tagindent = ''
    if not rules['keeplistindent']:
        indent = ''

    if name == 'deflist':
        itemopen = TAGS[name + 'Item1Open']
        itemclose = TAGS[name + 'Item2Close']
        itemsep = TAGS[name + 'Item1Close'] + TAGS[name + 'Item2Open']
    else:
        itemopen = TAGS[name + 'ItemOpen']
        itemclose = TAGS[name + 'ItemClose']
        itemsep = ''

    # ItemLine: number of leading chars identifies list depth
    if tag_line:
        itemopen = tag_line * self.depth
        # dirty fix for mgp
        # NOTE(review): nesting under "if listline" and the spacing of this
        # literal were reconstructed from whitespace-collapsed text; confirm
        if name == 'numlist':
            itemopen = itemopen + '\a. '

    # remove two-blanks from list ending mark, to avoid <p>
    items[-1] = self._remove_twoblanks(items[-1])

    # open list (not nestable lists are only opened at mother)
    if tag_open and not \
       (rules['listnotnested'] and BLOCK.depth != 1):
        out.append(tagindent + tag_open)

    # tag each list item (multiline items)
    itemopen_template = itemopen
    for idx, item in enumerate(items):

        # add "manual" item count for noautonum targets
        if name == 'numlist' and not rules['autonumberlist']:
            itemopen = regex['x'].sub(str(idx + 1), itemopen_template)

        item[0] = self._last_escapes(item[0])

        if name == 'deflist':
            term, rest = item[0].split(SEPARATOR, 1)
            item[0] = rest
            if not item[0]:
                del item[0]  # to avoid <p>
            out.append(tagindent + itemopen + term + itemsep)
        else:
            out.append(item[0].replace(SEPARATOR, tagindent + itemopen))
            del item[0]

        # process next lines for this item (if any)
        for line in item:
            if isinstance(line, type([])):  # sublist inside
                out.extend(line)
                continue
            line = self._last_escapes(line)
            # blank lines turns to <p>
            if not line and rules['parainsidelist']:
                line = (indent +
                        TAGS['paragraphOpen'] +
                        TAGS['paragraphClose']).rstrip()
            if not rules['keeplistindent']:
                line = line.lstrip()
            out.append(line)

        # close item (if needed)
        if itemclose:
            out.append(tagindent + itemclose)

    # close list (not nestable lists are only closed at mother)
    if tag_close and not \
       (rules['listnotnested'] and BLOCK.depth != 1):
        out.append(tagindent + tag_close)

    if rules['blankendmotherlist'] and BLOCK.depth == 1:
        out.append('')

    return out
class MacroMaster:
    """Expands the %%date, %%mtime, %%infile and %%outfile macros.

    config is the parsed configuration dictionary; when omitted (or
    empty) the global CONF is used.
    """

    def __init__(self, config=None):
        self.name = ''
        self.config = config or CONF
        self.infile = self.config['sourcefile']
        self.outfile = self.config['outfile']
        self.currdate = time.localtime(time.time())
        self.rgx = regex.get('macros') or getRegexes()['macros']
        self.fileinfo = {'infile': None, 'outfile': None}
        self.dft_fmt = MACROS

    def walk_file_format(self, fmt):
        "Walks the %%{in/out}file format string, expanding the % flags"
        i = 0
        ret = ''
        while i < len(fmt):              # char by char
            c = fmt[i]
            i = i + 1
            if c != '%':                 # common char
                ret = ret + c
                continue
            if i == len(fmt):            # lone % at the end: keep it
                ret = ret + c
                break
            c = fmt[i]                   # read the flag char
            i = i + 1
            ret = ret + self.expand_file_flag(c)
        return ret

    def expand_file_flag(self, flag):
        """Return the text for one %-flag of the current file macro.

        %f: filename          %F: filename (w/o extension)
        %d: dirname           %D: dirname (only parent dir)
        %p: file path         %e: extension
        %%: a literal %       anything else is returned as '%'+flag
        """
        info = self.fileinfo[self.name]  # get dict
        if flag == '%':
            return '%'                   # %% -> %
        if flag == 'f':
            return info['name']
        if flag == 'F':
            return re.sub(r'\.[^.]*$', '', info['name'])
        if flag == 'd':
            return info['dir']
        if flag == 'D':
            return os.path.split(info['dir'])[-1]
        if flag == 'p':
            return info['path']
        if flag == 'e':
            #TODO simpler way for %e ?
            return re.search(r'.(\.([^.]+))?$',
                             info['name']).group(2) or ''
        return '%' + flag                # false alarm

    def set_file_info(self, macroname):
        "Compute and cache path, dir and name for the current file macro."
        if self.fileinfo.get(macroname):
            return                       # already done
        filename = getattr(self, self.name)   # self.infile / self.outfile
        if filename in [STDOUT, MODULEOUT]:
            dirname = ''
            path = name = filename
        else:
            path = os.path.abspath(filename)
            dirname = os.path.dirname(path)
            name = os.path.basename(path)
        self.fileinfo[macroname] = {
            'path': path, 'dir': dirname, 'name': name}

    def expand(self, line=''):
        """Expand all macros found on the line.

        The expansion is spliced into the line literally.  It must not go
        through rgx.sub() as a replacement template: backslashes in a path
        or format (e.g. a Windows %%infile path) would be read as regex
        escapes or group references.
        """
        m = self.rgx.search(line)
        while m:
            name = self.name = m.group('name').lower()
            fmt = m.group('fmt') or self.dft_fmt.get(name)
            if name == 'date':
                txt = time.strftime(fmt, self.currdate)
            elif name == 'mtime':
                if self.infile in [STDIN, MODULEIN]:
                    fdate = self.currdate
                else:
                    mtime = os.path.getmtime(self.infile)
                    fdate = time.localtime(mtime)
                txt = time.strftime(fmt, fdate)
            elif name in ['infile', 'outfile']:
                self.set_file_info(name)
                txt = self.walk_file_format(fmt)
            else:
                Error("Unknown macro name '%s'" % name)
            line = line[:m.start()] + txt + line[m.end():]
            m = self.rgx.search(line)
        return line


##############################################################################


def dumpConfig(source_raw, parsed_config):
    """Print the RAW config of every source, then the parsed result.

    source_raw is the document's RAW config; RC_RAW and CMDLINE_RAW come
    from globals.  Filters (preproc/postproc) are listed last.
    """
    onoff = {1: _('ON'), 0: _('OFF')}
    data = [
        (_('RC file'), RC_RAW),
        (_('source document'), source_raw),
        (_('command line'), CMDLINE_RAW)
    ]
    # first show all RAW data found
    for label, cfg in data:
        print(_('RAW config for %s') % label)
        for target, key, val in cfg:
            target = '(%s)' % target
            key = dotted_spaces("%-14s" % key)
            val = val or _('ON')
            print(' %-8s %s: %s' % (target, key, val))
        print('')
    # then the parsed results of all of them
    print(_('Full PARSED config'))
    for key in sorted(parsed_config.keys()):
        val = parsed_config[key]
        # filters are the last
        if key in ['preproc', 'postproc']:
            continue
        # flag beautifier
        if key in FLAGS or key in ACTIONS:
            val = onoff.get(val) or val
        # list beautifier
        if isinstance(val, list):
            if key == 'options':
                sep = ' '
            else:
                sep = ', '
            val = sep.join(val)
        print("%25s: %s" % (dotted_spaces("%-14s" % key), val))
    print('')
    print(_('Active filters'))
    for filtername in ['preproc', 'postproc']:
        for rule in parsed_config.get(filtername) or []:
            print("%25s: %s -> %s" % (
                dotted_spaces("%-14s" % filtername), rule[0], rule[1]))
def get_file_body(file):
    "Returns all the document BODY lines"
    return process_source_file(file, noconf=1)[1][2]


def finish_him(outlist, config):
    """Write the output to the screen or to a file.

    Unmasks the escape char, expands embedded line breaks and applies the
    PostProc filters.  Returns the lines when the outfile is MODULEOUT,
    (lines, config) when on GUI with STDOUT; otherwise prints or saves
    the result and returns None.
    """
    outfile = config['outfile']
    outlist = unmaskEscapeChar(outlist)
    outlist = expandLineBreaks(outlist)

    # apply PostProc filters
    if config['postproc']:
        filters = compile_filters(config['postproc'],
                                  _('Invalid PostProc filter regex'))
        postoutlist = []
        errmsg = _('Invalid PostProc filter replacement')
        for line in outlist:
            for rgx, repl in filters:
                try:
                    line = rgx.sub(repl, line)
                # not a bare except: let SystemExit/KeyboardInterrupt pass
                except Exception:
                    Error("%s: '%s'" % (errmsg, repl))
            postoutlist.append(line)
        outlist = postoutlist[:]

    if outfile == MODULEOUT:
        return outlist
    elif outfile == STDOUT:
        if GUI:
            return outlist, config
        for line in outlist:
            print(line)
    else:
        Savefile(outfile, addLineBreaks(outlist))
        if not GUI and not QUIET:
            print(_('%s wrote %s') % (my_name, outfile))

    if config['split']:
        if not QUIET:
            print("--- html...")
        # NOTE(review): outfile is interpolated unquoted into a shell
        # command; names with spaces or shell metachars will misbehave
        sgml2html = 'sgml2html -s %s -l %s %s' % (
            config['split'], config['lang'] or lang, outfile)
        if not QUIET:
            print("Running system command: %s" % sgml2html)
        os.system(sgml2html)
def toc_inside_body(body, toc, config):
    """Replace each TOC mark line of body with the TOC contents.

    With AUTOTOC the body is returned untouched.  When config['toc'] is
    off, the mark lines are simply dropped.
    """
    if AUTOTOC:
        return body  # nothing to expand
    toc_mark = MaskMaster().tocmask
    expanded = []
    for line in body:
        if not line.count(toc_mark):
            expanded.append(line)   # common line
        elif config['toc']:
            expanded.extend(toc)    # include if --toc
        # otherwise the %%toc line is removed
    return expanded


def toc_tagger(toc, config):
    "Convert t2t-marked TOC (it is a list) to target-tagged TOC"
    # tag if TOC-only TOC "by hand" (target don't have a TOC tag)
    if config['toc-only'] or (config['toc'] and not TAGS['TOC']):
        fakeconf = config.copy()
        for key in ('headers', 'toc-only', 'mask-email', 'css-sugar'):
            fakeconf[key] = 0
        fakeconf['preproc'] = []
        fakeconf['postproc'] = []
        tagged, unused_toc = convert(toc, fakeconf)
        set_global_config(config)  # restore config
        return tagged
    # target TOC is a tag
    if config['toc'] and TAGS['TOC']:
        return [TAGS['TOC']]
    return []


def toc_formatter(toc, config):
    """Formats TOC for automatic placement between headers and body.

    The open/close tags are inserted into the received list itself.
    """
    if config['toc-only']:
        return toc  # no formatting needed
    if not config['toc']:
        return []   # TOC disabled
    formatted = toc
    # TOC open/close tags (if any)
    if TAGS['tocOpen']:
        formatted.insert(0, TAGS['tocOpen'])
    if TAGS['tocClose']:
        formatted.append(TAGS['tocClose'])
    # autotoc specific formatting
    if not AUTOTOC:
        return formatted
    if rules['autotocwithbars']:       # TOC between bars
        para = TAGS['paragraphOpen'] + TAGS['paragraphClose']
        bar = regex['x'].sub('-' * 72, TAGS['bar1'])
        tocbar = [para, bar, para]
        formatted = tocbar + formatted + tocbar
    if rules['blankendautotoc']:       # blank line after TOC
        formatted.append('')
    if rules['autotocnewpagebefore']:  # page break before TOC
        formatted.insert(0, TAGS['pageBreak'])
    if rules['autotocnewpageafter']:   # page break after TOC
        formatted.append(TAGS['pageBreak'])
    return formatted
def doHeader(headers, config):
    """Return the target header lines, filled from the header template.

    headers holds the three source header lines (may be empty).  Returns
    [] when config['headers'] is off.  Template lines whose only %(KEY)s
    reference has no data are dropped; with css-inside, the style file
    contents are embedded right before the closing HEAD tag.
    """
    if not config['headers']:
        return []
    if not headers:
        headers = ['', '', '']
    target = config['target']
    if target not in HEADER_TEMPLATE:
        Error("doheader: Unknow target '%s'" % target)

    if target in ['html', 'xhtml'] and config.get('css-sugar'):
        template = HEADER_TEMPLATE[target + 'css'].split('\n')
    else:
        template = HEADER_TEMPLATE[target].split('\n')

    head_data = {'STYLE': '', 'ENCODING': ''}
    for key in list(head_data.keys()):
        val = config.get(key.lower())
        if key == 'ENCODING':
            val = get_encoding_string(val, target)
        head_data[key] = val
    # parse header contents
    for i in 0, 1, 2:
        # expand macros
        contents = MacroMaster(config=config).expand(headers[i])
        # Escapes - on tex, just do it if any \tag{} present
        if target != 'tex' or \
           (target == 'tex' and re.search(r'\\\w+{', contents)):
            contents = doEscape(target, contents)
        if target == 'lout':
            contents = doFinalEscape(target, contents)

        head_data['HEADER%d' % (i + 1)] = contents
    # css-inside removes STYLE line
    css_inside = target in ['html', 'xhtml'] and \
        config.get('css-inside') and config.get('style')
    if css_inside:
        head_data['STYLE'] = ''
    Debug("Header Data: %s" % head_data, 1)
    # For each empty key, drop the template lines that reference only
    # that key.  A new list is built instead of calling template.remove()
    # inside the loop: removing while iterating skips the following line.
    for key in list(head_data.keys()):
        if head_data.get(key):
            continue
        ref = '%%(%s)s' % key
        kept = []
        for line in template:
            if line.count(ref) and not \
               re.search(r'%\([A-Z0-9]+\)s', line.replace(ref, '')):
                continue  # only reference on this line was the empty key
            kept.append(line)
        template = kept
    # populate template with data
    template = '\n'.join(template) % head_data

    # adding CSS contents into template (for --css-inside)
    if css_inside:
        tags = getTags(config)
        cssfile = config['style']
        if not os.path.isabs(cssfile):
            infile = config.get('sourcefile')
            cssfile = os.path.join(os.path.dirname(infile), cssfile)
        css = '\n'.join(Readfile(cssfile, 1, 1))
        css = "%s\n%s\n%s\n" % (tags['cssOpen'], css, tags['cssClose'])
        # callable replacement: the CSS text is inserted literally, so
        # backslashes in it are not parsed as regex template escapes
        template = re.sub('(?i)(</HEAD>)',
                          lambda m: css + m.group(1), template)

    return template.split('\n')
def doCommentLine(txt):
    """Return txt wrapped in the target's comment tag ('' if none).

    The -- string ends a (h|sg|xht)ml comment, so when the comment tag
    uses it, every dash followed by another dash gets an escape after it.
    """
    comment_tag = TAGS['comment']
    txt = maskEscapeChar(txt)
    if comment_tag.count('--') and txt.count('--'):
        txt = re.sub('-(?=-)', r'-\\', txt)
    if not comment_tag:
        return ''
    return regex['x'].sub(txt, comment_tag)


def doFooter(config):
    """Return the footer lines: generator and cmdline comments plus EOD.

    Returns [] when config['headers'] is off.
    """
    if not config['headers']:
        return []
    target = config['target']
    if target == 'tex':
        typename = 'LaTeX2e'
    else:
        typename = target
    generated_by = '%s code generated by %s %s (%s)' % (
        typename, my_name, my_version, my_url)
    cmdline = 'cmdline: %s %s' % (
        my_name, ' '.join(config['realcmdline']))
    return [
        '',
        doCommentLine(generated_by),
        doCommentLine(cmdline),
        TAGS['EOD'],
    ]
def doEscape(target, txt):
    """Target-specific special escapes. Apply *before* insert any tag.

    Returns txt with the characters that are special on the given target
    escaped; unknown targets get txt back unchanged.
    """
    tmpmask = 'vvvvThisEscapingSuxvvvv'
    if target in ['html', 'sgml', 'xhtml']:
        # & must go first, or the entities added below get re-escaped
        txt = txt.replace('&', '&amp;')
        txt = txt.replace('<', '&lt;')
        txt = txt.replace('>', '&gt;')
        if target == 'sgml':
            txt = txt.replace('\xff', '&yuml;')  # "+y
    elif target == 'pm6':
        # plain replace: '\#' is rejected as a regex template escape
        txt = txt.replace('<', '<\\#60>')
    elif target == 'mgp':
        txt = re.sub('^%', ' %', txt)  # add leading blank to avoid parse
    elif target == 'man':
        # command ID; callable keeps the literal \& out of the template
        txt = re.sub("^([.'])", lambda m: '\\&' + m.group(1), txt)
        txt = txt.replace(ESCCHAR, ESCCHAR + 'e')  # \e
    elif target == 'lout':
        # TIP: / moved to FinalEscape to avoid //italic//
        # TIP: these are also converted by lout:  ...  ---  --
        txt = txt.replace(ESCCHAR, tmpmask)              # \
        txt = txt.replace('"', '"%s""' % ESCCHAR)        # "\""
        txt = re.sub('([|&{}@#^~])', '"\\1"', txt)       # "@"
        txt = txt.replace(tmpmask, '"%s"' % (ESCCHAR * 2))  # "\\"
    elif target == 'tex':
        # mark literal \ to be changed to $\backslash$ later
        txt = txt.replace(ESCCHAR, tmpmask)
        txt = re.sub('([#$&%{}])',
                     lambda m: ESCCHAR + m.group(1), txt)         # \%
        txt = re.sub('([~^])',
                     lambda m: ESCCHAR + m.group(1) + '{}', txt)  # \~{}
        txt = re.sub('([<|>])', r'$\1$', txt)                     # $>$
        txt = txt.replace(tmpmask, maskEscapeChar(r'$\backslash$'))
        # TIP the _ is escaped at the end
    return txt


# TODO man: where - really needs to be escaped?
def doFinalEscape(target, txt):
    "Last escapes of each line"
    if target == 'pm6':
        txt = txt.replace(ESCCHAR + '<', r'<\#92><')
    elif target == 'man':
        txt = txt.replace('-', r'\-')
    elif target == 'sgml':
        txt = txt.replace('[', '&lsqb;')
    elif target == 'lout':
        txt = txt.replace('/', '"/"')
    elif target == 'tex':
        txt = txt.replace('_', r'\_')
        txt = txt.replace('vvvvTexUndervvvv', '_')  # shame!
    return txt
def EscapeCharHandler(action, data):
    """Mask/Unmask the Escape Char on the given string.

    action is 'mask' (backslash -> ESCCHAR) or 'unmask' (the reverse).
    Blank strings are returned untouched.
    """
    if not data.strip():
        return data
    if action not in ['mask', 'unmask']:
        Error("EscapeCharHandler: Invalid action '%s'" % action)
    if action == 'mask':
        return data.replace('\\', ESCCHAR)
    return data.replace(ESCCHAR, '\\')


def maskEscapeChar(data):
    "Replace any Escape Char (backslash) with a text mask (Input: str or list)"
    if isinstance(data, list):
        # a real list, not map(): callers index, slice and re-iterate it
        return [EscapeCharHandler('mask', x) for x in data]
    return EscapeCharHandler('mask', data)


def unmaskEscapeChar(data):
    "Undo the Escape Char (backslash) masking (Input: str or list)"
    if isinstance(data, list):
        return [EscapeCharHandler('unmask', x) for x in data]
    return EscapeCharHandler('unmask', data)


def addLineBreaks(mylist):
    "use LB to respect sys.platform"
    # embedded \n's are converted too, then the final line break is added
    return [line.replace('\n', LB) + LB for line in mylist]


def expandLineBreaks(mylist):
    "Split embedded line breaks: ['foo\\nbar'] becomes ['foo', 'bar']"
    ret = []
    for line in mylist:
        ret.extend(line.split('\n'))
    return ret


def compile_filters(filters, errmsg='Filter'):
    """Compile the pattern of each (pattern, replacement) filter in place.

    Returns the same list, now holding (compiled regex, replacement)
    pairs.  An invalid pattern is reported through Error() with errmsg.
    """
    if filters:
        for i, (patt, repl) in enumerate(filters):
            try:
                rgx = re.compile(patt)
            # not a bare except: let SystemExit/KeyboardInterrupt pass
            except Exception:
                Error("%s: '%s'" % (errmsg, patt))
            filters[i] = (rgx, repl)
    return filters


def enclose_me(tagname, txt):
    "Return txt between the tagname+'Open' and tagname+'Close' tags."
    return TAGS.get(tagname + 'Open') + txt + TAGS.get(tagname + 'Close')


def beautify_me(name, line):
    "where name is: bold, italic or underline"
    name = 'font%s' % name.capitalize()
    tag_open = TAGS['%sOpen' % name]
    tag_close = TAGS['%sClose' % name]
    txt = r'%s\1%s' % (tag_open, tag_close)
    return regex[name].sub(txt, line)
def get_tagged_link(label, url):
    """Return the target text for a link.

    label may be empty (simple link) or an image mark (image link).
    E-mail addresses use the 'email' tags, anything else the 'url' tags.
    """
    target = CONF['target']
    image_re = regex['img']
    linkable = rules['linkable']

    # set link type
    if regex['email'].match(url):
        linktype = 'email'
    else:
        linktype = 'url'

    # escape specials from TEXT parts
    label = doEscape(target, label)

    if linkable:
        # escape specials from link URL
        if rules['escapeurl']:
            url = doEscape(target, url)
    # if not linkable, the URL is plain text, that needs escape
    elif target == 'tex':
        if url[:1] == '#':
            url = '\\' + url  # ugly, but compile
    else:
        url = doEscape(target, url)

    # adding protocol to guessed link
    guessurl = ''
    if linktype == 'url' and \
       re.match(regex['_urlskel']['guess'], url):
        if url[0] == 'w':
            guessurl = 'http://' + url
        else:
            guessurl = 'ftp://' + url
        # not link aware targets -> protocol is useless
        if not linkable:
            guessurl = ''

    # simple link (not guessed)
    if not label and not guessurl:
        if CONF['mask-email'] and linktype == 'email':
            # do the email mask feature (no TAGs, just text)
            url = url.replace('@', ' (a) ')
            url = url.replace('.', ' ')
            url = "<%s>" % url
            if linkable:
                url = doEscape(target, url)
            return url
        # just add link data to tag
        return regex['x'].sub(url, TAGS[linktype])

    # named link or guessed simple link
    if not label:
        label = url       # no protocol
    if guessurl:
        url = guessurl    # with protocol

    # image inside link!
    if image_re.match(label):
        if rules['imglinkable']:  # get image tag
            label = parse_images(label)
        else:                     # img@link !supported
            label = "(%s)" % image_re.match(label).group(1)

    # putting data on the right appearance order
    if linkable:
        urlorder = [url, label]   # link before label
    else:
        urlorder = [label, url]   # label before link

    # add link data to tag (replace \a's)
    tagged = TAGS["%sMark" % linktype]
    for data in urlorder:
        tagged = regex['x'].sub(data, tagged, 1)
    return tagged


def parse_deflist_term(line):
    "Extract and parse definition list term contents"
    img_re = regex['img']
    term = regex['deflist'].search(line).group(3)

    # mask image inside term as (image.jpg), where not supported
    if not rules['imgasdefterm']:
        found = img_re.search(term)
        while found:
            term = img_re.sub('(%s)' % found.group(1), term, 1)
            found = img_re.search(term)

    #TODO tex: escape ] on term. \], \rbrack{} and \verb!]! don't work :(
    return term
def get_tagged_bar(line):
    """Return the tagged bar for a bar line (line itself if not a bar).

    Side effect: with rule mapbar2pagebreak and a pageBreak tag, the
    global TAGS['bar2'] is overwritten with the page break tag.
    """
    m = regex['bar'].search(line)
    if not m:
        return line
    txt = m.group(2)

    # map strong bar to pagebreak
    if rules['mapbar2pagebreak'] and TAGS['pageBreak']:
        TAGS['bar2'] = TAGS['pageBreak']

    # set bar type
    if txt[0] == '=':
        bar = TAGS['bar2']
    else:
        bar = TAGS['bar1']

    # to avoid comment tag confusion like <!-- ------ -->
    if TAGS['comment'].count('--'):
        txt = txt.replace('--', '__')

    # tag line
    return regex['x'].sub(txt, bar)


def get_image_align(line):
    "Return the image (first found) align for the given line"

    # first clear marks that can mess align detection
    line = re.sub(SEPARATOR + '$', '', line)  # remove deflist sep
    line = re.sub('^' + SEPARATOR, '', line)  # remove list sep
    line = re.sub('^[\t]+', '', line)         # remove quote mark

    # get image position on the line
    m = regex['img'].search(line)
    at_head = m.start() == 0
    at_tail = m.end() == len(line)

    # the align detection algorithm
    if at_head and not at_tail:
        align = 'left'    # ^img + text$
    elif at_tail and not at_head:
        align = 'right'   # ^text + img$
    else:
        align = 'center'  # default align

    # some special cases
    if BLOCK.isblock('table'):
        align = 'center'  # ignore when table

    return align


# reference: http://www.iana.org/assignments/character-sets
#            http://www.drclue.net/F1.cgi/HTML/META/META.html
def get_encoding_string(enc, target):
    """Return the encoding name as the target expects it.

    Common aliases are normalized (us-ascii, ibm85x, iso-8859-N), then a
    target specific table is applied.  Names with no translation are
    returned normalized; an empty enc gives ''.
    """
    if not enc:
        return ''
    # target specific translation table
    translate = {
        'tex': {
            # LaTeX inputenc option names
            # missing: ansinew , applemac , cp437 , cp437de , cp865
            'us-ascii': 'ascii',
            'windows-1250': 'cp1250',
            'windows-1252': 'cp1252',
            'ibm850': 'cp850',
            'ibm852': 'cp852',
            'iso-8859-1': 'latin1',
            'iso-8859-2': 'latin2',
            'iso-8859-3': 'latin3',
            'iso-8859-4': 'latin4',
            # Latin-N numbers are not the ISO part numbers:
            # latin5 is ISO-8859-9 and latin9 is ISO-8859-15
            'iso-8859-5': 'iso88595',
            'iso-8859-9': 'latin5',
            'iso-8859-15': 'latin9',
            'koi8-r': 'koi8-r'
        }
    }
    # normalization
    enc = re.sub('(?i)(us[-_]?)?ascii|us|ibm367', 'us-ascii', enc)
    enc = re.sub('(?i)(ibm|cp)?85([02])', 'ibm85\\2', enc)
    enc = re.sub('(?i)(iso[_-]?)?8859[_-]?', 'iso-8859-', enc)
    enc = re.sub('iso-8859-($|[^1-9]).*', 'iso-8859-1', enc)
    # apply translation table
    try:
        enc = translate[target][enc.lower()]
    except KeyError:
        pass  # no table for this target, or no entry for this encoding
    return enc
def process_source_file(file='', noconf=0, contents=None):
    """
    Find and join all the configuration available for a source file.
    No sanity checking is done on this step.
    It also extracts the source document parts into separate holders.

    The document is read from 'contents' (a list of lines) when given,
    otherwise from 'file'.  With 'noconf' set, no configuration is
    gathered and the returned config dictionary is empty.

    The config scan order is:
       1. The user configuration file (i.e. $HOME/.txt2tagsrc)
       2. The source document's CONF area
       3. The command line options

    The return data is a tuple of two items:
       1. The parsed config dictionary
       2. The document's parts, as a (head, conf, body) tuple

    All the conversion process will be based on the data and
    configuration returned by this function.
    The source file is read on this step only.
    """
    if contents:
        source = SourceDocument(contents=contents)
    else:
        source = SourceDocument(file)
    head, conf, body = source.split()
    Message(_("Source document contents stored"), 2)
    if noconf:
        return {}, (head, conf, body)
    # read document config
    source_raw = source.get_raw_config()
    # join all the config directives found, then parse it
    full_raw = RC_RAW + source_raw + CMDLINE_RAW
    Message(_("Parsing and saving all config found (%03d items)") % (
        len(full_raw)), 1)
    full_parsed = ConfigMaster(full_raw).parse()
    # add manually the filename to the conf dic
    if contents:
        full_parsed['sourcefile'] = MODULEIN
        full_parsed['infile'] = MODULEIN
        full_parsed['outfile'] = MODULEOUT
    else:
        full_parsed['sourcefile'] = file
    # maybe should we dump the config found?
    if full_parsed.get('dump-config'):
        dumpConfig(source_raw, full_parsed)
        Quit()
    # okay, all done
    Debug("FULL config for this file: %s" % full_parsed, 1)
    return full_parsed, (head, conf, body)


def get_infiles_config(infiles):
    """
    Find and join into a single list all configuration available
    for each input file. This function is supposed to be the very
    first one to be called, before any processing.
    """
    if not infiles:
        return []
    return [process_source_file(infile) for infile in infiles]
def convert_this_files(configs):
    """Convert each (config, document parts) pair of configs.

    Composes headers, body, TOC and footer for every file.  Returns
    (lines, config) on GUI, (finish_him() result, config) when the
    outfile is MODULEOUT, and otherwise hands the lines to finish_him().
    With dump-source, the source is printed and the function returns.
    """
    global CONF
    for myconf, doc in configs:  # multifile support
        source_head, source_conf, source_body = doc
        myconf = ConfigMaster().sanity(myconf)

        # compose the target file Headers
        #TODO escape line before?
        #TODO see exceptions by tex and mgp
        Message(_("Composing target Headers"), 1)
        target_head = doHeader(source_head, myconf)

        # parse the full marked body into tagged target
        first_body_line = (len(source_head) or 1) + len(source_conf) + 1
        Message(_("Composing target Body"), 1)
        target_body, marked_toc = convert(
            source_body, myconf, firstlinenr=first_body_line)

        # if dump-source, we're done
        if myconf['dump-source']:
            for line in source_head + source_conf + target_body:
                print(line)
            return

        # make TOC (if needed)
        Message(_("Composing target TOC"), 1)
        tagged_toc = toc_tagger(marked_toc, myconf)
        target_toc = toc_formatter(tagged_toc, myconf)
        target_body = toc_inside_body(target_body, target_toc, myconf)
        if not AUTOTOC and not myconf['toc-only']:
            target_toc = []

        # compose the target file Footer
        Message(_("Composing target Footer"), 1)
        target_foot = doFooter(myconf)

        # finally, we have our document
        outlist = target_head + target_toc + target_body + target_foot

        # if on GUI, abort before finish_him
        if GUI:
            return outlist, myconf
        # if module, return finish_him as list
        if myconf.get('outfile') == MODULEOUT:
            return finish_him(outlist, myconf), myconf
        # else, write results to file or STDOUT
        Message(_("Saving results to the output file"), 1)
        finish_him(outlist, myconf)
def parse_images(line):
    "Tag all images found"
    img_re = regex['img']
    while img_re.search(line) and TAGS['img'] != '[\a]':
        filename = img_re.search(line).group(1)
        tag = TAGS['img']

        # HTML, XHTML and mgp!
        if rules['imgalignable']:
            align = get_image_align(line)
            # add align on tag
            align_tag = TAGS['imgAlign' + align.capitalize()]
            tag = regex['_imgAlign'].sub(align_tag, tag, 1)
            # dirty fix to allow centered solo images
            if align == 'center' and TARGET in ['html', 'xhtml']:
                rest = img_re.sub('', line, 1)
                if re.match(r'^\s+$', rest):
                    tag = "<center>%s</center>" % tag

        if TARGET == 'tex':
            tag = re.sub(r'\\b', r'\\\\b', tag)
            filename = filename.replace('_', 'vvvvTexUndervvvv')

        # first put the tag on the line, then the filename into the tag
        line = img_re.sub(tag, line, 1)
        line = regex['x'].sub(filename, line, 1)
    return line


def add_inline_tags(line):
    "Apply the bold, italic and underline beautifiers, then tag images."
    # beautifiers
    for beauti in ['Bold', 'Italic', 'Underline']:
        if regex['font%s' % beauti].search(line):
            line = beautify_me(beauti, line)
    return parse_images(line)


def get_include_contents(file, path=''):
    """Parses %!include: value and extract file contents.

    Returns (type, lines), where type is 't2t', 'verb', 'raw' or
    'passthru'.  The type comes from the doubled mark that may enclose
    the filename: `` for verb, "" for raw, '' for passthru.
    """
    ids = {'`': 'verb', '"': 'raw', "'": 'passthru'}
    inctype = 't2t'
    # set include type and remove identifier marks
    mark = file[0]
    if mark in ids and file[:2] == file[-2:] == mark * 2:
        inctype = ids[mark]  # set type
        file = file[2:-2]    # remove marks
    # handle remote dir execution
    filepath = os.path.join(path, file)
    # read included file contents
    lines = Readfile(filepath, remove_linebreaks=1)
    # default txt2tags marked text, just BODY matters
    if inctype == 't2t':
        lines = get_file_body(filepath)
        # NOTE(review): nesting of this insert under the t2t branch was
        # reconstructed from whitespace-collapsed text; confirm
        lines.insert(0, '%%INCLUDED(%s) starts here: %s' % (inctype, file))
        # This appears when included hit EOF with verbatim area open
        #lines.append('%%INCLUDED(%s) ends here: %s'%(id,file))
    return inctype, lines


def set_global_config(config):
    "Load the globals CONF, TAGS, rules, regex and TARGET from config."
    global CONF, TAGS, regex, rules, TARGET
    CONF = config
    TAGS = getTags(CONF)
    rules = getRules(CONF)
    regex = getRegexes()
    TARGET = config['target']  # save for buggy functions that need global
TARGET = config['target'] # save for buggy functions that need global 3747 3748 3749def convert(bodylines, config, firstlinenr=1): 3750 global BLOCK 3751 3752 set_global_config(config) 3753 3754 target = config['target'] 3755 BLOCK = BlockMaster() 3756 MASK = MaskMaster() 3757 TITLE = TitleMaster() 3758 3759 ret = [] 3760 dump_source = [] 3761 f_lastwasblank = 0 3762 3763 # compiling all PreProc regexes 3764 pre_filter = compile_filters( 3765 CONF['preproc'], _('Invalid PreProc filter regex')) 3766 3767 # let's mark it up! 3768 linenr = firstlinenr-1 3769 lineref = 0 3770 while lineref < len(bodylines): 3771 # defaults 3772 MASK.reset() 3773 results_box = '' 3774 3775 untouchedline = bodylines[lineref] 3776 dump_source.append(untouchedline) 3777 3778 line = re.sub('[\n\r]+$','',untouchedline) # del line break 3779 3780 # apply PreProc filters 3781 if pre_filter: 3782 errmsg = _('Invalid PreProc filter replacement') 3783 for rgx,repl in pre_filter: 3784 try: line = rgx.sub(repl, line) 3785 except: Error("%s: '%s'"%(errmsg, repl)) 3786 3787 line = maskEscapeChar(line) # protect \ char 3788 linenr = linenr +1 3789 lineref = lineref +1 3790 3791 Debug(repr(line), 2, linenr) # heavy debug: show each line 3792 3793 # any NOT table line (or comment), closes an open table 3794 if ( BLOCK.isblock('table') or 3795 ( BLOCK.isblock('verb') and 3796 BLOCK.prop('mapped') == 'table' 3797 ) 3798 ) \ 3799 and not regex['table'].search(line) \ 3800 and not regex['comment'].search(line): 3801 ret.extend(BLOCK.blockout()) 3802 3803 # any NOT quote line (or comment) closes all open quotes 3804 if BLOCK.isblock('quote') \ 3805 and not regex['quote'].search(line) \ 3806 and not regex['comment'].search(line): 3807 while BLOCK.isblock('quote'): 3808 ret.extend(BLOCK.blockout()) 3809 3810 3811 #-------------------------[ Raw Text ]---------------------- 3812 3813 # we're already on a raw block 3814 if BLOCK.block() == 'raw': 3815 3816 # closing raw 3817 if 
regex['blockRawClose'].search(line): 3818 ret.extend(BLOCK.blockout()) 3819 continue 3820 3821 # normal raw-inside line 3822 BLOCK.holdadd(line) 3823 continue 3824 3825 # detecting raw block init 3826 if regex['blockRawOpen'].search(line): 3827 ret.extend(BLOCK.blockin('raw')) 3828 continue 3829 3830 # one line verb-formatted text 3831 if regex['1lineRaw'].search(line): 3832 ret.extend(BLOCK.blockin('raw')) 3833 line = regex['1lineRaw'].sub('',line) 3834 BLOCK.holdadd(line) 3835 ret.extend(BLOCK.blockout()) 3836 continue 3837 3838 #-----------------[ Verbatim (PRE-formatted) ]-------------- 3839 3840 #TIP we'll never support beautifiers inside verbatim 3841 3842 # we're already on a verb block 3843 if BLOCK.block() == 'verb': 3844 3845 # closing verb 3846 if regex['blockVerbClose'].search(line): 3847 ret.extend(BLOCK.blockout()) 3848 continue 3849 3850 # normal verb-inside line 3851 BLOCK.holdadd(line) 3852 continue 3853 3854 # detecting verb block init 3855 if regex['blockVerbOpen'].search(line): 3856 ret.extend(BLOCK.blockin('verb')) 3857 f_lastwasblank = 0 3858 continue 3859 3860 # one line verb-formatted text 3861 if regex['1lineVerb'].search(line): 3862 ret.extend(BLOCK.blockin('verb')) 3863 line = regex['1lineVerb'].sub('',line) 3864 BLOCK.holdadd(line) 3865 ret.extend(BLOCK.blockout()) 3866 f_lastwasblank = 0 3867 continue 3868 3869 # tables are mapped to verb when target is not table-aware 3870 if not rules['tableable'] and regex['table'].search(line): 3871 if not BLOCK.isblock('verb'): 3872 ret.extend(BLOCK.blockin('verb')) 3873 BLOCK.propset('mapped', 'table') 3874 BLOCK.holdadd(line) 3875 continue 3876 3877 #---------------------[ blank lines ]----------------------- 3878 3879 if regex['blankline'].search(line): 3880 3881 # close open paragraph 3882 if BLOCK.isblock('para'): 3883 ret.extend(BLOCK.blockout()) 3884 f_lastwasblank = 1 3885 continue 3886 3887 # close all open quotes 3888 while BLOCK.isblock('quote'): 3889 ret.extend(BLOCK.blockout()) 3890 
3891 # closing all open lists 3892 if f_lastwasblank: # 2nd consecutive blank 3893 if BLOCK.block()[-4:] == 'list': 3894 BLOCK.holdaddsub('') # helps parser 3895 while BLOCK.depth: # closes list (if any) 3896 ret.extend(BLOCK.blockout()) 3897 continue # ignore consecutive blanks 3898 3899 # paragraph (if any) is wanted inside lists also 3900 if BLOCK.block()[-4:] == 'list': 3901 BLOCK.holdaddsub('') 3902 else: 3903 # html: show blank line (needs tag) 3904 if target in ['html','xhtml']: 3905 ret.append(TAGS['paragraphOpen']+\ 3906 TAGS['paragraphClose']) 3907 # otherwise we just show a blank line 3908 else: 3909 ret.append('') 3910 3911 f_lastwasblank = 1 3912 continue 3913 3914 3915 #---------------------[ special ]--------------------------- 3916 3917 if regex['special'].search(line): 3918 # include command 3919 targ, key, val = ConfigLines().parse_line( 3920 line, 'include', target) 3921 if key: 3922 Debug("Found config '%s', value '%s'"%( 3923 key,val),1,linenr) 3924 3925 incpath = os.path.dirname(CONF['sourcefile']) 3926 incfile = val 3927 err = _('A file cannot include itself (loop!)') 3928 if CONF['sourcefile'] == incfile: 3929 Error("%s: %s"%(err,incfile)) 3930 inctype, inclines = get_include_contents( 3931 incfile, incpath) 3932 # verb, raw and passthru are easy 3933 if inctype != 't2t': 3934 ret.extend(BLOCK.blockin(inctype)) 3935 BLOCK.holdextend(inclines) 3936 ret.extend(BLOCK.blockout()) 3937 else: 3938 # insert include lines into body 3939 #TODO include maxdepth limit 3940 bodylines = bodylines[:lineref] \ 3941 +inclines \ 3942 +bodylines[lineref:] 3943 #TODO fix path if include@include 3944 # remove %!include call 3945 if CONF['dump-source']: 3946 dump_source.pop() 3947 continue 3948 else: 3949 Debug('Bogus Special Line',1,linenr) 3950 3951 #---------------------[ dump-source ]----------------------- 3952 3953 # we don't need to go any further 3954 if CONF['dump-source']: 3955 continue 3956 3957 #---------------------[ comments 
]-------------------------- 3958 3959 # just skip them (if not macro) 3960 if regex['comment'].search(line) and not \ 3961 regex['macros'].match(line) and not \ 3962 regex['toc'].match(line): 3963 continue 3964 3965 # valid line, reset blank status 3966 f_lastwasblank = 0 3967 3968 #---------------------[ Horizontal Bar ]-------------------- 3969 3970 if regex['bar'].search(line): 3971 3972 # a bar closes a paragraph 3973 if BLOCK.isblock('para'): 3974 ret.extend(BLOCK.blockout()) 3975 3976 # we need to close all opened quote blocks 3977 # if bar isn't allowed inside or if not a quote line 3978 if BLOCK.isblock('quote'): 3979 if not rules['barinsidequote'] or \ 3980 not regex['quote'].search(line): 3981 while BLOCK.isblock('quote'): 3982 ret.extend(BLOCK.blockout()) 3983 3984 # quote + bar: continue processing for quoting 3985 if rules['barinsidequote'] and \ 3986 regex['quote'].search(line): 3987 pass 3988 3989 # just bar: save tagged line and we're done 3990 else: 3991 line = get_tagged_bar(line) 3992 if BLOCK.block()[-4:] == 'list': 3993 BLOCK.holdaddsub(line) 3994 elif BLOCK.block(): 3995 BLOCK.holdadd(line) 3996 else: 3997 ret.append(line) 3998 Debug("BAR: %s"%line, 6) 3999 continue 4000 4001 #---------------------[ Title ]----------------------------- 4002 4003 #TODO set next blank and set f_lastwasblank or f_lasttitle 4004 if (regex['title'].search(line) or 4005 regex['numtitle'].search(line)) and \ 4006 BLOCK.block()[-4:] != 'list': 4007 4008 # a title closes a paragraph 4009 if BLOCK.isblock('para'): 4010 ret.extend(BLOCK.blockout()) 4011 4012 TITLE.add(line) 4013 tagged_title = TITLE.get() 4014 ret.extend(tagged_title) 4015 Debug("TITLE: %s"%tagged_title, 6) 4016 4017 f_lastwasblank = 1 4018 continue 4019 4020 #---------------------[ %%toc ]----------------------- 4021 4022 # %%toc line closes paragraph 4023 if BLOCK.block() == 'para' and regex['toc'].search(line): 4024 ret.extend(BLOCK.blockout()) 4025 4026 #---------------------[ apply masks 
]----------------------- 4027 4028 line = MASK.mask(line) 4029 4030 #XXX from here, only block-inside lines will pass 4031 4032 #---------------------[ Quote ]----------------------------- 4033 4034 if regex['quote'].search(line): 4035 4036 # store number of leading TABS 4037 quotedepth = len(regex['quote'].search(line).group(0)) 4038 4039 # SGML doesn't support nested quotes 4040 if rules['quotenotnested']: quotedepth = 1 4041 4042 # new quote 4043 if not BLOCK.isblock('quote'): 4044 ret.extend(BLOCK.blockin('quote')) 4045 4046 # new subquotes 4047 while BLOCK.depth < quotedepth: 4048 BLOCK.blockin('quote') 4049 4050 # closing quotes 4051 while quotedepth < BLOCK.depth: 4052 ret.extend(BLOCK.blockout()) 4053 4054 #---------------------[ Lists ]----------------------------- 4055 4056 # an empty item also closes the current list 4057 if BLOCK.block()[-4:] == 'list': 4058 m = regex['listclose'].match(line) 4059 if m: 4060 listindent = m.group(1) 4061 listtype = m.group(2) 4062 currlisttype = BLOCK.prop('type') 4063 currlistindent = BLOCK.prop('indent') 4064 if listindent == currlistindent and \ 4065 listtype == currlisttype: 4066 ret.extend(BLOCK.blockout()) 4067 continue 4068 4069 if regex['list'].search(line) or \ 4070 regex['numlist'].search(line) or \ 4071 regex['deflist'].search(line): 4072 4073 listindent = BLOCK.prop('indent') 4074 listids = string.join(LISTNAMES.keys(), '') 4075 m = re.match('^( *)([%s]) '%listids, line) 4076 listitemindent = m.group(1) 4077 listtype = m.group(2) 4078 listname = LISTNAMES[listtype] 4079 results_box = BLOCK.holdadd 4080 4081 # del list ID (and separate term from definition) 4082 if listname == 'deflist': 4083 term = parse_deflist_term(line) 4084 line = regex['deflist'].sub(term+SEPARATOR,line) 4085 else: 4086 line = regex[listname].sub(SEPARATOR,line) 4087 4088 # don't cross depth limit 4089 maxdepth = rules['listmaxdepth'] 4090 if maxdepth and BLOCK.depth == maxdepth: 4091 if len(listitemindent) > len(listindent): 4092 
listitemindent = listindent 4093 4094 # open mother list or sublist 4095 if BLOCK.block()[-4:] != 'list' or \ 4096 len(listitemindent) > len(listindent): 4097 ret.extend(BLOCK.blockin(listname)) 4098 BLOCK.propset('indent',listitemindent) 4099 BLOCK.propset('type',listtype) 4100 4101 # closing sublists 4102 while len(listitemindent) < len(BLOCK.prop('indent')): 4103 ret.extend(BLOCK.blockout()) 4104 4105 # o-oh, sublist before list ("\n\n - foo\n- foo") 4106 # fix: close sublist (as mother), open another list 4107 if BLOCK.block()[-4:] != 'list': 4108 ret.extend(BLOCK.blockin(listname)) 4109 BLOCK.propset('indent',listitemindent) 4110 BLOCK.propset('type',listtype) 4111 4112 #---------------------[ Table ]----------------------------- 4113 4114 #TODO escape undesired format inside table 4115 #TODO add pm6 target 4116 if regex['table'].search(line): 4117 4118 if not BLOCK.isblock('table'): # first table line! 4119 ret.extend(BLOCK.blockin('table')) 4120 BLOCK.tableparser.__init__(line) 4121 4122 tablerow = TableMaster().parse_row(line) 4123 BLOCK.tableparser.add_row(tablerow) # save config 4124 4125 # maintain line to unmask and inlines 4126 line = string.join(tablerow['cells'], SEPARATOR) 4127 4128 #---------------------[ Paragraph ]------------------------- 4129 4130 if not BLOCK.block() and \ 4131 not string.count(line, MASK.tocmask): # new para! 4132 ret.extend(BLOCK.blockin('para')) 4133 4134 4135 ############################################################ 4136 ############################################################ 4137 ############################################################ 4138 4139 4140 #---------------------[ Final Parses ]---------------------- 4141 4142 # the target-specific special char escapes for body lines 4143 line = doEscape(target,line) 4144 4145 line = add_inline_tags(line) 4146 line = MASK.undo(line) 4147 4148 #---------------------[ Hold or Return? 
]------------------- 4149 4150 ### now we must choose here to put the parsed line 4151 # 4152 if not results_box: 4153 # list item extra lines 4154 if BLOCK.block()[-4:] == 'list': 4155 results_box = BLOCK.holdaddsub 4156 # other blocks 4157 elif BLOCK.block(): 4158 results_box = BLOCK.holdadd 4159 # no blocks 4160 else: 4161 line = doFinalEscape(target, line) 4162 results_box = ret.append 4163 4164 results_box(line) 4165 4166 # EOF: close any open para/verb/lists/table/quotes 4167 Debug('EOF',7) 4168 while BLOCK.block(): 4169 ret.extend(BLOCK.blockout()) 4170 4171 # maybe close some opened title area? 4172 if rules['titleblocks']: 4173 ret.extend(TITLE.close_all()) 4174 4175 # maybe a major tag to enclose body? (like DIV for CSS) 4176 if TAGS['bodyOpen' ]: ret.insert(0, TAGS['bodyOpen']) 4177 if TAGS['bodyClose']: ret.append(TAGS['bodyClose']) 4178 4179 if CONF['toc-only']: ret = [] 4180 marked_toc = TITLE.dump_marked_toc(CONF['toc-level']) 4181 4182 # if dump-source, all parsing is ignored 4183 if CONF['dump-source']: ret = dump_source[:] 4184 4185 return ret, marked_toc 4186 4187 4188 4189############################################################################## 4190################################### GUI ###################################### 4191############################################################################## 4192# 4193# tk help: http://python.org/topics/tkinter/ 4194# tuto: http://ibiblio.org/obp/py4fun/gui/tkPhone.html 4195# /usr/lib/python*/lib-tk/Tkinter.py 4196# 4197# grid table : row=0, column=0, columnspan=2, rowspan=2 4198# grid align : sticky='n,s,e,w' (North, South, East, West) 4199# pack place : side='top,bottom,right,left' 4200# pack fill : fill='x,y,both,none', expand=1 4201# pack align : anchor='n,s,e,w' (North, South, East, West) 4202# padding : padx=10, pady=10, ipadx=10, ipady=10 (internal) 4203# checkbox : offvalue is return if the _user_ deselected the box 4204# label align: justify=left,right,center 4205 4206def 
load_GUI_resources(): 4207 "Load all extra modules and methods used by GUI" 4208 global askopenfilename, showinfo, showwarning, showerror, Tkinter 4209 from tkFileDialog import askopenfilename 4210 from tkMessageBox import showinfo,showwarning,showerror 4211 import Tkinter 4212 4213class Gui: 4214 "Graphical Tk Interface" 4215 def __init__(self, conf={}): 4216 self.root = Tkinter.Tk() # mother window, come to butthead 4217 self.root.title(my_name) # window title bar text 4218 self.window = self.root # variable "focus" for inclusion 4219 self.row = 0 # row count for grid() 4220 4221 self.action_lenght = 150 # left column lenght (pixel) 4222 self.frame_margin = 10 # frame margin size (pixel) 4223 self.frame_border = 6 # frame border size (pixel) 4224 4225 # the default Gui colors, can be changed by %!guicolors 4226 self.dft_gui_colors = ['blue','white','lightblue','black'] 4227 self.gui_colors = [] 4228 self.bg1 = self.fg1 = self.bg2 = self.fg2 = '' 4229 4230 # on Tk, vars need to be set/get using setvar()/get() 4231 self.infile = self.setvar('') 4232 self.target = self.setvar('') 4233 self.target_name = self.setvar('') 4234 4235 # the checks appearance order 4236 self.checks = [ 4237 'headers','enum-title','toc','mask-email', 4238 'toc-only','stdout'] 4239 4240 # creating variables for all checks 4241 for check in self.checks: 4242 setattr(self, 'f_'+check, self.setvar('')) 4243 4244 # load RC config 4245 self.conf = {} 4246 if conf: self.load_config(conf) 4247 4248 def load_config(self, conf): 4249 self.conf = conf 4250 self.gui_colors = conf.get('guicolors') or self.dft_gui_colors 4251 self.bg1, self.fg1, self.bg2, self.fg2 = self.gui_colors 4252 self.root.config(bd=15,bg=self.bg1) 4253 4254 ### config as dic for python 1.5 compat (**opts don't work :( ) 4255 def entry(self, **opts): return Tkinter.Entry(self.window, opts) 4256 def label(self, txt='', bg=None, **opts): 4257 opts.update({'text':txt,'bg':bg or self.bg1}) 4258 return Tkinter.Label(self.window, opts) 
4259 def button(self,name,cmd,**opts): 4260 opts.update({'text':name,'command':cmd}) 4261 return Tkinter.Button(self.window, opts) 4262 def check(self,name,checked=0,**opts): 4263 bg, fg = self.bg2, self.fg2 4264 opts.update({ 4265 'text':name, 'onvalue':1, 'offvalue':0, 4266 'activeforeground':fg, 'fg':fg, 4267 'activebackground':bg, 'bg':bg, 4268 'highlightbackground':bg, 'anchor':'w' 4269 }) 4270 chk = Tkinter.Checkbutton(self.window, opts) 4271 if checked: chk.select() 4272 chk.grid(columnspan=2, sticky='w', padx=0) 4273 def menu(self,sel,items): 4274 return apply(Tkinter.OptionMenu,(self.window,sel)+tuple(items)) 4275 4276 # handy auxiliar functions 4277 def action(self, txt): 4278 self.label(txt, fg=self.fg1, bg=self.bg1, 4279 wraplength=self.action_lenght).grid(column=0,row=self.row) 4280 def frame_open(self): 4281 self.window = Tkinter.Frame(self.root,bg=self.bg2, 4282 borderwidth=self.frame_border) 4283 def frame_close(self): 4284 self.window.grid(column=1, row=self.row, sticky='w', 4285 padx=self.frame_margin) 4286 self.window = self.root 4287 self.label('').grid() 4288 self.row = self.row + 2 # update row count 4289 def target_name2key(self): 4290 name = self.target_name.get() 4291 target = filter(lambda x: TARGET_NAMES[x] == name, TARGETS) 4292 try : key = target[0] 4293 except: key = '' 4294 self.target = self.setvar(key) 4295 def target_key2name(self): 4296 key = self.target.get() 4297 name = TARGET_NAMES.get(key) or key 4298 self.target_name = self.setvar(name) 4299 4300 def exit(self): self.root.destroy() 4301 def setvar(self, val): z = Tkinter.StringVar() ; z.set(val) ; return z 4302 4303 def askfile(self): 4304 ftypes= [(_('txt2tags files'),('*.t2t','*.txt')), 4305 (_('All files'),'*')] 4306 newfile = askopenfilename(filetypes=ftypes) 4307 if newfile: 4308 self.infile.set(newfile) 4309 newconf = process_source_file(newfile)[0] 4310 newconf = ConfigMaster().sanity(newconf, gui=1) 4311 # restate all checkboxes after file selection 4312 #TODO how to 
make a refresh without killing it? 4313 self.root.destroy() 4314 self.__init__(newconf) 4315 self.mainwindow() 4316 4317 def scrollwindow(self, txt='no text!', title=''): 4318 # create components 4319 win = Tkinter.Toplevel() ; win.title(title) 4320 frame = Tkinter.Frame(win) 4321 scroll = Tkinter.Scrollbar(frame) 4322 text = Tkinter.Text(frame,yscrollcommand=scroll.set) 4323 button = Tkinter.Button(win) 4324 # config 4325 text.insert(Tkinter.END, string.join(txt,'\n')) 4326 scroll.config(command=text.yview) 4327 button.config(text=_('Close'), command=win.destroy) 4328 button.focus_set() 4329 # packing 4330 text.pack(side='left', fill='both', expand=1) 4331 scroll.pack(side='right', fill='y') 4332 frame.pack(fill='both', expand=1) 4333 button.pack(ipadx=30) 4334 4335 def runprogram(self): 4336 global CMDLINE_RAW 4337 # prepare 4338 self.target_name2key() 4339 infile, target = self.infile.get(), self.target.get() 4340 # sanity 4341 if not target: 4342 showwarning(my_name,_("You must select a target type!")) 4343 return 4344 if not infile: 4345 showwarning(my_name, 4346 _("You must provide the source file location!")) 4347 return 4348 # compose cmdline 4349 guiflags = [] 4350 real_cmdline_conf = ConfigMaster(CMDLINE_RAW).parse() 4351 if real_cmdline_conf.has_key('infile'): 4352 del real_cmdline_conf['infile'] 4353 if real_cmdline_conf.has_key('target'): 4354 del real_cmdline_conf['target'] 4355 real_cmdline = CommandLine().compose_cmdline(real_cmdline_conf) 4356 default_outfile = ConfigMaster().get_outfile_name( 4357 {'sourcefile':infile, 'outfile':'', 'target':target}) 4358 for opt in self.checks: 4359 val = int(getattr(self, 'f_%s'%opt).get() or "0") 4360 if opt == 'stdout': opt = 'outfile' 4361 on_config = self.conf.get(opt) or 0 4362 on_cmdline = real_cmdline_conf.get(opt) or 0 4363 if opt == 'outfile': 4364 if on_config == STDOUT: on_config = 1 4365 else: on_config = 0 4366 if on_cmdline == STDOUT: on_cmdline = 1 4367 else: on_cmdline = 0 4368 if val != 
on_config or ( 4369 val == on_config == on_cmdline and 4370 real_cmdline_conf.has_key(opt)): 4371 if val: 4372 # was not set, but user selected on GUI 4373 Debug("user turned ON: %s"%opt) 4374 if opt == 'outfile': opt = '-o-' 4375 else: opt = '--%s'%opt 4376 else: 4377 # was set, but user deselected on GUI 4378 Debug("user turned OFF: %s"%opt) 4379 if opt == 'outfile': 4380 opt = "-o%s"%default_outfile 4381 else: opt = '--no-%s'%opt 4382 guiflags.append(opt) 4383 cmdline = [my_name, '-t', target] +real_cmdline \ 4384 +guiflags +[infile] 4385 Debug('Gui/Tk cmdline: %s'%cmdline,5) 4386 # run! 4387 cmdline_raw_orig = CMDLINE_RAW 4388 try: 4389 # fake the GUI cmdline as the real one, and parse file 4390 CMDLINE_RAW = CommandLine().get_raw_config(cmdline[1:]) 4391 data = process_source_file(infile) 4392 # on GUI, convert_* returns the data, not finish_him() 4393 outlist, config = convert_this_files([data]) 4394 # on GUI and STDOUT, finish_him() returns the data 4395 result = finish_him(outlist, config) 4396 # show outlist in s a nice new window 4397 if result: 4398 outlist, config = result 4399 title = _('%s: %s converted to %s')%( 4400 my_name, os.path.basename(infile), 4401 string.upper(config['target'])) 4402 self.scrollwindow(outlist, title) 4403 # show the "file saved" message 4404 else: 4405 msg = "%s\n\n %s\n%s\n\n %s\n%s"%( 4406 _('Conversion done!'), 4407 _('FROM:'), infile, 4408 _('TO:'), config['outfile']) 4409 showinfo(my_name, msg) 4410 except error: # common error (windowed), not quit 4411 pass 4412 except: # fatal error (windowed and printed) 4413 errormsg = getUnknownErrorMessage() 4414 print errormsg 4415 showerror(_('%s FATAL ERROR!')%my_name,errormsg) 4416 self.exit() 4417 CMDLINE_RAW = cmdline_raw_orig 4418 4419 def mainwindow(self): 4420 self.infile.set(self.conf.get('sourcefile') or '') 4421 self.target.set(self.conf.get('target') or \ 4422 _('-- select one --')) 4423 outfile = self.conf.get('outfile') 4424 if outfile == STDOUT: # map -o- 4425 
self.conf['stdout'] = 1 4426 if self.conf.get('headers') == None: 4427 self.conf['headers'] = 1 # map default 4428 4429 action1 = _("Enter the source file location:") 4430 action2 = _("Choose the target document type:") 4431 action3 = _("Some options you may check:") 4432 action4 = _("Some extra options:") 4433 checks_txt = { 4434 'headers' : _("Include headers on output"), 4435 'enum-title': _("Number titles (1, 1.1, 1.1.1, etc)"), 4436 'toc' : _("Do TOC also (Table of Contents)"), 4437 'mask-email': _("Hide e-mails from SPAM robots"), 4438 4439 'toc-only' : _("Just do TOC, nothing more"), 4440 'stdout' : _("Dump to screen (Don't save target file)") 4441 } 4442 targets_menu = map(lambda x: TARGET_NAMES[x], TARGETS) 4443 4444 # header 4445 self.label("%s %s"%(string.upper(my_name), my_version), 4446 bg=self.bg2, fg=self.fg2).grid(columnspan=2, ipadx=10) 4447 self.label(_("ONE source, MULTI targets")+'\n%s\n'%my_url, 4448 bg=self.bg1, fg=self.fg1).grid(columnspan=2) 4449 self.row = 2 4450 # choose input file 4451 self.action(action1) ; self.frame_open() 4452 e_infile = self.entry(textvariable=self.infile,width=25) 4453 e_infile.grid(row=self.row, column=0, sticky='e') 4454 if not self.infile.get(): e_infile.focus_set() 4455 self.button(_("Browse"), self.askfile).grid( 4456 row=self.row, column=1, sticky='w', padx=10) 4457 # show outfile name, style and encoding (if any) 4458 txt = '' 4459 if outfile: 4460 txt = outfile 4461 if outfile == STDOUT: txt = _('<screen>') 4462 l_output = self.label(_('Output: ')+txt, 4463 fg=self.fg2,bg=self.bg2) 4464 l_output.grid(columnspan=2, sticky='w') 4465 for setting in ['style','encoding']: 4466 if self.conf.get(setting): 4467 name = string.capitalize(setting) 4468 val = self.conf[setting] 4469 self.label('%s: %s'%(name, val), 4470 fg=self.fg2, bg=self.bg2).grid( 4471 columnspan=2, sticky='w') 4472 # choose target 4473 self.frame_close() ; self.action(action2) 4474 self.frame_open() 4475 self.target_key2name() 4476 
self.menu(self.target_name, targets_menu).grid( 4477 columnspan=2, sticky='w') 4478 # options checkboxes label 4479 self.frame_close() ; self.action(action3) 4480 self.frame_open() 4481 # compose options check boxes, example: 4482 # self.check(checks_txt['toc'],1,variable=self.f_toc) 4483 for check in self.checks: 4484 # extra options label 4485 if check == 'toc-only': 4486 self.frame_close() ; self.action(action4) 4487 self.frame_open() 4488 txt = checks_txt[check] 4489 var = getattr(self, 'f_'+check) 4490 checked = self.conf.get(check) 4491 self.check(txt,checked,variable=var) 4492 self.frame_close() 4493 # spacer and buttons 4494 self.label('').grid() ; self.row = self.row + 1 4495 b_quit = self.button(_("Quit"), self.exit) 4496 b_quit.grid(row=self.row, column=0, sticky='w', padx=30) 4497 b_conv = self.button(_("Convert!"), self.runprogram) 4498 b_conv.grid(row=self.row, column=1, sticky='e', padx=30) 4499 if self.target.get() and self.infile.get(): 4500 b_conv.focus_set() 4501 4502 # as documentation told me 4503 if sys.platform[:3] == 'win': 4504 self.root.iconify() 4505 self.root.update() 4506 self.root.deiconify() 4507 4508 self.root.mainloop() 4509 4510 4511############################################################################## 4512############################################################################## 4513 4514def exec_command_line(user_cmdline=[]): 4515 global CMDLINE_RAW, RC_RAW, DEBUG, VERBOSE, QUIET, GUI, Error 4516 4517 # extract command line data 4518 cmdline_data = user_cmdline or sys.argv[1:] 4519 CMDLINE_RAW = CommandLine().get_raw_config(cmdline_data, relative=1) 4520 cmdline_parsed = ConfigMaster(CMDLINE_RAW).parse() 4521 DEBUG = cmdline_parsed.get('debug' ) or 0 4522 VERBOSE = cmdline_parsed.get('verbose') or 0 4523 QUIET = cmdline_parsed.get('quiet' ) or 0 4524 GUI = cmdline_parsed.get('gui' ) or 0 4525 infiles = cmdline_parsed.get('infile' ) or [] 4526 4527 Message(_("Txt2tags %s processing begins")%my_version,1) 4528 4529 # 
the easy ones 4530 if cmdline_parsed.get('help' ): Quit(USAGE) 4531 if cmdline_parsed.get('version'): Quit(VERSIONSTR) 4532 4533 # multifile haters 4534 if len(infiles) > 1: 4535 errmsg=_("Option --%s can't be used with multiple input files") 4536 for option in NO_MULTI_INPUT: 4537 if cmdline_parsed.get(option): 4538 Error(errmsg%option) 4539 4540 Debug("system platform: %s"%sys.platform) 4541 Debug("python version: %s"%(string.split(sys.version,'(')[0])) 4542 Debug("line break char: %s"%repr(LB)) 4543 Debug("command line: %s"%sys.argv) 4544 Debug("command line raw config: %s"%CMDLINE_RAW,1) 4545 4546 # extract RC file config 4547 if cmdline_parsed.get('rc') == 0: 4548 Message(_("Ignoring user configuration file"),1) 4549 else: 4550 rc_file = get_rc_path() 4551 if os.path.isfile(rc_file): 4552 Message(_("Loading user configuration file"),1) 4553 RC_RAW = ConfigLines(file=rc_file).get_raw_config() 4554 4555 Debug("rc file: %s"%rc_file) 4556 Debug("rc file raw config: %s"%RC_RAW,1) 4557 4558 # get all infiles config (if any) 4559 infiles_config = get_infiles_config(infiles) 4560 4561 # is GUI available? 
4562 # try to load and start GUI interface for --gui 4563 # if program was called with no arguments, try GUI also 4564 if GUI or not infiles: 4565 try: 4566 load_GUI_resources() 4567 Debug("GUI resources OK (Tk module is installed)") 4568 winbox = Gui() 4569 Debug("GUI display OK") 4570 GUI = 1 4571 except: 4572 Debug("GUI Error: no Tk module or no DISPLAY") 4573 GUI = 0 4574 4575 # user forced --gui, but it's not available 4576 if cmdline_parsed.get('gui') and not GUI: 4577 print getTraceback(); print 4578 Error("Sorry, I can't run my Graphical Interface - GUI\n" 4579 "- Check if Python Tcl/Tk module is installed (Tkinter)\n" 4580 "- Make sure you are in a graphical environment (like X)") 4581 4582 # Okay, we will use GUI 4583 if GUI: 4584 Message(_("We are on GUI interface"),1) 4585 4586 # redefine Error function to raise exception instead sys.exit() 4587 def Error(msg): 4588 showerror(_('txt2tags ERROR!'), msg) 4589 raise error 4590 4591 # if no input file, get RC+cmdline config, else full config 4592 if not infiles: 4593 gui_conf = ConfigMaster(RC_RAW+CMDLINE_RAW).parse() 4594 else: 4595 try : gui_conf = infiles_config[0][0] 4596 except: gui_conf = {} 4597 4598 # sanity is needed to set outfile and other things 4599 gui_conf = ConfigMaster().sanity(gui_conf, gui=1) 4600 Debug("GUI config: %s"%gui_conf,5) 4601 4602 # insert config and populate the nice window! 4603 winbox.load_config(gui_conf) 4604 winbox.mainwindow() 4605 4606 # console mode rocks forever! 
4607 else: 4608 Message(_("We are on Command Line interface"),1) 4609 4610 # called with no arguments, show error 4611 if not infiles: Error(_('Missing input file (try --help)')) 4612 4613 convert_this_files(infiles_config) 4614 4615 Message(_("Txt2tags finished sucessfuly"),1) 4616 4617if __name__ == '__main__': 4618 try: 4619 exec_command_line() 4620 except error, msg: 4621 sys.stderr.write("%s\n"%msg) 4622 sys.stderr.flush() 4623 sys.exit(1) 4624 except SystemExit: 4625 pass 4626 except: 4627 print getUnknownErrorMessage() 4628 Quit() 4629 4630 4631# vim: ts=8 4632