1# -*- coding: iso-8859-1 -*-
2"""
3    MoinMoin - Base Source Parser
4
5    @copyright: 2002 by Taesu Pyo <bigflood@hitel.net>,
6                2005 by Oliver Graf <ograf@bitart.de>,
7                2005-2008 MoinMoin:ThomasWaldmann
8
9    @license: GNU GPL, see COPYING for details.
10
11
12basic css:
13
pre.codearea     { font-family: sans-serif; color: #000000; }
15
16pre.codearea span.ID       { color: #000000; }
17pre.codearea span.Char     { color: #004080; }
18pre.codearea span.Comment  { color: #808080; }
19pre.codearea span.Number   { color: #008080; font-weight: bold; }
20pre.codearea span.String   { color: #004080; }
21pre.codearea span.SPChar   { color: #0000C0; }
22pre.codearea span.ResWord  { color: #4040ff; font-weight: bold; }
23pre.codearea span.ConsWord { color: #008080; font-weight: bold; }
24
25"""
26
27import hashlib
28import re
29
30from MoinMoin import log
31logging = log.getLogger(__name__)
32
33from MoinMoin import config, wikiutil
34from MoinMoin.parser import parse_start_step
35
36
class FormatTextBase:
    """ Common base of all format objects.

    Carries no behaviour of its own; it only serves as the type that
    isinstance() checks use to recognise a ready-made format object.
    """
39
class FormatBeginLine(FormatTextBase):
    """ Format object that emits the formatter's "code line starts" markup. """

    def formatString(self, formatter, word):
        """ Return formatter.code_line(1); `word` is not used. """
        line_open = formatter.code_line(1)
        return line_open
43
class FormatEndLine(FormatTextBase):
    """ Format object that emits the formatter's "code line ends" markup. """

    def formatString(self, formatter, word):
        """ Return formatter.code_line(0); `word` is not used. """
        line_close = formatter.code_line(0)
        return line_close
47
class FormatText(FormatTextBase):
    """ Format object that wraps a word into a code token of a fixed type. """

    def __init__(self, fmt):
        # fmt is the token type handed to formatter.code_token()
        self.fmt = fmt

    def formatString(self, formatter, word):
        """ Return `word` rendered between an opening and a closing code token.

        The three formatter calls are made in output order (open, text,
        close), exactly one time each.
        """
        token_type = self.fmt
        opening = formatter.code_token(1, token_type)
        body = formatter.text(word)
        closing = formatter.code_token(0, token_type)
        return opening + body + closing
57
class FormatTextID(FormatTextBase):
    """ Format object that picks a per-word format from a lookup table.

    Words registered with addFormat() are rendered with their own format
    object, every other word with the default format.
    """

    def __init__(self, fmt, icase=False):
        """
        @param fmt: default format; anything that is not a FormatText
                    instance is wrapped into one
        @param icase: if true, words are looked up case-insensitively
        """
        default = fmt if isinstance(fmt, FormatText) else FormatText(fmt)
        self.setDefaultFormat(default)
        self._ignore_case = icase
        self.fmt = {}

    def setDefaultFormat(self, fmt):
        """ Set the format object used for words without their own entry. """
        self._def_fmt = fmt

    def addFormat(self, word, fmt):
        """ Register format object `fmt` for `word`. """
        key = word.lower() if self._ignore_case else word
        self.fmt[key] = fmt

    def formatString(self, formatter, word):
        """ Render `word` with its registered format or with the default one.

        Only the lookup key is lowercased in ignore-case mode, the rendered
        word keeps its original spelling.
        """
        key = word.lower() if self._ignore_case else word
        chosen = self.fmt.get(key, self._def_fmt)
        return chosen.formatString(formatter, word)
81
82
class FormattingRuleSingle:
    """ Rule whose whole token is matched by one regular expression. """

    def __init__(self, name, str_re, icase=False):
        """
        @param name: rule name
        @param str_re: regular expression source matching the token
        @param icase: accepted, but not stored or used by this class
        """
        self.name, self.str_re = name, str_re

    def getStartRe(self):
        """ Return the regular expression source of this rule. """
        return self.str_re

    def getText(self, parser, hit):
        """ Return the token text, which is just the matched `hit`. """
        return hit
94
95
class FormattingRulePair:
    """ Rule for a token delimited by a start and an end regular expression. """

    def __init__(self, name, str_begin, str_end, icase=False):
        """
        @param name: rule name
        @param str_begin: regular expression source matching the token start
        @param str_end: regular expression source matching the token end
        @param icase: if true, the end expression ignores case
        """
        self.name = name
        self.str_begin = str_begin
        self.str_end = str_end
        flags = (re.M | re.I) if icase else re.M
        self.end_re = re.compile(str_end, flags)

    def getStartRe(self):
        """ Return the regular expression source matching the token start. """
        return self.str_begin

    def getText(self, parser, hit):
        """ Return `hit` plus all text up to and including the end match.

        Searches parser.text from parser.lastpos on and moves parser.lastpos
        behind the consumed text. Without an end match everything up to
        parser.text_len is consumed.
        """
        begin = parser.lastpos
        found = self.end_re.search(parser.text, begin)
        if found:
            stop = found.end()
            if stop == begin:
                # the end match did not move past the start position:
                # consume one character anyway
                stop += 1
        else:
            stop = parser.text_len
        tail = parser.text[begin:stop]
        parser.lastpos = stop
        return hit + tail
119
120
class ParserBase:
    """ DEPRECATED highlighting parser - please use/extend pygments library

    Regular expression driven highlighter: setupRules() registers named
    rules (see addRule() / addRulePair()) and the format objects used to
    render them, format() then scans the raw text with the combined rules
    and writes the result to the request.
    """
    # emitted once, when this class body is executed
    logging.warning('Using ParserBase is deprecated - please use/extend pygments syntax highlighting library.')

    parsername = 'ParserBase'
    tabwidth = 4

    # for dirty tricks, see comment in format():
    # STARTL / ENDL are the sentinels put around every line, the *_RE values
    # are regular expressions matching exactly these sentinels. The
    # expressions are spelled with explicit backslash escapes (same value as
    # the former ur"..." literals, which are a syntax error on Python 3).
    STARTL, STARTL_RE = u"^\n", u"\\^\\n"
    ENDL, ENDL_RE = u"\n$", u"\\n\\$"
    LINESEP = ENDL + STARTL

    def __init__(self, raw, request, **kw):
        """
        @param raw: the source text to highlight
        @param request: request object, format() writes its output to it
        @keyword format_args: argument string handed to parse_start_step(),
                              which yields the line numbering settings
        """
        self.raw = raw
        self.request = request
        self.show_nums, self.num_start, self.num_step, attrs = parse_start_step(request, kw.get('format_args', ''))

        self._ignore_case = False
        self._formatting_rules = [] # (unique name, rule) in registration order
        self._formatting_rules_n2r = {} # unique name -> rule
        self._formatting_rule_index = 0 # counter used to build unique names
        self.rule_fmt = {} # rule name -> format object

    def setupRules(self):
        """ Register the line begin/end rules and the standard formats.

        Called by format() before scanning.
        """
        self.addRuleFormat("BEGINLINE", FormatBeginLine())
        self.addRuleFormat("ENDLINE", FormatEndLine())
        # we need a little dirty trick here, see comment in format():
        self.addRule("BEGINLINE", self.STARTL_RE)
        self.addRule("ENDLINE", self.ENDL_RE)

        self.def_format = FormatText('Default')
        self.reserved_word_format = FormatText('ResWord')
        self.constant_word_format = FormatText('ConsWord')
        self.ID_format = FormatTextID('ID', self._ignore_case)
        self.addRuleFormat("ID", self.ID_format)
        self.addRuleFormat("Operator")
        self.addRuleFormat("Char")
        self.addRuleFormat("Comment")
        self.addRuleFormat("Number")
        self.addRuleFormat("String")
        self.addRuleFormat("SPChar")
        self.addRuleFormat("ResWord")
        self.addRuleFormat("ResWord2")
        self.addRuleFormat("ConsWord")
        self.addRuleFormat("Special")
        self.addRuleFormat("Preprc")
        self.addRuleFormat("Error")

    def _addRule(self, name, fmt):
        """ Store rule object `fmt` under a unique name derived from `name`.

        The unique name is later used as the regex group name, so the same
        rule name may be registered more than once.
        """
        self._formatting_rule_index += 1
        name = "%s_%s" % (name, self._formatting_rule_index) # create unique name
        self._formatting_rules.append((name, fmt))
        self._formatting_rules_n2r[name] = fmt

    def addRule(self, name, str_re):
        """ Register a rule whose token is matched by the single regex `str_re`. """
        self._addRule(name, FormattingRuleSingle(name, str_re, self._ignore_case))

    def addRulePair(self, name, start_re, end_re):
        """ Register a rule whose token reaches from `start_re` to `end_re`. """
        self._addRule(name, FormattingRulePair(name, start_re, end_re, self._ignore_case))

    def addWords(self, words, fmt):
        """ Render each of `words` with `fmt` when it is matched as an ID.

        `fmt` may be a format object or a token type, which gets wrapped
        into a FormatText.
        """
        if not isinstance(fmt, FormatTextBase):
            fmt = FormatText(fmt)
        for w in words:
            self.ID_format.addFormat(w, fmt)

    def addReserved(self, words):
        """ Render `words` with the reserved word format. """
        self.addWords(words, self.reserved_word_format)

    def addConstant(self, words):
        """ Render `words` with the constant word format. """
        self.addWords(words, self.constant_word_format)

    def addRuleFormat(self, name, fmt=None):
        """ Set the format object of rule `name`.

        Without `fmt`, a FormatText using the rule name as token type is used.
        """
        if fmt is None:
            fmt = FormatText(name)
        self.rule_fmt[name] = fmt

    def format(self, formatter, form=None, **kw):
        """ Send the text.

        Sets up the rules, scans self.raw with them and writes the
        highlighted code area to self.request.

        @param formatter: formatter used to render the output
        @param form: not used here
        """

        self.setupRules()

        # one named group per rule; with several alternatives matching at the
        # same position, the rule registered first wins
        formatting_regexes = ["(?P<%s>%s)" % (n, f.getStartRe())
                              for n, f in self._formatting_rules]
        re_flags = re.M
        if self._ignore_case:
            re_flags |= re.I
        scan_re = re.compile("|".join(formatting_regexes), re_flags)

        self.text = self.raw

        # dirty little trick to work around re lib's limitations (it can't have
        # zero length matches at line beginning for ^ and at the same time match
        # something else at the beginning of the line):
        # every line gets wrapped into the STARTL / ENDL sentinels, which are
        # then matched by the BEGINLINE / ENDLINE rules like any other token
        self.text = self.LINESEP.join([line.replace('\r', '') for line in self.text.splitlines()])
        self.text = self.STARTL + self.text + self.ENDL
        self.text_len = len(self.text)

        result = [] # collects output

        self._code_id = hashlib.new('sha1', self.raw.encode(config.charset)).hexdigest()
        result.append(formatter.code_area(1, self._code_id, self.parsername, self.show_nums, self.num_start, self.num_step))

        # self.lastpos is the scan cursor; pair rules move it further from
        # within format_match() (via their getText())
        self.lastpos = 0
        match = scan_re.search(self.text)
        while match and self.lastpos < self.text_len:
            # add the rendering of the text left of the match we found
            text = self.text[self.lastpos:match.start()]
            if text:
                result.extend(self.format_normal_text(formatter, text))
            # move behind the match; a match ending right at the cursor moves
            # it one character further, so the loop always advances
            self.lastpos = match.end() + (match.end() == self.lastpos)

            # add the rendering of the match we found
            result.extend(self.format_match(formatter, match))

            # search for the next one
            match = scan_re.search(self.text, self.lastpos)

        # add the rendering of the text right of the last match we found
        text = self.text[self.lastpos:]
        if text:
            result.extend(self.format_normal_text(formatter, text))

        result.append(formatter.code_area(0, self._code_id))
        self.request.write(''.join(result))

    def format_normal_text(self, formatter, text):
        """ Return the rendering of text no rule matched (as a list).

        Tabs are expanded to self.tabwidth.
        """
        return [formatter.text(text.expandtabs(self.tabwidth))]

    def format_match(self, formatter, match):
        """ Return the rendering of a scanner match (as a list).

        The rule belonging to the matched group supplies the complete token
        text, which is rendered with the format object registered for the
        rule name (or with the default format, if there is none).
        """
        result = []
        for n, hit in match.groupdict().items():
            if hit is None:
                # group of a rule that did not take part in this match
                continue
            r = self._formatting_rules_n2r[n]
            s = r.getText(self, hit)
            c = self.rule_fmt.get(r.name, None)
            if not c:
                c = self.def_format
            if s:
                # a token spanning several lines contains the line sentinels:
                # render it line by line, closing and reopening the code line
                # in between
                lines = s.expandtabs(self.tabwidth).split(self.LINESEP)
                for line in lines[:-1]:
                    result.append(c.formatString(formatter, line))
                    result.append(FormatEndLine().formatString(formatter, ''))
                    result.append(FormatBeginLine().formatString(formatter, ''))
                result.append(c.formatString(formatter, lines[-1]))
        return result
270
271