1# -*- coding: utf-8 -*-
2"""
3    pygments.lexers.webmisc
4    ~~~~~~~~~~~~~~~~~~~~~~~
5
6    Lexers for misc. web stuff.
7
8    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
9    :license: BSD, see LICENSE for details.
10"""
11
12import re
13
14from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \
15    default, using
16from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17    Number, Punctuation, Literal
18from pygments.util import unirange
19
20from pygments.lexers.css import _indentation, _starts_block
21from pygments.lexers.html import HtmlLexer
22from pygments.lexers.javascript import JavascriptLexer
23from pygments.lexers.ruby import RubyLexer
24
25__all__ = ['DuelLexer', 'SlimLexer', 'XQueryLexer', 'QmlLexer', 'CirruLexer']
26
27
class DuelLexer(RegexLexer):
    """
    Lexer for Duel Views Engine (formerly JBST) markup with JavaScript code blocks.
    See http://duelengine.org/.
    See http://jsonml.org/jbst/.

    .. versionadded:: 1.4
    """

    name = 'Duel'
    aliases = ['duel', 'jbst', 'jsonml+bst']
    filenames = ['*.duel', '*.jbst']
    mimetypes = ['text/x-duel', 'text/x-jbst']

    # DOTALL lets ``.`` cross newlines, so multi-line blocks match as one unit.
    flags = re.DOTALL

    tokens = {
        'root': [
            # Rules are tried in order and the first match wins.  The comment
            # and ``<%$ name:value %>`` forms therefore have to be listed
            # before the generic ``<% ... %>`` rule, whose pattern matches
            # them as well and would otherwise always take precedence.
            (r'(<%--)(.*?)(--%>)',
             bygroups(Name.Tag, Comment.Multiline, Name.Tag)),
            # The name part must not run past a closing ``%>``; a ``<%$``
            # block without a colon falls through to the generic rule below.
            (r'(<%\$)((?:(?!%>).)*?)(:)(.*?)(%>)',
             bygroups(Name.Tag, Name.Function, Punctuation, String, Name.Tag)),
            (r'(<%[@=#!:]?)(.*?)(%>)',
             bygroups(Name.Tag, using(JavascriptLexer), Name.Tag)),
            (r'(<script.*?>)(.*?)(</script>)',
             bygroups(using(HtmlLexer),
                      using(JavascriptLexer), using(HtmlLexer))),
            # Everything else is handed to the HTML lexer, up to the next
            # ``<`` when there is one, otherwise to the end of the input.
            (r'(.+?)(?=<)', using(HtmlLexer)),
            (r'.+', using(HtmlLexer)),
        ],
    }
59
60
61class XQueryLexer(ExtendedRegexLexer):
62    """
63    An XQuery lexer, parsing a stream and outputting the tokens needed to
64    highlight xquery code.
65
66    .. versionadded:: 1.4
67    """
68    name = 'XQuery'
69    aliases = ['xquery', 'xqy', 'xq', 'xql', 'xqm']
70    filenames = ['*.xqy', '*.xquery', '*.xq', '*.xql', '*.xqm']
71    mimetypes = ['text/xquery', 'application/xquery']
72
73    xquery_parse_state = []
74
75    # FIX UNICODE LATER
76    # ncnamestartchar = (
77    #    ur"[A-Z]|_|[a-z]|[\u00C0-\u00D6]|[\u00D8-\u00F6]|[\u00F8-\u02FF]|"
78    #    ur"[\u0370-\u037D]|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]|"
79    #    ur"[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]|"
80    #    ur"[\u10000-\uEFFFF]"
81    # )
82    ncnamestartchar = r"(?:[A-Z]|_|[a-z])"
83    # FIX UNICODE LATER
84    # ncnamechar = ncnamestartchar + (ur"|-|\.|[0-9]|\u00B7|[\u0300-\u036F]|"
85    #                                 ur"[\u203F-\u2040]")
86    ncnamechar = r"(?:" + ncnamestartchar + r"|-|\.|[0-9])"
87    ncname = "(?:%s+%s*)" % (ncnamestartchar, ncnamechar)
88    pitarget_namestartchar = r"(?:[A-KN-WYZ]|_|:|[a-kn-wyz])"
89    pitarget_namechar = r"(?:" + pitarget_namestartchar + r"|-|\.|[0-9])"
90    pitarget = "%s+%s*" % (pitarget_namestartchar, pitarget_namechar)
91    prefixedname = "%s:%s" % (ncname, ncname)
92    unprefixedname = ncname
93    qname = "(?:%s|%s)" % (prefixedname, unprefixedname)
94
95    entityref = r'(?:&(?:lt|gt|amp|quot|apos|nbsp);)'
96    charref = r'(?:&#[0-9]+;|&#x[0-9a-fA-F]+;)'
97
98    stringdouble = r'(?:"(?:' + entityref + r'|' + charref + r'|""|[^&"])*")'
99    stringsingle = r"(?:'(?:" + entityref + r"|" + charref + r"|''|[^&'])*')"
100
101    # FIX UNICODE LATER
102    # elementcontentchar = (ur'\t|\r|\n|[\u0020-\u0025]|[\u0028-\u003b]|'
103    #                       ur'[\u003d-\u007a]|\u007c|[\u007e-\u007F]')
104    elementcontentchar = r'[A-Za-z]|\s|\d|[!"#$%()*+,\-./:;=?@\[\\\]^_\'`|~]'
105    # quotattrcontentchar = (ur'\t|\r|\n|[\u0020-\u0021]|[\u0023-\u0025]|'
106    #                        ur'[\u0027-\u003b]|[\u003d-\u007a]|\u007c|[\u007e-\u007F]')
107    quotattrcontentchar = r'[A-Za-z]|\s|\d|[!#$%()*+,\-./:;=?@\[\\\]^_\'`|~]'
108    # aposattrcontentchar = (ur'\t|\r|\n|[\u0020-\u0025]|[\u0028-\u003b]|'
109    #                        ur'[\u003d-\u007a]|\u007c|[\u007e-\u007F]')
110    aposattrcontentchar = r'[A-Za-z]|\s|\d|[!"#$%()*+,\-./:;=?@\[\\\]^_`|~]'
111
112    # CHAR elements - fix the above elementcontentchar, quotattrcontentchar,
113    #                 aposattrcontentchar
114    # x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
115
116    flags = re.DOTALL | re.MULTILINE | re.UNICODE
117
118    def punctuation_root_callback(lexer, match, ctx):
119        yield match.start(), Punctuation, match.group(1)
120        # transition to root always - don't pop off stack
121        ctx.stack = ['root']
122        ctx.pos = match.end()
123
124    def operator_root_callback(lexer, match, ctx):
125        yield match.start(), Operator, match.group(1)
126        # transition to root always - don't pop off stack
127        ctx.stack = ['root']
128        ctx.pos = match.end()
129
130    def popstate_tag_callback(lexer, match, ctx):
131        yield match.start(), Name.Tag, match.group(1)
132        ctx.stack.append(lexer.xquery_parse_state.pop())
133        ctx.pos = match.end()
134
135    def popstate_xmlcomment_callback(lexer, match, ctx):
136        yield match.start(), String.Doc, match.group(1)
137        ctx.stack.append(lexer.xquery_parse_state.pop())
138        ctx.pos = match.end()
139
140    def popstate_kindtest_callback(lexer, match, ctx):
141        yield match.start(), Punctuation, match.group(1)
142        next_state = lexer.xquery_parse_state.pop()
143        if next_state == 'occurrenceindicator':
144            if re.match("[?*+]+", match.group(2)):
145                yield match.start(), Punctuation, match.group(2)
146                ctx.stack.append('operator')
147                ctx.pos = match.end()
148            else:
149                ctx.stack.append('operator')
150                ctx.pos = match.end(1)
151        else:
152            ctx.stack.append(next_state)
153            ctx.pos = match.end(1)
154
155    def popstate_callback(lexer, match, ctx):
156        yield match.start(), Punctuation, match.group(1)
157        # if we have run out of our state stack, pop whatever is on the pygments
158        # state stack
159        if len(lexer.xquery_parse_state) == 0:
160            ctx.stack.pop()
161        elif len(ctx.stack) > 1:
162            ctx.stack.append(lexer.xquery_parse_state.pop())
163        else:
164            # i don't know if i'll need this, but in case, default back to root
165            ctx.stack = ['root']
166        ctx.pos = match.end()
167
168    def pushstate_element_content_starttag_callback(lexer, match, ctx):
169        yield match.start(), Name.Tag, match.group(1)
170        lexer.xquery_parse_state.append('element_content')
171        ctx.stack.append('start_tag')
172        ctx.pos = match.end()
173
174    def pushstate_cdata_section_callback(lexer, match, ctx):
175        yield match.start(), String.Doc, match.group(1)
176        ctx.stack.append('cdata_section')
177        lexer.xquery_parse_state.append(ctx.state.pop)
178        ctx.pos = match.end()
179
180    def pushstate_starttag_callback(lexer, match, ctx):
181        yield match.start(), Name.Tag, match.group(1)
182        lexer.xquery_parse_state.append(ctx.state.pop)
183        ctx.stack.append('start_tag')
184        ctx.pos = match.end()
185
186    def pushstate_operator_order_callback(lexer, match, ctx):
187        yield match.start(), Keyword, match.group(1)
188        yield match.start(), Text, match.group(2)
189        yield match.start(), Punctuation, match.group(3)
190        ctx.stack = ['root']
191        lexer.xquery_parse_state.append('operator')
192        ctx.pos = match.end()
193
194    def pushstate_operator_map_callback(lexer, match, ctx):
195        yield match.start(), Keyword, match.group(1)
196        yield match.start(), Text, match.group(2)
197        yield match.start(), Punctuation, match.group(3)
198        ctx.stack = ['root']
199        lexer.xquery_parse_state.append('operator')
200        ctx.pos = match.end()
201
202    def pushstate_operator_root_validate(lexer, match, ctx):
203        yield match.start(), Keyword, match.group(1)
204        yield match.start(), Text, match.group(2)
205        yield match.start(), Punctuation, match.group(3)
206        ctx.stack = ['root']
207        lexer.xquery_parse_state.append('operator')
208        ctx.pos = match.end()
209
210    def pushstate_operator_root_validate_withmode(lexer, match, ctx):
211        yield match.start(), Keyword, match.group(1)
212        yield match.start(), Text, match.group(2)
213        yield match.start(), Keyword, match.group(3)
214        ctx.stack = ['root']
215        lexer.xquery_parse_state.append('operator')
216        ctx.pos = match.end()
217
218    def pushstate_operator_processing_instruction_callback(lexer, match, ctx):
219        yield match.start(), String.Doc, match.group(1)
220        ctx.stack.append('processing_instruction')
221        lexer.xquery_parse_state.append('operator')
222        ctx.pos = match.end()
223
224    def pushstate_element_content_processing_instruction_callback(lexer, match, ctx):
225        yield match.start(), String.Doc, match.group(1)
226        ctx.stack.append('processing_instruction')
227        lexer.xquery_parse_state.append('element_content')
228        ctx.pos = match.end()
229
230    def pushstate_element_content_cdata_section_callback(lexer, match, ctx):
231        yield match.start(), String.Doc, match.group(1)
232        ctx.stack.append('cdata_section')
233        lexer.xquery_parse_state.append('element_content')
234        ctx.pos = match.end()
235
236    def pushstate_operator_cdata_section_callback(lexer, match, ctx):
237        yield match.start(), String.Doc, match.group(1)
238        ctx.stack.append('cdata_section')
239        lexer.xquery_parse_state.append('operator')
240        ctx.pos = match.end()
241
242    def pushstate_element_content_xmlcomment_callback(lexer, match, ctx):
243        yield match.start(), String.Doc, match.group(1)
244        ctx.stack.append('xml_comment')
245        lexer.xquery_parse_state.append('element_content')
246        ctx.pos = match.end()
247
248    def pushstate_operator_xmlcomment_callback(lexer, match, ctx):
249        yield match.start(), String.Doc, match.group(1)
250        ctx.stack.append('xml_comment')
251        lexer.xquery_parse_state.append('operator')
252        ctx.pos = match.end()
253
254    def pushstate_kindtest_callback(lexer, match, ctx):
255        yield match.start(), Keyword, match.group(1)
256        yield match.start(), Text, match.group(2)
257        yield match.start(), Punctuation, match.group(3)
258        lexer.xquery_parse_state.append('kindtest')
259        ctx.stack.append('kindtest')
260        ctx.pos = match.end()
261
262    def pushstate_operator_kindtestforpi_callback(lexer, match, ctx):
263        yield match.start(), Keyword, match.group(1)
264        yield match.start(), Text, match.group(2)
265        yield match.start(), Punctuation, match.group(3)
266        lexer.xquery_parse_state.append('operator')
267        ctx.stack.append('kindtestforpi')
268        ctx.pos = match.end()
269
270    def pushstate_operator_kindtest_callback(lexer, match, ctx):
271        yield match.start(), Keyword, match.group(1)
272        yield match.start(), Text, match.group(2)
273        yield match.start(), Punctuation, match.group(3)
274        lexer.xquery_parse_state.append('operator')
275        ctx.stack.append('kindtest')
276        ctx.pos = match.end()
277
278    def pushstate_occurrenceindicator_kindtest_callback(lexer, match, ctx):
279        yield match.start(), Name.Tag, match.group(1)
280        yield match.start(), Text, match.group(2)
281        yield match.start(), Punctuation, match.group(3)
282        lexer.xquery_parse_state.append('occurrenceindicator')
283        ctx.stack.append('kindtest')
284        ctx.pos = match.end()
285
286    def pushstate_operator_starttag_callback(lexer, match, ctx):
287        yield match.start(), Name.Tag, match.group(1)
288        lexer.xquery_parse_state.append('operator')
289        ctx.stack.append('start_tag')
290        ctx.pos = match.end()
291
292    def pushstate_operator_root_callback(lexer, match, ctx):
293        yield match.start(), Punctuation, match.group(1)
294        lexer.xquery_parse_state.append('operator')
295        ctx.stack = ['root']
296        ctx.pos = match.end()
297
298    def pushstate_operator_root_construct_callback(lexer, match, ctx):
299        yield match.start(), Keyword, match.group(1)
300        yield match.start(), Text, match.group(2)
301        yield match.start(), Punctuation, match.group(3)
302        lexer.xquery_parse_state.append('operator')
303        ctx.stack = ['root']
304        ctx.pos = match.end()
305
306    def pushstate_root_callback(lexer, match, ctx):
307        yield match.start(), Punctuation, match.group(1)
308        cur_state = ctx.stack.pop()
309        lexer.xquery_parse_state.append(cur_state)
310        ctx.stack = ['root']
311        ctx.pos = match.end()
312
313    def pushstate_operator_attribute_callback(lexer, match, ctx):
314        yield match.start(), Name.Attribute, match.group(1)
315        ctx.stack.append('operator')
316        ctx.pos = match.end()
317
318    def pushstate_operator_callback(lexer, match, ctx):
319        yield match.start(), Keyword, match.group(1)
320        yield match.start(), Text, match.group(2)
321        yield match.start(), Punctuation, match.group(3)
322        lexer.xquery_parse_state.append('operator')
323        ctx.pos = match.end()
324
325    tokens = {
326        'comment': [
327            # xquery comments
328            (r'(:\))', Comment, '#pop'),
329            (r'(\(:)', Comment, '#push'),
330            (r'[^:)]', Comment),
331            (r'([^:)]|:|\))', Comment),
332        ],
333        'whitespace': [
334            (r'\s+', Text),
335        ],
336        'operator': [
337            include('whitespace'),
338            (r'(\})', popstate_callback),
339            (r'\(:', Comment, 'comment'),
340
341            (r'(\{)', pushstate_root_callback),
342            (r'then|else|external|at|div|except', Keyword, 'root'),
343            (r'order by', Keyword, 'root'),
344            (r'group by', Keyword, 'root'),
345            (r'is|mod|order\s+by|stable\s+order\s+by', Keyword, 'root'),
346            (r'and|or', Operator.Word, 'root'),
347            (r'(eq|ge|gt|le|lt|ne|idiv|intersect|in)(?=\b)',
348             Operator.Word, 'root'),
349            (r'return|satisfies|to|union|where|count|preserve\s+strip',
350             Keyword, 'root'),
351            (r'(>=|>>|>|<=|<<|<|-|\*|!=|\+|\|\||\||:=|=|!)',
352             operator_root_callback),
353            (r'(::|:|;|\[|//|/|,)',
354             punctuation_root_callback),
355            (r'(castable|cast)(\s+)(as)\b',
356             bygroups(Keyword, Text, Keyword), 'singletype'),
357            (r'(instance)(\s+)(of)\b',
358             bygroups(Keyword, Text, Keyword), 'itemtype'),
359            (r'(treat)(\s+)(as)\b',
360             bygroups(Keyword, Text, Keyword), 'itemtype'),
361            (r'(case)(\s+)(' + stringdouble + ')',
362             bygroups(Keyword, Text, String.Double), 'itemtype'),
363            (r'(case)(\s+)(' + stringsingle + ')',
364             bygroups(Keyword, Text, String.Single), 'itemtype'),
365            (r'(case|as)\b', Keyword, 'itemtype'),
366            (r'(\))(\s*)(as)',
367             bygroups(Punctuation, Text, Keyword), 'itemtype'),
368            (r'\$', Name.Variable, 'varname'),
369            (r'(for|let|previous|next)(\s+)(\$)',
370             bygroups(Keyword, Text, Name.Variable), 'varname'),
371            (r'(for)(\s+)(tumbling|sliding)(\s+)(window)(\s+)(\$)',
372             bygroups(Keyword, Text, Keyword, Text, Keyword, Text, Name.Variable),
373             'varname'),
374            # (r'\)|\?|\]', Punctuation, '#push'),
375            (r'\)|\?|\]', Punctuation),
376            (r'(empty)(\s+)(greatest|least)', bygroups(Keyword, Text, Keyword)),
377            (r'ascending|descending|default', Keyword, '#push'),
378            (r'(allowing)(\s+)(empty)', bygroups(Keyword, Text, Keyword)),
379            (r'external', Keyword),
380            (r'(start|when|end)', Keyword, 'root'),
381            (r'(only)(\s+)(end)', bygroups(Keyword, Text, Keyword), 'root'),
382            (r'collation', Keyword, 'uritooperator'),
383
384            # eXist specific XQUF
385            (r'(into|following|preceding|with)', Keyword, 'root'),
386
387            # support for current context on rhs of Simple Map Operator
388            (r'\.', Operator),
389
390            # finally catch all string literals and stay in operator state
391            (stringdouble, String.Double),
392            (stringsingle, String.Single),
393
394            (r'(catch)(\s*)', bygroups(Keyword, Text), 'root'),
395        ],
396        'uritooperator': [
397            (stringdouble, String.Double, '#pop'),
398            (stringsingle, String.Single, '#pop'),
399        ],
400        'namespacedecl': [
401            include('whitespace'),
402            (r'\(:', Comment, 'comment'),
403            (r'(at)(\s+)('+stringdouble+')', bygroups(Keyword, Text, String.Double)),
404            (r"(at)(\s+)("+stringsingle+')', bygroups(Keyword, Text, String.Single)),
405            (stringdouble, String.Double),
406            (stringsingle, String.Single),
407            (r',', Punctuation),
408            (r'=', Operator),
409            (r';', Punctuation, 'root'),
410            (ncname, Name.Namespace),
411        ],
412        'namespacekeyword': [
413            include('whitespace'),
414            (r'\(:', Comment, 'comment'),
415            (stringdouble, String.Double, 'namespacedecl'),
416            (stringsingle, String.Single, 'namespacedecl'),
417            (r'inherit|no-inherit', Keyword, 'root'),
418            (r'namespace', Keyword, 'namespacedecl'),
419            (r'(default)(\s+)(element)', bygroups(Keyword, Text, Keyword)),
420            (r'preserve|no-preserve', Keyword),
421            (r',', Punctuation),
422        ],
423        'annotationname': [
424            (r'\(:', Comment, 'comment'),
425            (qname, Name.Decorator),
426            (r'(\()(' + stringdouble + ')', bygroups(Punctuation, String.Double)),
427            (r'(\()(' + stringsingle + ')', bygroups(Punctuation, String.Single)),
428            (r'(\,)(\s+)(' + stringdouble + ')',
429             bygroups(Punctuation, Text, String.Double)),
430            (r'(\,)(\s+)(' + stringsingle + ')',
431             bygroups(Punctuation, Text, String.Single)),
432            (r'\)', Punctuation),
433            (r'(\s+)(\%)', bygroups(Text, Name.Decorator), 'annotationname'),
434            (r'(\s+)(variable)(\s+)(\$)',
435             bygroups(Text, Keyword.Declaration, Text, Name.Variable), 'varname'),
436            (r'(\s+)(function)(\s+)',
437             bygroups(Text, Keyword.Declaration, Text), 'root')
438        ],
439        'varname': [
440            (r'\(:', Comment, 'comment'),
441            (r'(' + qname + ')(\()?', bygroups(Name, Punctuation), 'operator'),
442        ],
443        'singletype': [
444            include('whitespace'),
445            (r'\(:', Comment, 'comment'),
446            (ncname + r'(:\*)', Name.Variable, 'operator'),
447            (qname, Name.Variable, 'operator'),
448        ],
449        'itemtype': [
450            include('whitespace'),
451            (r'\(:', Comment, 'comment'),
452            (r'\$', Name.Variable, 'varname'),
453            (r'(void)(\s*)(\()(\s*)(\))',
454             bygroups(Keyword, Text, Punctuation, Text, Punctuation), 'operator'),
455            (r'(element|attribute|schema-element|schema-attribute|comment|text|'
456             r'node|binary|document-node|empty-sequence)(\s*)(\()',
457             pushstate_occurrenceindicator_kindtest_callback),
458            # Marklogic specific type?
459            (r'(processing-instruction)(\s*)(\()',
460             bygroups(Keyword, Text, Punctuation),
461             ('occurrenceindicator', 'kindtestforpi')),
462            (r'(item)(\s*)(\()(\s*)(\))(?=[*+?])',
463             bygroups(Keyword, Text, Punctuation, Text, Punctuation),
464             'occurrenceindicator'),
465            (r'(\(\#)(\s*)', bygroups(Punctuation, Text), 'pragma'),
466            (r';', Punctuation, '#pop'),
467            (r'then|else', Keyword, '#pop'),
468            (r'(at)(\s+)(' + stringdouble + ')',
469             bygroups(Keyword, Text, String.Double), 'namespacedecl'),
470            (r'(at)(\s+)(' + stringsingle + ')',
471             bygroups(Keyword, Text, String.Single), 'namespacedecl'),
472            (r'except|intersect|in|is|return|satisfies|to|union|where|count',
473             Keyword, 'root'),
474            (r'and|div|eq|ge|gt|le|lt|ne|idiv|mod|or', Operator.Word, 'root'),
475            (r':=|=|,|>=|>>|>|\[|\(|<=|<<|<|-|!=|\|\||\|', Operator, 'root'),
476            (r'external|at', Keyword, 'root'),
477            (r'(stable)(\s+)(order)(\s+)(by)',
478             bygroups(Keyword, Text, Keyword, Text, Keyword), 'root'),
479            (r'(castable|cast)(\s+)(as)',
480             bygroups(Keyword, Text, Keyword), 'singletype'),
481            (r'(treat)(\s+)(as)', bygroups(Keyword, Text, Keyword)),
482            (r'(instance)(\s+)(of)', bygroups(Keyword, Text, Keyword)),
483            (r'(case)(\s+)(' + stringdouble + ')',
484             bygroups(Keyword, Text, String.Double), 'itemtype'),
485            (r'(case)(\s+)(' + stringsingle + ')',
486             bygroups(Keyword, Text, String.Single), 'itemtype'),
487            (r'case|as', Keyword, 'itemtype'),
488            (r'(\))(\s*)(as)', bygroups(Operator, Text, Keyword), 'itemtype'),
489            (ncname + r':\*', Keyword.Type, 'operator'),
490            (r'(function|map|array)(\()', bygroups(Keyword.Type, Punctuation)),
491            (qname, Keyword.Type, 'occurrenceindicator'),
492        ],
493        'kindtest': [
494            (r'\(:', Comment, 'comment'),
495            (r'\{', Punctuation, 'root'),
496            (r'(\))([*+?]?)', popstate_kindtest_callback),
497            (r'\*', Name, 'closekindtest'),
498            (qname, Name, 'closekindtest'),
499            (r'(element|schema-element)(\s*)(\()', pushstate_kindtest_callback),
500        ],
501        'kindtestforpi': [
502            (r'\(:', Comment, 'comment'),
503            (r'\)', Punctuation, '#pop'),
504            (ncname, Name.Variable),
505            (stringdouble, String.Double),
506            (stringsingle, String.Single),
507        ],
508        'closekindtest': [
509            (r'\(:', Comment, 'comment'),
510            (r'(\))', popstate_callback),
511            (r',', Punctuation),
512            (r'(\{)', pushstate_operator_root_callback),
513            (r'\?', Punctuation),
514        ],
515        'xml_comment': [
516            (r'(-->)', popstate_xmlcomment_callback),
517            (r'[^-]{1,2}', Literal),
518            (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
519             unirange(0x10000, 0x10ffff), Literal),
520        ],
521        'processing_instruction': [
522            (r'\s+', Text, 'processing_instruction_content'),
523            (r'\?>', String.Doc, '#pop'),
524            (pitarget, Name),
525        ],
526        'processing_instruction_content': [
527            (r'\?>', String.Doc, '#pop'),
528            (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
529             unirange(0x10000, 0x10ffff), Literal),
530        ],
531        'cdata_section': [
532            (r']]>', String.Doc, '#pop'),
533            (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
534             unirange(0x10000, 0x10ffff), Literal),
535        ],
536        'start_tag': [
537            include('whitespace'),
538            (r'(/>)', popstate_tag_callback),
539            (r'>', Name.Tag, 'element_content'),
540            (r'"', Punctuation, 'quot_attribute_content'),
541            (r"'", Punctuation, 'apos_attribute_content'),
542            (r'=', Operator),
543            (qname, Name.Tag),
544        ],
545        'quot_attribute_content': [
546            (r'"', Punctuation, 'start_tag'),
547            (r'(\{)', pushstate_root_callback),
548            (r'""', Name.Attribute),
549            (quotattrcontentchar, Name.Attribute),
550            (entityref, Name.Attribute),
551            (charref, Name.Attribute),
552            (r'\{\{|\}\}', Name.Attribute),
553        ],
554        'apos_attribute_content': [
555            (r"'", Punctuation, 'start_tag'),
556            (r'\{', Punctuation, 'root'),
557            (r"''", Name.Attribute),
558            (aposattrcontentchar, Name.Attribute),
559            (entityref, Name.Attribute),
560            (charref, Name.Attribute),
561            (r'\{\{|\}\}', Name.Attribute),
562        ],
563        'element_content': [
564            (r'</', Name.Tag, 'end_tag'),
565            (r'(\{)', pushstate_root_callback),
566            (r'(<!--)', pushstate_element_content_xmlcomment_callback),
567            (r'(<\?)', pushstate_element_content_processing_instruction_callback),
568            (r'(<!\[CDATA\[)', pushstate_element_content_cdata_section_callback),
569            (r'(<)', pushstate_element_content_starttag_callback),
570            (elementcontentchar, Literal),
571            (entityref, Literal),
572            (charref, Literal),
573            (r'\{\{|\}\}', Literal),
574        ],
575        'end_tag': [
576            include('whitespace'),
577            (r'(>)', popstate_tag_callback),
578            (qname, Name.Tag),
579        ],
580        'xmlspace_decl': [
581            include('whitespace'),
582            (r'\(:', Comment, 'comment'),
583            (r'preserve|strip', Keyword, '#pop'),
584        ],
585        'declareordering': [
586            (r'\(:', Comment, 'comment'),
587            include('whitespace'),
588            (r'ordered|unordered', Keyword, '#pop'),
589        ],
590        'xqueryversion': [
591            include('whitespace'),
592            (r'\(:', Comment, 'comment'),
593            (stringdouble, String.Double),
594            (stringsingle, String.Single),
595            (r'encoding', Keyword),
596            (r';', Punctuation, '#pop'),
597        ],
598        'pragma': [
599            (qname, Name.Variable, 'pragmacontents'),
600        ],
601        'pragmacontents': [
602            (r'#\)', Punctuation, 'operator'),
603            (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
604             unirange(0x10000, 0x10ffff), Literal),
605            (r'(\s+)', Text),
606        ],
607        'occurrenceindicator': [
608            include('whitespace'),
609            (r'\(:', Comment, 'comment'),
610            (r'\*|\?|\+', Operator, 'operator'),
611            (r':=', Operator, 'root'),
612            default('operator'),
613        ],
614        'option': [
615            include('whitespace'),
616            (qname, Name.Variable, '#pop'),
617        ],
618        'qname_braren': [
619            include('whitespace'),
620            (r'(\{)', pushstate_operator_root_callback),
621            (r'(\()', Punctuation, 'root'),
622        ],
623        'element_qname': [
624            (qname, Name.Variable, 'root'),
625        ],
626        'attribute_qname': [
627            (qname, Name.Variable, 'root'),
628        ],
629        'root': [
630            include('whitespace'),
631            (r'\(:', Comment, 'comment'),
632
633            # handle operator state
634            # order on numbers matters - handle most complex first
635            (r'\d+(\.\d*)?[eE][+-]?\d+', Number.Float, 'operator'),
636            (r'(\.\d+)[eE][+-]?\d+', Number.Float, 'operator'),
637            (r'(\.\d+|\d+\.\d*)', Number.Float, 'operator'),
638            (r'(\d+)', Number.Integer, 'operator'),
639            (r'(\.\.|\.|\))', Punctuation, 'operator'),
640            (r'(declare)(\s+)(construction)',
641             bygroups(Keyword.Declaration, Text, Keyword.Declaration), 'operator'),
642            (r'(declare)(\s+)(default)(\s+)(order)',
643             bygroups(Keyword.Declaration, Text, Keyword.Declaration, Text, Keyword.Declaration), 'operator'),
644            (r'(declare)(\s+)(context)(\s+)(item)',
645             bygroups(Keyword.Declaration, Text, Keyword.Declaration, Text, Keyword.Declaration), 'operator'),
646            (ncname + ':\*', Name, 'operator'),
647            ('\*:'+ncname, Name.Tag, 'operator'),
648            ('\*', Name.Tag, 'operator'),
649            (stringdouble, String.Double, 'operator'),
650            (stringsingle, String.Single, 'operator'),
651
652            (r'(\}|\])', popstate_callback),
653
654            # NAMESPACE DECL
655            (r'(declare)(\s+)(default)(\s+)(collation)',
656             bygroups(Keyword.Declaration, Text, Keyword.Declaration, Text, Keyword.Declaration)),
657            (r'(module|declare)(\s+)(namespace)',
658             bygroups(Keyword.Declaration, Text, Keyword.Declaration), 'namespacedecl'),
659            (r'(declare)(\s+)(base-uri)',
660             bygroups(Keyword.Declaration, Text, Keyword.Declaration), 'namespacedecl'),
661
662            # NAMESPACE KEYWORD
663            (r'(declare)(\s+)(default)(\s+)(element|function)',
664             bygroups(Keyword.Declaration, Text, Keyword.Declaration, Text, Keyword.Declaration), 'namespacekeyword'),
665            (r'(import)(\s+)(schema|module)',
666             bygroups(Keyword.Pseudo, Text, Keyword.Pseudo), 'namespacekeyword'),
667            (r'(declare)(\s+)(copy-namespaces)',
668             bygroups(Keyword.Declaration, Text, Keyword.Declaration), 'namespacekeyword'),
669
670            # VARNAMEs
671            (r'(for|let|some|every)(\s+)(\$)',
672             bygroups(Keyword, Text, Name.Variable), 'varname'),
673            (r'(for)(\s+)(tumbling|sliding)(\s+)(window)(\s+)(\$)',
674             bygroups(Keyword, Text, Keyword, Text, Keyword, Text, Name.Variable), 'varname'),
675            (r'\$', Name.Variable, 'varname'),
676            (r'(declare)(\s+)(variable)(\s+)(\$)',
677             bygroups(Keyword.Declaration, Text, Keyword.Declaration, Text, Name.Variable), 'varname'),
678
679            # ANNOTATED GLOBAL VARIABLES AND FUNCTIONS
680            (r'(declare)(\s+)(\%)', bygroups(Keyword.Declaration, Text, Name.Decorator), 'annotationname'),
681
682            # ITEMTYPE
683            (r'(\))(\s+)(as)', bygroups(Operator, Text, Keyword), 'itemtype'),
684
685            (r'(element|attribute|schema-element|schema-attribute|comment|'
686             r'text|node|document-node|empty-sequence)(\s+)(\()',
687             pushstate_operator_kindtest_callback),
688
689            (r'(processing-instruction)(\s+)(\()',
690             pushstate_operator_kindtestforpi_callback),
691
692            (r'(<!--)', pushstate_operator_xmlcomment_callback),
693
694            (r'(<\?)', pushstate_operator_processing_instruction_callback),
695
696            (r'(<!\[CDATA\[)', pushstate_operator_cdata_section_callback),
697
698            # (r'</', Name.Tag, 'end_tag'),
699            (r'(<)', pushstate_operator_starttag_callback),
700
701            (r'(declare)(\s+)(boundary-space)',
702             bygroups(Keyword.Declaration, Text, Keyword.Declaration), 'xmlspace_decl'),
703
704            (r'(validate)(\s+)(lax|strict)',
705             pushstate_operator_root_validate_withmode),
706            (r'(validate)(\s*)(\{)', pushstate_operator_root_validate),
707            (r'(typeswitch)(\s*)(\()', bygroups(Keyword, Text, Punctuation)),
708            (r'(switch)(\s*)(\()', bygroups(Keyword, Text, Punctuation)),
709            (r'(element|attribute|namespace)(\s*)(\{)',
710             pushstate_operator_root_construct_callback),
711
712            (r'(document|text|processing-instruction|comment)(\s*)(\{)',
713             pushstate_operator_root_construct_callback),
714            # ATTRIBUTE
715            (r'(attribute)(\s+)(?=' + qname + r')',
716             bygroups(Keyword, Text), 'attribute_qname'),
717            # ELEMENT
718            (r'(element)(\s+)(?=' + qname + r')',
719             bygroups(Keyword, Text), 'element_qname'),
720            # PROCESSING_INSTRUCTION
721            (r'(processing-instruction|namespace)(\s+)(' + ncname + r')(\s*)(\{)',
722             bygroups(Keyword, Text, Name.Variable, Text, Punctuation),
723             'operator'),
724
725            (r'(declare|define)(\s+)(function)',
726             bygroups(Keyword.Declaration, Text, Keyword.Declaration)),
727
728            (r'(\{|\[)', pushstate_operator_root_callback),
729
730            (r'(unordered|ordered)(\s*)(\{)',
731             pushstate_operator_order_callback),
732
733            (r'(map|array)(\s*)(\{)',
734             pushstate_operator_map_callback),
735
736            (r'(declare)(\s+)(ordering)',
737             bygroups(Keyword.Declaration, Text, Keyword.Declaration), 'declareordering'),
738
739            (r'(xquery)(\s+)(version)',
740             bygroups(Keyword.Pseudo, Text, Keyword.Pseudo), 'xqueryversion'),
741
742            (r'(\(#)(\s*)', bygroups(Punctuation, Text), 'pragma'),
743
744            # sometimes return can occur in root state
745            (r'return', Keyword),
746
747            (r'(declare)(\s+)(option)', bygroups(Keyword.Declaration, Text, Keyword.Declaration),
748             'option'),
749
750            # URI LITERALS - single and double quoted
751            (r'(at)(\s+)('+stringdouble+')', String.Double, 'namespacedecl'),
752            (r'(at)(\s+)('+stringsingle+')', String.Single, 'namespacedecl'),
753
754            (r'(ancestor-or-self|ancestor|attribute|child|descendant-or-self)(::)',
755             bygroups(Keyword, Punctuation)),
756            (r'(descendant|following-sibling|following|parent|preceding-sibling'
757             r'|preceding|self)(::)', bygroups(Keyword, Punctuation)),
758
759            (r'(if)(\s*)(\()', bygroups(Keyword, Text, Punctuation)),
760
761            (r'then|else', Keyword),
762
763            # eXist specific XQUF
764            (r'(update)(\s*)(insert|delete|replace|value|rename)', bygroups(Keyword, Text, Keyword)),
765            (r'(into|following|preceding|with)', Keyword),
766
767            # Marklogic specific
768            (r'(try)(\s*)', bygroups(Keyword, Text), 'root'),
769            (r'(catch)(\s*)(\()(\$)',
770             bygroups(Keyword, Text, Punctuation, Name.Variable), 'varname'),
771
772
773            (r'(@'+qname+')', Name.Attribute, 'operator'),
774            (r'(@'+ncname+')', Name.Attribute, 'operator'),
775            (r'@\*:'+ncname, Name.Attribute, 'operator'),
776            (r'@\*', Name.Attribute, 'operator'),
777            (r'(@)', Name.Attribute, 'operator'),
778
779            (r'//|/|\+|-|;|,|\(|\)', Punctuation),
780
781            # STANDALONE QNAMES
782            (qname + r'(?=\s*\{)', Name.Tag, 'qname_braren'),
783            (qname + r'(?=\s*\([^:])', Name.Function, 'qname_braren'),
784            (r'(' + qname + ')(#)([0-9]+)', bygroups(Name.Function, Keyword.Type, Number.Integer)),
785            (qname, Name.Tag, 'operator'),
786        ]
787    }
788
789
class QmlLexer(RegexLexer):
    """
    For QML files. See http://doc.qt.digia.com/4.7/qdeclarativeintroduction.html.

    .. versionadded:: 1.6
    """

    # QML is based on JavaScript, so most of the rules below mirror the ones
    # used by ``pygments.lexers.javascript.JavascriptLexer``; the QML-specific
    # additions are marked in the 'root' state.

    name = 'QML'
    aliases = ['qml', 'qbs']
    filenames = ['*.qml', '*.qbs']
    mimetypes = ['application/x-qml', 'application/x-qt.qbs+qml']

    # DOTALL lets ``.`` cross newlines (needed by the ``/* ... */`` rule);
    # MULTILINE lets ``^`` match at the start of every line (used by the
    # first 'root' rule).
    flags = re.DOTALL | re.MULTILINE

    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'<!--', Comment),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline)
        ],
        # Entered after any token that may legally be followed by a regex
        # literal, so that ``/`` is read as a regex delimiter rather than as
        # the division operator.
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            # A slash that does not open a well-formed regex literal: swap
            # this state for 'badregex' without consuming anything.
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        # The only rule here is the newline that leaves the state again.
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            # At the start of a line that begins with whitespace, a slash or
            # ``<!--``, a regex literal is allowed.
            (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),

            # QML insertions: ``id: name`` declarations and ``property:``
            # bindings (dotted names included).
            (r'\bid\s*:\s*[A-Za-z][\w.]*', Keyword.Declaration,
             'slashstartsregex'),
            (r'\b[A-Za-z][\w.]*\s*:', Keyword, 'slashstartsregex'),

            # the rest from JavascriptLexer
            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|'
             r'throw|try|catch|finally|new|delete|typeof|instanceof|void|'
             r'this)\b', Keyword, 'slashstartsregex'),
            (r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'),
            (r'(abstract|boolean|byte|char|class|const|debugger|double|enum|export|'
             r'extends|final|float|goto|implements|import|int|interface|long|native|'
             r'package|private|protected|public|short|static|super|synchronized|throws|'
             r'transient|volatile)\b', Keyword.Reserved),
            (r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant),
            # 'Error' used to be listed twice in this alternation; the
            # redundant second occurrence has been dropped.
            (r'(Array|Boolean|Date|Error|Function|Math|netscape|'
             r'Number|Object|Packages|RegExp|String|sun|decodeURI|'
             r'decodeURIComponent|encodeURI|encodeURIComponent|'
             r'eval|isFinite|isNaN|parseFloat|parseInt|document|this|'
             r'window)\b', Name.Builtin),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            # String literals.  The three alternatives are mutually exclusive
            # (escaped backslash | backslash + any other char | any char that
            # is neither the quote nor a backslash).  The earlier form
            # ``(\\\\|\\"|[^"])*`` let a backslash pair match in two different
            # ways, which made unterminated strings containing many
            # backslashes backtrack exponentially.
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
861
862
class CirruLexer(RegexLexer):
    r"""
    Syntax rules of Cirru can be found at:
    http://cirru.org/

    * using ``()`` for expressions, but restricted to a single line
    * using ``""`` for strings, with ``\`` for escaping chars
    * using ``$`` as folding operator
    * using ``,`` as unfolding operator
    * using indentations for nested blocks

    .. versionadded:: 2.0
    """
    # NOTE: the docstring above is a raw string on purpose.  It contains a
    # literal backslash followed by a backtick, which is an invalid escape
    # sequence in a normal string literal and triggers a warning at compile
    # time on current Python versions.

    name = 'Cirru'
    aliases = ['cirru']
    filenames = ['*.cirru']
    mimetypes = ['text/x-cirru']
    # MULTILINE so that ``^`` in the 'root' state matches at every line start.
    flags = re.MULTILINE

    tokens = {
        # Inside a double-quoted string (the opening quote is already consumed).
        'string': [
            (r'[^"\\\n]', String),
            (r'\\', String.Escape, 'escape'),
            (r'"', String, '#pop'),
        ],
        # Exactly one character following a backslash inside a string.
        'escape': [
            (r'.', String.Escape, '#pop'),
        ],
        # Head position of an expression: the first word found here is
        # highlighted as a function name, after which the state is left.
        'function': [
            (r'\,', Operator, '#pop'),
            (r'[^\s"()]+', Name.Function, '#pop'),
            (r'\)', Operator, '#pop'),
            # End of line reached without a head word: leave without
            # consuming the newline so that 'line' can handle it.
            (r'(?=\n)', Text, '#pop'),
            # A nested expression in head position has a head of its own.
            (r'\(', Operator, '#push'),
            # A string in head position: replace this state by 'string'.
            (r'"', String, ('#pop', 'string')),
            (r'[ ]+', Text.Whitespace),
        ],
        # Remainder of a line once its head has been handled.
        'line': [
            # A free-standing ``$`` or an opening parenthesis starts a new
            # expression, so the following word is a head again.
            (r'(?<!\w)\$(?!\w)', Operator, 'function'),
            (r'\(', Operator, 'function'),
            (r'\)', Operator),
            (r'\n', Text, '#pop'),
            (r'"', String, 'string'),
            (r'[ ]+', Text.Whitespace),
            (r'[+-]?[\d.]+\b', Number),
            (r'[^\s"()]+', Name.Variable)
        ],
        'root': [
            # Swallow blank lines, then lex one line at a time: 'function' is
            # pushed on top of 'line', so every line starts in head position.
            (r'^\n+', Text.Whitespace),
            default(('line', 'function')),
        ]
    }
916
917
class SlimLexer(ExtendedRegexLexer):
    """
    For Slim markup.

    Embedded Ruby (code lines, output lines, ``#{...}`` interpolations and
    quoted attribute values) is delegated to the Ruby lexer.

    .. versionadded:: 2.0
    """

    name = 'Slim'
    aliases = ['slim']
    filenames = ['*.slim']
    mimetypes = ['text/x-slim']

    # All patterns below are matched case-insensitively.
    flags = re.IGNORECASE
    # One "character" of a logical line: either any single character other
    # than a newline, or the sequence " |" + newline when a further " |"
    # occurs on the following line (i.e. the line is continued).
    _dot = r'(?: \|\n(?=.* \|)|.)'
    tokens = {
        # Start of every line: blank lines are plain text; otherwise the
        # leading whitespace is handed to the `_indentation` callback imported
        # from pygments.lexers.css.
        # NOTE(review): that callback presumably tracks the indent level and
        # selects the follow-up state ('content' or the current block state) --
        # confirm against pygments.lexers.css.
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],

        # ``.class`` and ``#id`` shortcuts; both continue in 'tag'.
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        # What may follow a tag: Ruby output (``=`` / ``==``), an attribute
        # name that is directly followed by ``=``, or plain text.
        'eval-or-plain': [
            (r'([ \t]*==?)(.*\n)',
             bygroups(Punctuation, using(RubyLexer)),
             'root'),
            (r'[ \t]+[\w:-]+(?==)', Name.Attribute, 'html-attributes'),
            default('plain'),
        ],

        # First non-whitespace content of a line.  Rule order matters: the
        # more specific forms must be tried before the generic tag name.
        'content': [
            include('css'),
            # ``name:`` alone on a line (e.g. an embedded-engine header).
            (r'[\w:-]+:[ \t]*\n', Text, 'plain'),
            # ``-`` introduces a Ruby control line.
            (r'(-)(.*\n)',
             bygroups(Punctuation, using(RubyLexer)),
             '#pop'),
            # ``|`` starts a text block and ``/`` a comment block; both go
            # through the `_starts_block` callback factory from
            # pygments.lexers.css, called with the token type and the name of
            # the block state.
            # NOTE(review): presumably it records the block's indentation so
            # that deeper-indented lines stay in that state -- confirm.
            (r'\|' + _dot + r'*\n', _starts_block(Text, 'plain'), '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment.Preproc, 'slim-comment-block'), '#pop'),
            (r'[\w:-]+', Name.Tag, 'tag'),
            include('eval-or-plain'),
        ],

        # After a tag name or a ``.class`` / ``#id`` shortcut.
        'tag': [
            include('css'),
            # One or two ``<`` / ``>`` characters directly before a space,
            # tab or ``=``.
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            # Trailing whitespace up to the end of line: leave two states.
            (r'[ \t]+\n', Punctuation, '#pop:2'),
            include('eval-or-plain'),
        ],

        # Literal text with optional ``#{...}`` Ruby interpolation.
        'plain': [
            # Anything that is not the start of an interpolation; the last
            # alternative accepts a backslash-escaped ``#{``.
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(.*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        # Attribute value following ``name=``: quoted values are lexed as
        # Ruby, a bare word is emitted as text; each returns to 'tag'.
        'html-attributes': [
            (r'=', Punctuation),
            (r'"[^"]+"', using(RubyLexer), 'tag'),
            (r'\'[^\']+\'', using(RubyLexer), 'tag'),
            (r'\w+', Text, 'tag'),
        ],

        # Body lines of a ``/`` comment block.
        'slim-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],
    }
989