1# -*- coding: utf-8 -*-
2"""
3    pygments.lexers.dotnet
4    ~~~~~~~~~~~~~~~~~~~~~~
5
6    Lexers for .net languages.
7
8    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
9    :license: BSD, see LICENSE for details.
10"""
11import re
12
13from pygments.lexer import RegexLexer, DelegatingLexer, bygroups, include, \
14    using, this, default, words
15from pygments.token import Punctuation, \
16    Text, Comment, Operator, Keyword, Name, String, Number, Literal, Other
17from pygments.util import get_choice_opt, iteritems
18from pygments import unistring as uni
19
20from pygments.lexers.html import XmlLexer
21
# Lexer classes exported by this module.
__all__ = [
    'CSharpLexer',
    'NemerleLexer',
    'BooLexer',
    'VbNetLexer',
    'CSharpAspxLexer',
    'VbNetAspxLexer',
    'FSharpLexer',
]
24
25
class CSharpLexer(RegexLexer):
    """
    For `C# <http://msdn2.microsoft.com/en-us/vcsharp/default.aspx>`_
    source code.

    Additional options accepted:

    `unicodelevel`
      Determines which Unicode characters this lexer allows for identifiers.
      The possible values are:

      * ``none`` -- only the ASCII letters and numbers are allowed. This
        is the fastest selection.
      * ``basic`` -- all Unicode characters from the specification except
        category ``Lo`` are allowed.
      * ``full`` -- all Unicode characters as specified in the C# specs
        are allowed.  Note that this means a considerable slowdown since the
        ``Lo`` category has more than 40,000 characters in it!

      The default value is ``basic``.

      .. versionadded:: 0.8
    """

    name = 'C#'
    aliases = ['csharp', 'c#']
    filenames = ['*.cs']
    mimetypes = ['text/x-csharp']  # inferred

    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    # for the range of allowed unicode characters in identifiers, see
    # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf

    # Identifier regex for each supported ``unicodelevel`` value.
    levels = {
        # raw string: '\w' is a regex escape, not a Python string escape
        'none': r'@?[_a-zA-Z]\w*',
        'basic': ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' +
                  '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc',
                                    'Cf', 'Mn', 'Mc') + ']*'),
        'full': ('@?(?:_|[^' +
                 uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') + '])'
                 + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl',
                                        'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'),
    }

    # One complete token definition per unicode level, keyed by level name;
    # ``__init__`` selects the variant to use.
    tokens = {}
    token_variants = True

    for levelname, cs_ident in iteritems(levels):
        tokens[levelname] = {
            'root': [
                # method names
                (r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)'  # return type
                 r'(' + cs_ident + ')'                            # method name
                 r'(\s*)(\()',                               # signature start
                 bygroups(using(this), Name.Function, Text, Punctuation)),
                (r'^\s*\[.*?\]', Name.Attribute),
                (r'[^\S\n]+', Text),
                (r'\\\n', Text),  # line continuation
                (r'//.*?\n', Comment.Single),
                (r'/[*].*?[*]/', Comment.Multiline),
                (r'\n', Text),
                (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
                (r'[{}]', Punctuation),
                (r'@"(""|[^"])*"', String),
                (r'"(\\\\|\\"|[^"\n])*["\n]', String),
                (r"'\\.'|'[^\\]'", String.Char),
                # Hex literals are tried first: the decimal rule below would
                # otherwise consume the leading '0' and leave 'x...' behind.
                (r"0[xX][0-9a-fA-F]+[Ll]?", Number),
                # Decimal integer/real literal, matched as a single token;
                # the exponent sign is optional (e.g. ``1e10``).
                (r"[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?[flFLdD]?", Number),
                (r'#[ \t]*(if|endif|else|elif|define|undef|'
                 r'line|error|warning|region|endregion|pragma)\b.*?\n',
                 Comment.Preproc),
                (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text,
                 Keyword)),
                (r'(abstract|as|async|await|base|break|by|case|catch|'
                 r'checked|const|continue|default|delegate|'
                 r'do|else|enum|event|explicit|extern|false|finally|'
                 r'fixed|for|foreach|goto|if|implicit|in|interface|'
                 r'internal|is|let|lock|new|null|on|operator|'
                 r'out|override|params|private|protected|public|readonly|'
                 r'ref|return|sealed|sizeof|stackalloc|static|'
                 r'switch|this|throw|true|try|typeof|'
                 r'unchecked|unsafe|virtual|void|while|'
                 r'get|set|new|partial|yield|add|remove|value|alias|ascending|'
                 r'descending|from|group|into|orderby|select|thenby|where|'
                 r'join|equals)\b', Keyword),
                (r'(global)(::)', bygroups(Keyword, Punctuation)),
                (r'(bool|byte|char|decimal|double|dynamic|float|int|long|object|'
                 r'sbyte|short|string|uint|ulong|ushort|var)\b\??', Keyword.Type),
                (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'),
                (r'(namespace|using)(\s+)', bygroups(Keyword, Text), 'namespace'),
                (cs_ident, Name),
            ],
            # Entered right after ``class``/``struct``: the next identifier
            # is the type name; anything else just leaves the state.
            'class': [
                (cs_ident, Name.Class, '#pop'),
                default('#pop'),
            ],
            # Entered right after ``namespace``/``using``.
            'namespace': [
                (r'(?=\()', Text, '#pop'),  # using (resource)
                ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop'),
            ]
        }

    def __init__(self, **options):
        """
        Select the token table matching the ``unicodelevel`` option.

        The level defaults to ``'basic'``.  The table for the chosen level
        is taken from ``_all_tokens`` when present there and is otherwise
        produced by ``process_tokendef``.
        """
        level = get_choice_opt(options, 'unicodelevel', list(self.tokens), 'basic')
        if level not in self._all_tokens:
            # compile the regexes now
            self._tokens = self.__class__.process_tokendef(level)
        else:
            self._tokens = self._all_tokens[level]

        RegexLexer.__init__(self, **options)
138
139
class NemerleLexer(RegexLexer):
    """
    For `Nemerle <http://nemerle.org>`_ source code.

    Additional options accepted:

    `unicodelevel`
      Determines which Unicode characters this lexer allows for identifiers.
      The possible values are:

      * ``none`` -- only the ASCII letters and numbers are allowed. This
        is the fastest selection.
      * ``basic`` -- all Unicode characters from the specification except
        category ``Lo`` are allowed.
      * ``full`` -- all Unicode characters as specified in the C# specs
        are allowed.  Note that this means a considerable slowdown since the
        ``Lo`` category has more than 40,000 characters in it!

      The default value is ``basic``.

    .. versionadded:: 1.5
    """

    name = 'Nemerle'
    aliases = ['nemerle']
    filenames = ['*.n']
    mimetypes = ['text/x-nemerle']  # inferred

    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    # for the range of allowed unicode characters in identifiers, see
    # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf

    # Identifier regex for each supported ``unicodelevel`` value.
    levels = {
        # raw string: '\w' is a regex escape, not a Python string escape
        'none': r'@?[_a-zA-Z]\w*',
        'basic': ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' +
                  '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc',
                                    'Cf', 'Mn', 'Mc') + ']*'),
        'full': ('@?(?:_|[^' +
                 uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') + '])'
                 + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl',
                                        'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'),
    }

    # One complete token definition per unicode level, keyed by level name;
    # ``__init__`` selects the variant to use.
    tokens = {}
    token_variants = True

    for levelname, cs_ident in iteritems(levels):
        tokens[levelname] = {
            'root': [
                # method names
                (r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)'  # return type
                 r'(' + cs_ident + ')'                            # method name
                 r'(\s*)(\()',                               # signature start
                 bygroups(using(this), Name.Function, Text, Punctuation)),
                (r'^\s*\[.*?\]', Name.Attribute),
                (r'[^\S\n]+', Text),
                (r'\\\n', Text),  # line continuation
                (r'//.*?\n', Comment.Single),
                (r'/[*].*?[*]/', Comment.Multiline),
                (r'\n', Text),
                (r'\$\s*"', String, 'splice-string'),
                (r'\$\s*<#', String, 'splice-string2'),
                (r'<#', String, 'recursive-string'),

                (r'(<\[)\s*(' + cs_ident + ':)?', Keyword),
                (r'\]\>', Keyword),

                # quasiquotation only
                (r'\$' + cs_ident, Name),
                (r'(\$)(\()', bygroups(Name, Punctuation),
                 'splice-string-content'),

                (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
                (r'[{}]', Punctuation),
                (r'@"(""|[^"])*"', String),
                (r'"(\\\\|\\"|[^"\n])*["\n]', String),
                (r"'\\.'|'[^\\]'", String.Char),
                (r"0[xX][0-9a-fA-F]+[Ll]?", Number),
                # Decimal integer/real literal, matched as a single token;
                # the exponent sign is optional (e.g. ``1e10``).
                (r"[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?[flFLdD]?", Number),
                (r'#[ \t]*(if|endif|else|elif|define|undef|'
                 r'line|error|warning|region|endregion|pragma)\b.*?\n',
                 Comment.Preproc),
                (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text,
                 Keyword)),
                (r'(abstract|and|as|base|catch|def|delegate|'
                 r'enum|event|extern|false|finally|'
                 r'fun|implements|interface|internal|'
                 r'is|macro|match|matches|module|mutable|new|'
                 r'null|out|override|params|partial|private|'
                 r'protected|public|ref|sealed|static|'
                 r'syntax|this|throw|true|try|type|typeof|'
                 r'virtual|volatile|when|where|with|'
                 r'assert|assert2|async|break|checked|continue|do|else|'
                 r'ensures|for|foreach|if|late|lock|new|nolate|'
                 r'otherwise|regexp|repeat|requires|return|surroundwith|'
                 r'unchecked|unless|using|while|yield)\b', Keyword),
                (r'(global)(::)', bygroups(Keyword, Punctuation)),
                (r'(bool|byte|char|decimal|double|float|int|long|object|sbyte|'
                 r'short|string|uint|ulong|ushort|void|array|list)\b\??',
                 Keyword.Type),
                (r'(:>?)\s*(' + cs_ident + r'\??)',
                 bygroups(Punctuation, Keyword.Type)),
                (r'(class|struct|variant|module)(\s+)',
                 bygroups(Keyword, Text), 'class'),
                (r'(namespace|using)(\s+)', bygroups(Keyword, Text),
                 'namespace'),
                (cs_ident, Name),
            ],
            # Entered right after a type-introducing keyword: the next
            # identifier is the type name; anything else just leaves the
            # state (same fallback as CSharpLexer's 'class' state).
            'class': [
                (cs_ident, Name.Class, '#pop'),
                default('#pop'),
            ],
            # Entered right after ``namespace``/``using``.
            'namespace': [
                (r'(?=\()', Text, '#pop'),  # using (resource)
                ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop')
            ],
            # $"..." string with $name / $(expr) splices
            'splice-string': [
                (r'[^"$]',  String),
                (r'\$' + cs_ident, Name),
                (r'(\$)(\()', bygroups(Name, Punctuation),
                 'splice-string-content'),
                (r'\\"',  String),
                (r'"',  String, '#pop')
            ],
            # $<# ... #> string with splices; nested <# #> pairs are tracked
            'splice-string2': [
                (r'[^#<>$]',  String),
                (r'\$' + cs_ident, Name),
                (r'(\$)(\()', bygroups(Name, Punctuation),
                 'splice-string-content'),
                (r'<#',  String, '#push'),
                (r'#>',  String, '#pop')
            ],
            # <# ... #> string; nested <# #> pairs are tracked
            'recursive-string': [
                (r'[^#<>]',  String),
                (r'<#',  String, '#push'),
                (r'#>',  String, '#pop')
            ],
            # Body of a $( ... ) splice; parentheses nest via #push/#pop.
            'splice-string-content': [
                # \b keeps identifiers such as ``iffy`` from being split
                (r'(if|match)\b', Keyword),
                # '-' is placed last so it is a literal hyphen; written as
                # '?-\\' it formed a range that also covered '@' and 'A'-'Z'.
                (r'[~!%^&*+=|\[\]:;,.<>/?\\"$ -]', Punctuation),
                (cs_ident, Name),
                (r'\d+', Number),
                (r'\(', Punctuation, '#push'),
                (r'\)', Punctuation, '#pop')
            ]
        }

    def __init__(self, **options):
        """
        Select the token table matching the ``unicodelevel`` option.

        The level defaults to ``'basic'``.  The table for the chosen level
        is taken from ``_all_tokens`` when present there and is otherwise
        produced by ``process_tokendef``.
        """
        level = get_choice_opt(options, 'unicodelevel', list(self.tokens),
                               'basic')
        if level not in self._all_tokens:
            # compile the regexes now
            self._tokens = self.__class__.process_tokendef(level)
        else:
            self._tokens = self._all_tokens[level]

        RegexLexer.__init__(self, **options)
297
298
class BooLexer(RegexLexer):
    """
    For `Boo <http://boo.codehaus.org/>`_ source code.
    """

    name = 'Boo'
    aliases = ['boo']
    filenames = ['*.boo']
    mimetypes = ['text/x-boo']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(#|//).*$', Comment.Single),
            (r'/[*]', Comment.Multiline, 'comment'),
            (r'[]{}:(),.;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'/(\\\\|\\/|[^/\s])/', String.Regex),
            (r'@/(\\\\|\\/|[^/])*/', String.Regex),
            (r'=~|!=|==|<<|>>|[-+/*%=<>&^|]', Operator),
            (r'(as|abstract|callable|constructor|destructor|do|import|'
             r'enum|event|final|get|interface|internal|of|override|'
             r'partial|private|protected|public|return|set|static|'
             r'struct|transient|virtual|yield|super|and|break|cast|'
             r'continue|elif|else|ensure|except|for|given|goto|if|in|'
             r'is|isa|not|or|otherwise|pass|raise|ref|try|unless|when|'
             r'while|from|as)\b', Keyword),
            # ``def`` directly followed by a parenthesised list has no name
            # to highlight, so it does not enter the 'funcname' state.
            (r'def(?=\s+\(.*?\))', Keyword),
            (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            (r'(namespace)(\s+)', bygroups(Keyword, Text), 'namespace'),
            (r'(?<!\.)(true|false|null|self|__eval__|__switch__|array|'
             r'assert|checked|enumerate|filter|getter|len|lock|map|'
             r'matrix|max|min|normalArrayIndexing|print|property|range|'
             r'rawArrayIndexing|required|typeof|unchecked|using|'
             r'yieldAll|zip)\b', Name.Builtin),
            (r'"""(\\\\|\\"|.*?)"""', String.Double),
            (r'"(\\\\|\\"|[^"]*?)"', String.Double),
            (r"'(\\\\|\\'|[^']*?)'", String.Single),
            (r'[a-zA-Z_]\w*', Name),
            (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float),
            (r'[0-9][0-9.]*(ms?|d|h|s)', Number),
            (r'0\d+', Number.Oct),
            (r'0x[a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+', Number.Integer),
        ],
        # Block comments nest: every '/*' pushes, every '*/' pops.
        'comment': [
            ('/[*]', Comment.Multiline, '#push'),
            ('[*]/', Comment.Multiline, '#pop'),
            ('[^/*]', Comment.Multiline),
            ('[*/]', Comment.Multiline)
        ],
        # The three states below highlight the single name that follows
        # ``def``, ``class`` and ``namespace`` respectively.  The patterns
        # are raw strings so that '\w' is not read as a string escape.
        'funcname': [
            (r'[a-zA-Z_]\w*', Name.Function, '#pop')
        ],
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        'namespace': [
            (r'[a-zA-Z_][\w.]*', Name.Namespace, '#pop')
        ]
    }
364
365
class VbNetLexer(RegexLexer):
    """
    For
    `Visual Basic.NET <http://msdn2.microsoft.com/en-us/vbasic/default.aspx>`_
    source code.
    """

    name = 'VB.net'
    aliases = ['vb.net', 'vbnet']
    filenames = ['*.vb', '*.bas']
    mimetypes = ['text/x-vbnet', 'text/x-vba']  # (?)

    # Identifier pattern.  Only lower/title-case letter categories are
    # listed; the lexer is compiled with re.IGNORECASE (see ``flags``).
    uni_name = '[_' + uni.combine('Ll', 'Lt', 'Lm', 'Nl') + ']' + \
               '[' + uni.combine('Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc',
                                 'Cf', 'Mn', 'Mc') + ']*'

    flags = re.MULTILINE | re.IGNORECASE
    tokens = {
        'root': [
            (r'^\s*<.*?>', Name.Attribute),
            (r'\s+', Text),
            # NOTE(review): '\s+' above already matches newlines, so this
            # rule looks unreachable -- confirm before removing.
            (r'\n', Text),
            (r'rem\b.*?\n', Comment),
            (r"'.*?\n", Comment),
            (r'#If\s.*?\sThen|#ElseIf\s.*?\sThen|#Else|#End\s+If|#Const|'
             r'#ExternalSource.*?\n|#End\s+ExternalSource|'
             r'#Region.*?\n|#End\s+Region|#ExternalChecksum',
             Comment.Preproc),
            (r'[(){}!#,.:]', Punctuation),
            (r'Option\s+(Strict|Explicit|Compare)\s+'
             r'(On|Off|Binary|Text)', Keyword.Declaration),
            # The negative lookbehind keeps member names after a '.' from
            # being highlighted as keywords.
            (words((
                'AddHandler', 'Alias', 'ByRef', 'ByVal', 'Call', 'Case',
                'Catch', 'CBool', 'CByte', 'CChar', 'CDate', 'CDec', 'CDbl',
                'CInt', 'CLng', 'CObj', 'Continue', 'CSByte', 'CShort', 'CSng',
                'CStr', 'CType', 'CUInt', 'CULng', 'CUShort', 'Declare',
                'Default', 'Delegate', 'DirectCast', 'Do', 'Each', 'Else',
                'ElseIf', 'EndIf', 'Erase', 'Error', 'Event', 'Exit', 'False',
                'Finally', 'For', 'Friend', 'Get', 'Global', 'GoSub', 'GoTo',
                'Handles', 'If', 'Implements', 'Inherits', 'Interface', 'Let',
                'Lib', 'Loop', 'Me', 'MustInherit', 'MustOverride', 'MyBase',
                'MyClass', 'Narrowing', 'New', 'Next', 'Not', 'Nothing',
                'NotInheritable', 'NotOverridable', 'Of', 'On', 'Operator',
                'Option', 'Optional', 'Overloads', 'Overridable', 'Overrides',
                'ParamArray', 'Partial', 'Private', 'Protected', 'Public',
                'RaiseEvent', 'ReadOnly', 'ReDim', 'RemoveHandler', 'Resume',
                'Return', 'Select', 'Set', 'Shadows', 'Shared', 'Single',
                'Static', 'Step', 'Stop', 'SyncLock', 'Then', 'Throw', 'To',
                'True', 'Try', 'TryCast', 'Wend', 'Using', 'When', 'While',
                'Widening', 'With', 'WithEvents', 'WriteOnly'),
                   prefix=r'(?<!\.)', suffix=r'\b'), Keyword),
            (r'(?<!\.)End\b', Keyword, 'end'),
            (r'(?<!\.)(Dim|Const)\b', Keyword, 'dim'),
            (r'(?<!\.)(Function|Sub|Property)(\s+)',
             bygroups(Keyword, Text), 'funcname'),
            (r'(?<!\.)(Class|Structure|Enum)(\s+)',
             bygroups(Keyword, Text), 'classname'),
            (r'(?<!\.)(Module|Namespace|Imports)(\s+)',
             bygroups(Keyword, Text), 'namespace'),
            (r'(?<!\.)(Boolean|Byte|Char|Date|Decimal|Double|Integer|Long|'
             r'Object|SByte|Short|Single|String|Variant|UInteger|ULong|'
             r'UShort)\b', Keyword.Type),
            (r'(?<!\.)(AddressOf|And|AndAlso|As|GetType|In|Is|IsNot|Like|Mod|'
             r'Or|OrElse|TypeOf|Xor)\b', Operator.Word),
            # &H.. / &O.. literals must be tried before the operator rule
            # below, which matches a bare '&' and would otherwise always
            # win.  The trailing \b rejects text such as ``&Hello``.
            (r'&H[0-9a-f]+([SILDFR]|US|UI|UL)?\b', Number.Integer),
            (r'&O[0-7]+([SILDFR]|US|UI|UL)?\b', Number.Integer),
            (r'&=|[*]=|/=|\\=|\^=|\+=|-=|<<=|>>=|<<|>>|:=|'
             r'<=|>=|<>|[-&*/\\^+=<>\[\]]',
             Operator),
            ('"', String, 'string'),
            (r'_\n', Text),  # Line continuation  (must be before Name)
            (uni_name + '[%&@!#$]?', Name),
            # NOTE(review): '#' is consumed by the punctuation rule above,
            # so this date-literal rule looks unreachable -- confirm.
            ('#.*?#', Literal.Date),
            (r'(\d+\.\d*|\d*\.\d+)(F[+-]?[0-9]+)?', Number.Float),
            (r'\d+([SILDFR]|US|UI|UL)?', Number.Integer),
        ],
        'string': [
            (r'""', String),  # doubled quote inside a string literal
            (r'"C?', String, '#pop'),
            (r'[^"]+', String),
        ],
        'dim': [
            (uni_name, Name.Variable, '#pop'),
            default('#pop'),  # any other syntax
        ],
        'funcname': [
            (uni_name, Name.Function, '#pop'),
        ],
        'classname': [
            (uni_name, Name.Class, '#pop'),
        ],
        'namespace': [
            (uni_name, Name.Namespace),
            (r'\.', Name.Namespace),
            default('#pop'),
        ],
        # After ``End``: highlight the block keyword that closes, if any.
        'end': [
            (r'\s+', Text),
            (r'(Function|Sub|Property|Class|Structure|Enum|Module|Namespace)\b',
             Keyword, '#pop'),
            default('#pop'),
        ]
    }

    def analyse_text(text):
        """
        Return 0.5 when a line of *text* starts (after optional whitespace)
        with ``#If``, ``Module`` or ``Namespace``; otherwise return None.
        """
        if re.search(r'^\s*(#If|Module|Namespace)', text, re.MULTILINE):
            return 0.5
473
474
class GenericAspxLexer(RegexLexer):
    """
    Lexer for ASP.NET pages.

    ``<% ... %>`` blocks and the bodies of ``<script>`` elements are emitted
    as ``Other`` tokens; all remaining text is handed to ``XmlLexer``.
    """

    name = 'aspx-gen'
    filenames = []
    mimetypes = []

    # DOTALL so that code blocks and script bodies may span several lines.
    flags = re.DOTALL

    tokens = {
        'root': [
            # <% %>, <%@ %>, <%= %> and <%# %> blocks
            (
                r'(<%[@=#]?)(.*?)(%>)',
                bygroups(Name.Tag, Other, Name.Tag),
            ),
            # <script ...> body </script>: the tags go to the XML lexer
            (
                r'(<script.*?>)(.*?)(</script>)',
                bygroups(using(XmlLexer), Other, using(XmlLexer)),
            ),
            # markup up to (but not including) the next '<'
            (r'(.+?)(?=<)', using(XmlLexer)),
            # whatever is left
            (r'.+', using(XmlLexer)),
        ],
    }
496
497
498# TODO support multiple languages within the same source file
class CSharpAspxLexer(DelegatingLexer):
    """
    Lexer for highlighting C# within ASP.NET pages.

    Combines ``GenericAspxLexer`` with ``CSharpLexer`` through
    ``DelegatingLexer``.
    """

    name = 'aspx-cs'
    aliases = ['aspx-cs']
    filenames = ['*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd']
    mimetypes = []

    def __init__(self, **options):
        """Set up the delegation pair, forwarding all lexer options."""
        parent = super(CSharpAspxLexer, self)
        parent.__init__(CSharpLexer, GenericAspxLexer, **options)

    def analyse_text(text):
        """
        Score *text*: 0.2 for a ``Page Language="C#"`` directive, else 0.15
        for a ``<script ... language="C#"`` attribute, else None.  Both
        checks ignore case.
        """
        page_directive = re.search(r'Page\s*Language="C#"', text, re.I)
        if page_directive is not None:
            return 0.2

        script_attr = re.search(r'script[^>]+language=["\']C#', text, re.I)
        if script_attr is not None:
            return 0.15
519
class VbNetAspxLexer(DelegatingLexer):
    """
    Lexer for highlighting Visual Basic.net within ASP.NET pages.

    Combines ``GenericAspxLexer`` with ``VbNetLexer`` through
    ``DelegatingLexer``.
    """

    name = 'aspx-vb'
    aliases = ['aspx-vb']
    filenames = ['*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd']
    mimetypes = []

    def __init__(self, **options):
        """Set up the delegation pair, forwarding all lexer options."""
        parent = super(VbNetAspxLexer, self)
        parent.__init__(VbNetLexer, GenericAspxLexer, **options)

    def analyse_text(text):
        """
        Score *text*: 0.2 for a ``Page Language="Vb"`` directive, else 0.15
        for a ``<script ... language="vb"`` attribute, else None.  Both
        checks ignore case.
        """
        page_directive = re.search(r'Page\s*Language="Vb"', text, re.I)
        if page_directive is not None:
            return 0.2

        script_attr = re.search(r'script[^>]+language=["\']vb', text, re.I)
        if script_attr is not None:
            return 0.15
539
540
541# Very close to functional.OcamlLexer
class FSharpLexer(RegexLexer):
    """
    For the F# language (version 3.0).

    AAAAACK Strings
    http://research.microsoft.com/en-us/um/cambridge/projects/fsharp/manual/spec.html#_Toc335818775

    .. versionadded:: 1.5
    """

    name = 'FSharp'
    aliases = ['fsharp']
    filenames = ['*.fs', '*.fsi']
    mimetypes = ['text/x-fsharp']

    # Longer alternatives are listed before their prefixes ('do!' before
    # 'do', 'function' before 'fun', ...) because the list is joined into a
    # single regex alternation below.
    keywords = [
        'abstract', 'as', 'assert', 'base', 'begin', 'class', 'default',
        'delegate', 'do!', 'do', 'done', 'downcast', 'downto', 'elif', 'else',
        'end', 'exception', 'extern', 'false', 'finally', 'for', 'function',
        'fun', 'global', 'if', 'inherit', 'inline', 'interface', 'internal',
        'in', 'lazy', 'let!', 'let', 'match', 'member', 'module', 'mutable',
        'namespace', 'new', 'null', 'of', 'open', 'override', 'private', 'public',
        'rec', 'return!', 'return', 'select', 'static', 'struct', 'then', 'to',
        'true', 'try', 'type', 'upcast', 'use!', 'use', 'val', 'void', 'when',
        'while', 'with', 'yield!', 'yield',
    ]
    # Reserved words; cannot hurt to color them as keywords too.
    keywords += [
        'atomic', 'break', 'checked', 'component', 'const', 'constraint',
        'constructor', 'continue', 'eager', 'event', 'external', 'fixed',
        'functor', 'include', 'method', 'mixin', 'object', 'parallel',
        'process', 'protected', 'pure', 'sealed', 'tailcall', 'trait',
        'virtual', 'volatile',
    ]
    # Regex fragments (already escaped) for symbolic keywords; raw strings
    # so the backslashes are regex escapes, not Python string escapes.
    keyopts = [
        '!=', '#', '&&', '&', r'\(', r'\)', r'\*', r'\+', ',', r'-\.',
        '->', '-', r'\.\.', r'\.', '::', ':=', ':>', ':', ';;', ';', '<-',
        r'<\]', '<', r'>\]', '>', r'\?\?', r'\?', r'\[<', r'\[\|', r'\[', r'\]',
        '_', '`', r'\{', r'\|\]', r'\|', r'\}', '~', '<@@', '<@', '=', '@>', '@@>',
    ]

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ['and', 'or', 'not']
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = [
        'sbyte', 'byte', 'char', 'nativeint', 'unativeint', 'float32', 'single',
        'float', 'double', 'int8', 'uint8', 'int16', 'uint16', 'int32',
        'uint32', 'int64', 'uint64', 'decimal', 'unit', 'bool', 'string',
        'list', 'exn', 'obj', 'enum',
    ]

    # See http://msdn.microsoft.com/en-us/library/dd233181.aspx and/or
    # http://fsharp.org/about/files/spec.pdf for reference.  Good luck.

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbrafv]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\U[0-9a-fA-F]{8}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b(?<!\.)([A-Z][\w\']*)(?=\s*\.)',
             Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name),
            (r'///.*?\n', String.Doc),
            (r'//.*?\n', Comment.Single),
            (r'\(\*(?!\))', Comment, 'comment'),

            (r'@"', String, 'lstring'),
            (r'"""', String, 'tqs'),
            (r'"', String, 'string'),

            (r'\b(open|module)(\s+)([\w.]+)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'\b(let!?)(\s+)(\w+)',
             bygroups(Keyword, Text, Name.Variable)),
            (r'\b(type)(\s+)(\w+)',
             bygroups(Keyword, Text, Name.Class)),
            (r'\b(member|override)(\s+)(\w+)(\.)(\w+)',
             bygroups(Keyword, Text, Name, Punctuation, Name.Function)),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'``([^`\n\r\t]|`[^`\n\r\t])+``', Name),
            (r'(%s)' % '|'.join(keyopts), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
            (r'#[ \t]*(if|endif|else|line|nowarn|light|\d+)\b.*?\n',
             Comment.Preproc),

            (r"[^\W\d][\w']*", Name),

            # Numeric literals.  The prefixed forms and floats are tried
            # before the plain integer rule, which matches any leading
            # digit and would otherwise shadow all of them.
            (r'0[xX][\da-fA-F][\da-fA-F_]*[uU]?[yslLn]?[fF]?', Number.Hex),
            (r'0[oO][0-7][0-7_]*[uU]?[yslLn]?', Number.Oct),
            (r'0[bB][01][01_]*[uU]?[yslLn]?', Number.Bin),
            # A float needs a fraction part and/or an exponent.  The
            # lookahead after the dot keeps ranges such as ``1..10`` out.
            (r'\d[\d_]*(?:\.(?!\.)[\d_]*(?:[eE][+\-]?\d[\d_]*)?'
             r'|[eE][+\-]?\d[\d_]*)[fFmM]?',
             Number.Float),
            (r'\d[\d_]*[uU]?[yslLnQRZINGmM]?', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'B?",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element

            (r'@?"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        # Entered after a capitalised name that is followed by a dot:
        # consumes the rest of the dotted path.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            # e.g. dictionary index access
            default('#pop'),
        ],
        # (* ... *) comments nest via #push/#pop.
        'comment': [
            (r'[^(*)@"]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            # comments cannot be closed within strings in comments
            (r'@"', String, 'lstring'),
            (r'"""', String, 'tqs'),
            (r'"', String, 'string'),
            (r'[(*)@]', Comment),
        ],
        # Regular "..." string
        'string': [
            (r'[^\\"]+', String),
            include('escape-sequence'),
            (r'\\\n', String),
            (r'\n', String),  # newlines are allowed in any string
            (r'"B?', String, '#pop'),
        ],
        # Verbatim @"..." string; "" is an embedded quote
        'lstring': [
            (r'[^"]+', String),
            (r'\n', String),
            (r'""', String),
            (r'"B?', String, '#pop'),
        ],
        # Triple-quoted string
        'tqs': [
            (r'[^"]+', String),
            (r'\n', String),
            (r'"""B?', String, '#pop'),
            (r'"', String),
        ],
    }
692