1# -*- coding: utf-8 -*-
2"""
3    pygments.lexers.csound
4    ~~~~~~~~~~~~~~~~~~~~~~
5
6    Lexers for Csound languages.
7
8    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
9    :license: BSD, see LICENSE for details.
10"""
11
12import re
13
14from pygments.lexer import RegexLexer, bygroups, default, include, using, words
15from pygments.token import Comment, Error, Keyword, Name, Number, Operator, Punctuation, \
16    String, Text, Whitespace
17from pygments.lexers._csound_builtins import OPCODES, DEPRECATED_OPCODES
18from pygments.lexers.html import HtmlLexer
19from pygments.lexers.python import PythonLexer
20from pygments.lexers.scripting import LuaLexer
21
22__all__ = ['CsoundScoreLexer', 'CsoundOrchestraLexer', 'CsoundDocumentLexer']
23
# Rule tuple matching an optional run of single-line comments (introduced by
# ``;`` or ``//``) followed by a newline. The comment text is emitted as
# Comment.Single and the newline itself as Text.
# NOTE(review): nothing in the part of this module visible here references
# this name -- confirm whether it is still needed.
newline = (r'((?:(?:;|//).*)*)(\n)', bygroups(Comment.Single, Text))
25
26
class CsoundLexer(RegexLexer):
    """
    Base class holding the token states shared by the Csound lexers.

    It defines no ``'root'`` state of its own; ``CsoundScoreLexer`` and
    ``CsoundOrchestraLexer`` subclass it and reach these states through
    ``include()`` or by pushing them onto the state stack.
    """

    tokens = {
        # Horizontal whitespace, /* ... */ block comments, line comments that
        # start with ``;`` or ``//``, and a backslash immediately followed by a
        # newline (presumably a line continuation). Bare newlines are NOT
        # consumed here; each including state decides what a newline means.
        'whitespace': [
            (r'[ \t]+', Text),
            (r'/[*](?:.|\n)*?[*]/', Comment.Multiline),
            (r'(?:;|//).*$', Comment.Single),
            (r'(\\)(\n)', bygroups(Whitespace, Text))
        ],

        'preprocessor directives': [
            # Directives that take no argument: #end, #endif, #else, ###, and
            # @ / @@ followed by optional blanks and digits.
            (r'#(?:e(?:nd(?:if)?|lse)\b|##)|@@?[ \t]*\d+', Comment.Preproc),
            # '#includestr' must be tried before '#include', which is a prefix
            # of it and has no trailing word boundary.
            (r'#includestr', Comment.Preproc, 'includestr directive'),
            (r'#include', Comment.Preproc, 'include directive'),
            (r'#[ \t]*define', Comment.Preproc, 'define directive'),
            (r'#(?:ifn?def|undef)\b', Comment.Preproc, 'macro directive')
        ],

        # After #include: the argument is delimited by any character other than
        # a space or tab, and runs to the next occurrence of that same character.
        'include directive': [
            include('whitespace'),
            (r'([^ \t]).*?\1', String, '#pop')
        ],
        # After #includestr: only a double-quoted string is accepted; this state
        # is replaced by 'quoted string' so macro uses inside it are recognized.
        'includestr directive': [
            include('whitespace'),
            (r'"', String, ('#pop', 'quoted string'))
        ],

        # After #define: a macro name, optionally followed directly by '(' to
        # start a parameter list. The function-like form is tried first because
        # the bare-name rule would otherwise match its prefix.
        'define directive': [
            (r'\n', Text),
            include('whitespace'),
            (r'([A-Z_a-z]\w*)(\()', bygroups(Comment.Preproc, Punctuation),
             ('#pop', 'macro parameter name list')),
            (r'[A-Z_a-z]\w*', Comment.Preproc, ('#pop', 'before macro body'))
        ],
        # Parameter names separated by ' or #, terminated by ')'.
        'macro parameter name list': [
            include('whitespace'),
            (r'[A-Z_a-z]\w*', Comment.Preproc),
            (r"['#]", Punctuation),
            (r'\)', Punctuation, ('#pop', 'before macro body'))
        ],
        # Skips newlines, whitespace and comments until the '#' that opens the
        # macro body.
        'before macro body': [
            (r'\n', Text),
            include('whitespace'),
            (r'#', Punctuation, ('#pop', 'macro body'))
        ],
        # The body runs up to the first '#' that is not preceded by a backslash;
        # the two-character sequence \# stays part of the body.
        'macro body': [
            (r'(?:\\(?!#)|[^#\\]|\n)+', Comment.Preproc),
            (r'\\#', Comment.Preproc),
            (r'(?<!\\)#', Punctuation, '#pop')
        ],

        # After #ifdef, #ifndef or #undef: a single macro name.
        'macro directive': [
            include('whitespace'),
            (r'[A-Z_a-z]\w*', Comment.Preproc, '#pop')
        ],

        # $NAME (optionally followed by '.'). When '(' follows immediately, the
        # arguments are lexed in 'macro parameter value list'.
        'macro uses': [
            (r'(\$[A-Z_a-z]\w*\.?)(\()', bygroups(Comment.Preproc, Punctuation),
             'macro parameter value list'),
            (r'\$[A-Z_a-z]\w*(?:\.|\b)', Comment.Preproc)
        ],
        # Argument text of a function-like macro use. ' and # separate
        # arguments; strings and nested parentheses get their own states so that
        # a ')' inside them does not end the list.
        'macro parameter value list': [
            (r'(?:[^\'#"{()]|\{(?!\{))+', Comment.Preproc),
            (r"['#]", Punctuation),
            (r'"', String, 'macro parameter value quoted string'),
            (r'\{\{', String, 'macro parameter value braced string'),
            (r'\(', Comment.Preproc, 'macro parameter value parenthetical'),
            (r'\)', Punctuation, '#pop')
        ],
        # Inside a string that is itself a macro argument, the characters
        # # ' ( ) are flagged as Error unless preceded by a backslash.
        'macro parameter value quoted string': [
            (r"\\[#'()]", Comment.Preproc),
            (r"[#'()]", Error),
            include('quoted string')
        ],
        'macro parameter value braced string': [
            (r"\\[#'()]", Comment.Preproc),
            (r"[#'()]", Error),
            include('braced string')
        ],
        # Balanced parentheses inside a macro argument; '#push' re-enters this
        # state for each nested '('.
        'macro parameter value parenthetical': [
            (r'(?:[^\\()]|\\\))+', Comment.Preproc),
            (r'\(', Comment.Preproc, '#push'),
            (r'\)', Comment.Preproc, '#pop')
        ],

        'whitespace and macro uses': [
            include('whitespace'),
            include('macro uses')
        ],

        'numbers': [
            # Floats come first so that the integer rule below does not consume
            # just the leading digits of one.
            (r'\d+[Ee][+-]?\d+|(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?', Number.Float),
            # The 0x / 0X prefix and the hexadecimal digits get separate tokens.
            (r'(0[Xx])([0-9A-Fa-f]+)', bygroups(Keyword.Type, Number.Hex)),
            (r'\d+', Number.Integer)
        ],

        # Body of a double-quoted string (the opening quote is consumed by the
        # rule that pushes this state). Macro uses are recognized inside it; a
        # '$' that does not start a macro use is plain string text.
        'quoted string': [
            (r'"', String, '#pop'),
            (r'[^"$]+', String),
            include('macro uses'),
            (r'[$]', String)
        ],

        'braced string': [
            # Do nothing. This must be defined in subclasses.
        ]
    }
133
134
class CsoundScoreLexer(CsoundLexer):
    """
    For `Csound <https://csound.com>`_ scores.

    .. versionadded:: 2.1
    """

    name = 'Csound Score'
    aliases = ['csound-score', 'csound-sco']
    filenames = ['*.sco']

    tokens = {
        'root': [
            (r'\n', Text),
            include('whitespace and macro uses'),
            include('preprocessor directives'),

            # Single-letter score statements.
            (r'[aBbCdefiqstvxy]', Keyword),
            # There is also a w statement that is generated internally and should not be
            # used; see https://github.com/csound/csound/issues/750.

            (r'z', Keyword.Constant),
            # z is a constant equal to 800,000,000,000. 800 billion seconds is about
            # 25,367.8 years. See also
            # https://csound.com/docs/manual/ScoreTop.html and
            # https://github.com/csound/csound/search?q=stof+path%3AEngine+filename%3Asread.c.

            # n/N/p/P followed by p/P and a number. This must precede the
            # '[mn]' rule below, which would otherwise consume the leading 'n'.
            (r'([nNpP][pP])(\d+)', bygroups(Keyword, Number.Integer)),

            # m and n statements are followed by a label, lexed in 'mark statement'.
            (r'[mn]', Keyword, 'mark statement'),

            include('numbers'),
            (r'[!+\-*/^%&|<>#~.]', Operator),
            (r'[()\[\]]', Punctuation),
            (r'"', String, 'quoted string'),
            # '{' opens a loop: '{', a repeat count, a name, then the loop body
            # up to the matching '}'.
            (r'\{', Comment.Preproc, 'loop after left brace'),
        ],

        # Rest of an m or n statement: labels, until the end of the line.
        'mark statement': [
            include('whitespace and macro uses'),
            (r'[A-Z_a-z]\w*', Name.Label),
            (r'\n', Text, '#pop')
        ],

        # Each loop state replaces itself with the next one ('#pop' followed by
        # a push), so only a single stack entry is left to pop at '}'.
        'loop after left brace': [
            include('whitespace and macro uses'),
            (r'\d+', Number.Integer, ('#pop', 'loop after repeat count')),
        ],
        'loop after repeat count': [
            include('whitespace and macro uses'),
            # Identifier after the repeat count (presumably the loop's macro
            # name -- confirm against the Csound manual).
            (r'[A-Z_a-z]\w*', Comment.Preproc, ('#pop', 'loop'))
        ],
        # The loop body is ordinary score code; '}' is tested first so that it
        # ends the loop. Nested '{' re-enters the loop states through 'root'.
        'loop': [
            (r'\}', Comment.Preproc, '#pop'),
            include('root')
        ],

        # Braced strings are not allowed in Csound scores, but this is needed because the
        # superclass includes it.
        'braced string': [
            (r'\}\}', String, '#pop'),
            (r'[^}]|\}(?!\})', String)
        ]
    }
199
200
class CsoundOrchestraLexer(CsoundLexer):
    """
    For `Csound <https://csound.com>`_ orchestras.

    Names introduced by ``opcode`` definitions are remembered while lexing so
    that later uses of them are highlighted as functions.

    .. versionadded:: 2.1
    """

    name = 'Csound Orchestra'
    aliases = ['csound', 'csound-orc']
    filenames = ['*.orc', '*.udo']

    # Kept as a class attribute for backward compatibility; every instance
    # shadows it with its own set in __init__.
    user_defined_opcodes = set()

    def __init__(self, **options):
        """Create the lexer with its own, initially empty, set of opcode names."""
        CsoundLexer.__init__(self, **options)
        # A per-instance set keeps opcodes defined in one document from being
        # highlighted as functions by unrelated lexer instances.
        self.user_defined_opcodes = set()

    def opcode_name_callback(lexer, match):
        """
        Record the name following the ``opcode`` keyword and emit it.

        :param lexer: the lexer instance running the callback.
        :param match: regex match whose whole text is the opcode name.
        :return: generator of one ``(index, Name.Function, name)`` tuple.
        """
        opcode = match.group(0)
        lexer.user_defined_opcodes.add(opcode)
        yield match.start(), Name.Function, opcode

    def name_callback(lexer, match):
        """
        Classify an identifier and its optional ``:X`` type annotation.

        Built-in and deprecated opcodes become ``Name.Builtin``; names recorded
        by ``opcode_name_callback`` become ``Name.Function``. Any other name is
        a plain ``Name``, with a leading ``g?[afikSw]`` prefix split off as
        ``Keyword.Type`` when at least one more word character follows it.

        :param lexer: the lexer instance running the callback.
        :param match: regex match with the identifier in group 1 and, when an
            annotation is present, ``:`` in group 2 and the letter in group 3.
        :return: generator of ``(index, tokentype, value)`` tuples whose indices
            are positions in the text being lexed.
        """
        type_annotation_token = Keyword.Type

        name = match.group(1)
        if name in OPCODES or name in DEPRECATED_OPCODES:
            yield match.start(), Name.Builtin, name
        elif name in lexer.user_defined_opcodes:
            yield match.start(), Name.Function, name
        else:
            # On a variable the annotation letter is not a type keyword.
            type_annotation_token = Name
            name_match = re.search(r'^(g?[afikSw])(\w+)', name)
            if name_match:
                # name_match offsets are relative to ``name``; shift them by the
                # identifier's position so the yielded indices refer to the
                # lexed text, as they do in every other branch.
                name_start = match.start(1)
                yield (name_start + name_match.start(1), Keyword.Type,
                       name_match.group(1))
                yield name_start + name_match.start(2), Name, name_match.group(2)
            else:
                yield match.start(), Name, name

        if match.group(2):
            yield match.start(2), Punctuation, match.group(2)
            yield match.start(3), type_annotation_token, match.group(3)

    tokens = {
        'root': [
            (r'\n', Text),

            # A label: a word followed by ':' at the start of a line. Tried
            # before anything else so the word is not lexed as a name.
            (r'^([ \t]*)(\w+)(:)([ \t]+|$)', bygroups(Text, Name.Label, Punctuation, Text)),

            include('whitespace and macro uses'),
            include('preprocessor directives'),

            (r'\binstr\b', Keyword.Declaration, 'instrument numbers and identifiers'),
            (r'\bopcode\b', Keyword.Declaration, 'after opcode keyword'),
            (r'\b(?:end(?:in|op))\b', Keyword.Declaration),

            include('partial statements')
        ],

        # Everything that can appear inside a statement. Kept separate from
        # 'root' so that 'goto argument' can reuse it.
        'partial statements': [
            (r'\b(?:0dbfs|A4|k(?:r|smps)|nchnls(?:_i)?|sr)\b', Name.Variable.Global),

            include('numbers'),

            (r'\+=|-=|\*=|/=|<<|>>|<=|>=|==|!=|&&|\|\||[~¬]|[=!+\-*/^%&|<>#?:]', Operator),
            (r'[(),\[\]]', Punctuation),

            (r'"', String, 'quoted string'),
            (r'\{\{', String, 'braced string'),

            (words((
                'do', 'else', 'elseif', 'endif', 'enduntil', 'fi', 'if', 'ithen', 'kthen',
                'od', 'then', 'until', 'while',
                ), prefix=r'\b', suffix=r'\b'), Keyword),
            (words(('return', 'rireturn'), prefix=r'\b', suffix=r'\b'), Keyword.Pseudo),

            # Statements that end in a label. A tuple of states is pushed in
            # order, so the last entry is entered first: each 'goto argument'
            # pops at a comma, and 'goto label' is reached after the listed
            # number of arguments.
            (r'\b[ik]?goto\b', Keyword, 'goto label'),
            (r'\b(r(?:einit|igoto)|tigoto)(\(|\b)', bygroups(Keyword.Pseudo, Punctuation),
             'goto label'),
            (r'\b(c(?:g|in?|k|nk?)goto)(\(|\b)', bygroups(Keyword.Pseudo, Punctuation),
             ('goto label', 'goto argument')),
            (r'\b(timout)(\(|\b)', bygroups(Keyword.Pseudo, Punctuation),
             ('goto label', 'goto argument', 'goto argument')),
            (r'\b(loop_[gl][et])(\(|\b)', bygroups(Keyword.Pseudo, Punctuation),
             ('goto label', 'goto argument', 'goto argument', 'goto argument')),

            # Opcodes whose string arguments get special treatment. They must
            # precede the generic name rule at the end of this state.
            (r'\bprintk?s\b', Name.Builtin, 'prints opcode'),
            (r'\b(?:readscore|scoreline(?:_i)?)\b', Name.Builtin, 'Csound score opcode'),
            (r'\bpyl?run[it]?\b', Name.Builtin, 'Python opcode'),
            (r'\blua_(?:exec|opdef)\b', Name.Builtin, 'Lua opcode'),
            (r'\bp\d+\b', Name.Variable.Instance),
            # Any other identifier, with an optional ':X' type annotation.
            (r'\b([A-Z_a-z]\w*)(?:(:)([A-Za-z]))?\b', name_callback)
        ],

        # Rest of an 'instr' line: numbers or names separated by ',' or '+'.
        'instrument numbers and identifiers': [
            include('whitespace and macro uses'),
            (r'\d+|[A-Z_a-z]\w*', Name.Function),
            (r'[+,]', Punctuation),
            (r'\n', Text, '#pop')
        ],

        # Rest of an 'opcode' line: the opcode name (recorded by the callback),
        # then its type signatures.
        'after opcode keyword': [
            include('whitespace and macro uses'),
            (r'[A-Z_a-z]\w*', opcode_name_callback, ('#pop', 'opcode type signatures')),
            (r'\n', Text, '#pop')
        ],
        'opcode type signatures': [
            include('whitespace and macro uses'),

            # https://github.com/csound/csound/search?q=XIDENT+path%3AEngine+filename%3Acsound_orc.lex
            (r'0|[afijkKoOpPStV\[\]]+', Keyword.Type),

            (r',', Punctuation),
            (r'\n', Text, '#pop')
        ],

        # Overrides the base-class state to add escape sequences and format
        # specifiers. The final rule consumes the characters excluded from the
        # plain-text rule when none of the included rules matched them.
        'quoted string': [
            (r'"', String, '#pop'),
            (r'[^\\"$%)]+', String),
            include('macro uses'),
            include('escape sequences'),
            include('format specifiers'),
            (r'[\\$%)]', String)
        ],
        # Body of a {{ ... }} string; a single '}' is ordinary string text.
        'braced string': [
            (r'\}\}', String, '#pop'),
            (r'(?:[^\\%)}]|\}(?!\}))+', String),
            include('escape sequences'),
            include('format specifiers'),
            (r'[\\%)]', String)
        ],
        'escape sequences': [
            # https://github.com/csound/csound/search?q=unquote_string+path%3AEngine+filename%3Acsound_orc_compile.c
            (r'\\(?:[\\abnrt"]|[0-7]{1,3})', String.Escape)
        ],
        # Format specifiers are highlighted in all strings, even though only
        #   fprintks        https://csound.com/docs/manual/fprintks.html
        #   fprints         https://csound.com/docs/manual/fprints.html
        #   printf/printf_i https://csound.com/docs/manual/printf.html
        #   printks         https://csound.com/docs/manual/printks.html
        #   prints          https://csound.com/docs/manual/prints.html
        #   sprintf         https://csound.com/docs/manual/sprintf.html
        #   sprintfk        https://csound.com/docs/manual/sprintfk.html
        # work with strings that contain format specifiers. In addition, these opcodes’
        # handling of format specifiers is inconsistent:
        #   - fprintks and fprints accept %a and %A specifiers, and accept %s specifiers
        #     starting in Csound 6.15.0.
        #   - printks and prints accept %a and %A specifiers, but don’t accept %s
        #     specifiers.
        #   - printf, printf_i, sprintf, and sprintfk don’t accept %a and %A specifiers,
        #     but accept %s specifiers.
        # See https://github.com/csound/csound/issues/747 for more information.
        'format specifiers': [
            (r'%[#0\- +]*\d*(?:\.\d+)?[AE-GXac-giosux]', String.Interpol),
            (r'%%', String.Escape)
        ],

        # One argument of a goto-like statement; the comma that ends it pops
        # back to the next state pushed by the statement rule.
        'goto argument': [
            include('whitespace and macro uses'),
            (r',', Punctuation, '#pop'),
            include('partial statements')
        ],
        # The label ending a goto-like statement; default() pops without
        # consuming anything when no label follows.
        'goto label': [
            include('whitespace and macro uses'),
            (r'\w+', Name.Label, '#pop'),
            default('#pop')
        ],

        # After printks/prints: quoted strings recognize extra escapes; anything
        # else returns to the enclosing state.
        'prints opcode': [
            include('whitespace and macro uses'),
            (r'"', String, 'prints quoted string'),
            default('#pop')
        ],
        'prints quoted string': [
            (r'\\\\[aAbBnNrRtT]', String.Escape),
            (r'%[!nNrRtT]|[~^]{1,2}', String.Escape),
            include('quoted string')
        ],

        # After readscore/scoreline/scoreline_i: a {{ ... }} argument is lexed
        # as a Csound score.
        'Csound score opcode': [
            include('whitespace and macro uses'),
            (r'"', String, 'quoted string'),
            (r'\{\{', String, 'Csound score'),
            (r'\n', Text, '#pop')
        ],
        'Csound score': [
            (r'\}\}', String, '#pop'),
            (r'([^}]+)|\}(?!\})', using(CsoundScoreLexer))
        ],

        # After the Python opcodes matched in 'partial statements': a {{ ... }}
        # argument is lexed as Python.
        'Python opcode': [
            include('whitespace and macro uses'),
            (r'"', String, 'quoted string'),
            (r'\{\{', String, 'Python'),
            (r'\n', Text, '#pop')
        ],
        'Python': [
            (r'\}\}', String, '#pop'),
            (r'([^}]+)|\}(?!\})', using(PythonLexer))
        ],

        # After lua_exec/lua_opdef: a {{ ... }} argument is lexed as Lua.
        'Lua opcode': [
            include('whitespace and macro uses'),
            (r'"', String, 'quoted string'),
            (r'\{\{', String, 'Lua'),
            (r'\n', Text, '#pop')
        ],
        'Lua': [
            (r'\}\}', String, '#pop'),
            (r'([^}]+)|\}(?!\})', using(LuaLexer))
        ]
    }
409
410
class CsoundDocumentLexer(RegexLexer):
    """
    For `Csound <https://csound.com>`_ documents.

    .. versionadded:: 2.1
    """

    name = 'Csound Document'
    aliases = ['csound-document', 'csound-csd']
    filenames = ['*.csd']

    # These tokens are based on those in XmlLexer in pygments/lexers/html.py. Making
    # CsoundDocumentLexer a subclass of XmlLexer rather than RegexLexer may seem like a
    # better idea, since Csound Document files look like XML files. However, Csound
    # Documents can contain Csound comments (preceded by //, for example) before and
    # after the root element, unescaped bitwise AND & and less than < operators, etc. In
    # other words, while Csound Document files look like XML files, they may not actually
    # be XML files.
    tokens = {
        'root': [
            # Csound-style comments are recognized outside of tags.
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'(?:;|//).*$', Comment.Single),
            # Plain text: anything that cannot start a comment or a tag, plus a
            # '/' that is not followed by a second '/'.
            (r'[^/;<]+|/(?!/)', Text),

            # Opening tags of the embedded-language sections. Two states are
            # pushed: 'tag' lexes the rest of the opening tag first, and once it
            # pops, the section state underneath takes over.
            (r'<\s*CsInstruments', Name.Tag, ('orchestra', 'tag')),
            (r'<\s*CsScore', Name.Tag, ('score', 'tag')),
            (r'<\s*[Hh][Tt][Mm][Ll]', Name.Tag, ('HTML', 'tag')),

            # Any other opening tag, and any closing tag.
            (r'<\s*[\w:.-]+', Name.Tag, 'tag'),
            (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag)
        ],

        # Each section state hands everything up to (but excluding, via the
        # lookahead) its closing tag to the matching lexer, then pops on the
        # closing tag itself.
        'orchestra': [
            (r'<\s*/\s*CsInstruments\s*>', Name.Tag, '#pop'),
            (r'(.|\n)+?(?=<\s*/\s*CsInstruments\s*>)', using(CsoundOrchestraLexer))
        ],
        'score': [
            (r'<\s*/\s*CsScore\s*>', Name.Tag, '#pop'),
            (r'(.|\n)+?(?=<\s*/\s*CsScore\s*>)', using(CsoundScoreLexer))
        ],
        'HTML': [
            (r'<\s*/\s*[Hh][Tt][Mm][Ll]\s*>', Name.Tag, '#pop'),
            (r'(.|\n)+?(?=<\s*/\s*[Hh][Tt][Mm][Ll]\s*>)', using(HtmlLexer))
        ],

        # Inside an opening tag, after its name: attributes, then '>' or '/>'.
        'tag': [
            (r'\s+', Text),
            (r'[\w.:-]+\s*=', Name.Attribute, 'attr'),
            (r'/?\s*>', Name.Tag, '#pop')
        ],
        # An attribute value: double-quoted, single-quoted, or an unquoted run
        # of characters up to whitespace or '>'.
        'attr': [
            (r'\s+', Text),
            (r'".*?"', String, '#pop'),
            (r"'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop')
        ]
    }
468