1# -*- coding: utf-8 -*-
2"""
3    pygments.lexers.matlab
4    ~~~~~~~~~~~~~~~~~~~~~~
5
6    Lexers for Matlab and related languages.
7
8    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
9    :license: BSD, see LICENSE for details.
10"""
11
12import re
13
14from pygments.lexer import Lexer, RegexLexer, bygroups, default, words, \
15    do_insertions
16from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17    Number, Punctuation, Generic, Whitespace
18
19from pygments.lexers import _scilab_builtins
20
# Names exported by this module on a star-import: the four lexer classes
# defined below.
__all__ = ['MatlabLexer', 'MatlabSessionLexer', 'OctaveLexer', 'ScilabLexer']
22
23
class MatlabLexer(RegexLexer):
    """
    For Matlab source code.

    The ``root`` state handles ordinary code; ``blockcomment`` handles
    ``%{ ... %}`` comments, ``deffunc`` the remainder of a ``function``
    declaration line, ``string`` a single-quoted string and ``commandargs``
    the arguments of a statement written in command syntax (``cd ./foo``).

    .. versionadded:: 0.10
    """
    name = 'Matlab'
    aliases = ['matlab']
    filenames = ['*.m']
    mimetypes = ['text/matlab']

    #
    # These lists are generated automatically.
    # Run the following in bash shell:
    #
    # for f in elfun specfun elmat; do
    #   echo -n "$f = "
    #   matlab -nojvm -r "help $f;exit;" | perl -ne \
    #   'push(@c,$1) if /^    (\w+)\s+-/; END {print q{["}.join(q{","},@c).qq{"]\n};}'
    # done
    #
    # elfun: Elementary math functions
    # specfun: Special Math functions
    # elmat: Elementary matrices and matrix manipulation
    #
    # taken from Matlab version 9.4 (R2018a)
    #
    elfun = ("sin", "sind", "sinh", "asin", "asind", "asinh", "cos", "cosd", "cosh",
             "acos", "acosd", "acosh", "tan", "tand", "tanh", "atan", "atand", "atan2",
             "atan2d", "atanh", "sec", "secd", "sech", "asec", "asecd", "asech", "csc", "cscd",
             "csch", "acsc", "acscd", "acsch", "cot", "cotd", "coth", "acot", "acotd",
             "acoth", "hypot", "deg2rad", "rad2deg", "exp", "expm1", "log", "log1p", "log10", "log2", "pow2",
             "realpow", "reallog", "realsqrt", "sqrt", "nthroot", "nextpow2", "abs",
             "angle", "complex", "conj", "imag", "real", "unwrap", "isreal", "cplxpair",
             "fix", "floor", "ceil", "round", "mod", "rem", "sign")
    specfun = ("airy", "besselj", "bessely", "besselh", "besseli", "besselk", "beta",
               "betainc", "betaincinv", "betaln", "ellipj", "ellipke", "erf", "erfc", "erfcx",
               "erfinv", "erfcinv", "expint", "gamma", "gammainc", "gammaincinv", "gammaln", "psi", "legendre",
               "cross", "dot", "factor", "isprime", "primes", "gcd", "lcm", "rat",
               "rats", "perms", "nchoosek", "factorial", "cart2sph", "cart2pol",
               "pol2cart", "sph2cart", "hsv2rgb", "rgb2hsv")
    elmat = ("zeros", "ones", "eye", "repmat", "repelem", "linspace", "logspace",
             "freqspace", "meshgrid", "accumarray", "size", "length", "ndims", "numel",
             "disp", "isempty", "isequal", "isequaln", "cat", "reshape",
             "diag", "blkdiag", "tril", "triu", "fliplr", "flipud", "flip", "rot90",
             "find", "end", "sub2ind", "ind2sub", "bsxfun", "ndgrid", "permute",
             "ipermute", "shiftdim", "circshift", "squeeze", "isscalar", "isvector",
             "isrow", "iscolumn", "ismatrix", "eps", "realmax", "realmin", "intmax", "intmin", "flintmax", "pi", "i", "inf", "nan", "isnan",
             "isinf", "isfinite", "j", "true", "false", "compan", "gallery", "hadamard", "hankel",
             "hilb", "invhilb", "magic", "pascal", "rosser", "toeplitz", "vander",
             "wilkinson")

    # Alternation of all recognised operators.  Two-character operators are
    # listed before their one-character prefixes (``<=`` before ``<``) because
    # the regex engine takes the first alternative that matches.  ``\^``
    # (matrix power) is included so that ``x ^ 2`` is not mistaken for command
    # syntax by the look-ahead in the 'command form' rule below.
    _operators = r'-|==|~=|<=|>=|<|>|&&|&|~|\|\|?|\.\*|\*|\+|\.\^|\^|\.\\|\./|/|\\'

    tokens = {
        'root': [
            # line starting with '!' is sent as a system command.  not sure what
            # label to use...
            (r'^!.*', String.Other),
            (r'%\{\s*\n', Comment.Multiline, 'blockcomment'),
            (r'%.*$', Comment),
            (r'^\s*function\b', Keyword, 'deffunc'),

            # from 'iskeyword' on version 9.4 (R2018a):
            # Check that there is no preceding dot, as keywords are valid field
            # names.
            (words(('break', 'case', 'catch', 'classdef', 'continue', 'else',
                    'elseif', 'end', 'for', 'function',
                    'global', 'if', 'otherwise', 'parfor',
                    'persistent', 'return', 'spmd', 'switch',
                    'try', 'while'),
                   prefix=r'(?<!\.)', suffix=r'\b'),
             Keyword),

            ("(" + "|".join(elfun + specfun + elmat) + r')\b',  Name.Builtin),

            # line continuation with following comment:
            (r'(\.\.\.)(.*)$', bygroups(Keyword, Comment)),

            # command form:
            # "How MATLAB Recognizes Command Syntax" specifies that an operator
            # is recognized if it is either surrounded by spaces or by no
            # spaces on both sides; only the former case matters for us.  (This
            # allows distinguishing `cd ./foo` from `cd ./ foo`.)
            (r'(?:^|(?<=;))(\s*)(\w+)(\s+)(?!=|\(|(?:%s)\s+)' % _operators,
             bygroups(Text, Name, Text), 'commandargs'),

            # operators:
            (_operators, Operator),

            # numbers (must come before punctuation to handle `.5`; cannot use
            # `\b` due to e.g. `5. + .5`).
            (r'(?<!\w)((\d+\.\d*)|(\d*\.\d+))([eEf][+-]?\d+)?(?!\w)', Number.Float),
            (r'\b\d+[eEf][+-]?[0-9]+\b', Number.Float),
            (r'\b\d+\b', Number.Integer),

            # punctuation (':' is already covered by the first rule):
            (r'\[|\]|\(|\)|\{|\}|:|@|\.|,', Punctuation),
            (r'=|;', Punctuation),

            # quote can be transpose, instead of string:
            # (not great, but handles common cases...)
            (r'(?<=[\w)\].])\'+', Operator),

            # double-quoted string; "" is an escaped quote
            (r'"(""|[^"])*"', String),

            (r'(?<![\w)\].])\'', String, 'string'),
            (r'[a-zA-Z_]\w*', Name),
            (r'.', Text),
        ],
        'blockcomment': [
            # a line consisting of '%}' closes the block comment
            (r'^\s*%\}', Comment.Multiline, '#pop'),
            (r'^.*\n', Comment.Multiline),
            (r'.', Comment.Multiline),
        ],
        'deffunc': [
            # optional "outputs =", then the function name and its
            # parenthesised argument list
            (r'(\s*)(?:(.+)(\s*)(=)(\s*))?(.+)(\()(.*)(\))(\s*)',
             bygroups(Whitespace, Text, Whitespace, Punctuation,
                      Whitespace, Name.Function, Punctuation, Text,
                      Punctuation, Whitespace), '#pop'),
            # function with no args
            (r'(\s*)([a-zA-Z_]\w*)', bygroups(Text, Name.Function), '#pop'),
        ],
        'string': [
            # rest of a single-quoted string, up to and including the
            # closing quote
            (r"[^']*'", String, '#pop'),
        ],
        'commandargs': [
            # If an equal sign or other operator is encountered, this
            # isn't a command. It might be a variable assignment or
            # comparison operation with multiple spaces before the
            # equal sign or operator
            (r"=", Punctuation, '#pop'),
            (_operators, Operator, '#pop'),
            (r"[ \t]+", Text),
            ("'[^']*'", String),
            (r"[^';\s]+", String),
            (";", Punctuation, '#pop'),
            default('#pop'),
        ]
    }

    def analyse_text(text):
        """
        Guess how likely `text` is Matlab code.

        Returns ``1.`` when the first line that is not a ``%`` comment starts
        with ``function`` and contains no ``{``, ``0.2`` when the text
        contains a ``%`` comment line or a ``!`` system command line, and
        ``None`` otherwise.
        """
        # function declaration.  Each *line* is tested for being a comment;
        # testing the whole text here would discard every line of a file
        # that merely starts with a comment.
        first_non_comment = next((line for line in text.splitlines()
                                  if not re.match(r'^\s*%', line)), '').strip()
        if (first_non_comment.startswith('function')
                and '{' not in first_non_comment):
            return 1.
        # comment
        elif re.search(r'^\s*%', text, re.M):
            return 0.2
        # system cmd
        elif re.search(r'^!\w+', text, re.M):
            return 0.2
178
179
# Splits text into lines, each match keeping its trailing newline.  The
# second alternative yields a final line that has no terminating newline, so
# that such a line is not silently dropped by ``finditer``.
line_re = re.compile(r'.*?\n|.+')
181
182
class MatlabSessionLexer(Lexer):
    """
    For Matlab sessions.  Modeled after PythonConsoleLexer.
    Contributed by Ken Schutte <kschutte@csail.mit.edu>.

    Lines starting with the ``>>`` prompt are collected and lexed as Matlab
    code, lines starting with ``???`` are emitted as tracebacks and all other
    lines are emitted as program output.

    .. versionadded:: 0.10
    """
    name = 'Matlab session'
    aliases = ['matlabsession']

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, tokentype, value)`` tuples for the session `text`.

        Code lines are accumulated in a buffer and lexed with `MatlabLexer`
        once an output line (or the end of the text) is reached; prompts and
        tracebacks are merged back into that token stream with
        `do_insertions`.
        """
        mlexer = MatlabLexer(**self.options)

        curcode = ''
        insertions = []
        continuation = False
        # Length of the most recent prompt of the code block currently being
        # collected; 0 while no prompt has been seen for it.
        prompt_len = 0

        for match in line_re.finditer(text):
            line = match.group()

            if line.startswith('>> '):
                prompt_len = 3
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:prompt_len])]))
                curcode += line[prompt_len:]

            elif line.startswith('>>'):
                prompt_len = 2
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:prompt_len])]))
                curcode += line[prompt_len:]

            elif line.startswith('???'):
                # without is showing error on same line as before...?
                # line = "\n" + line
                insertions.append((len(curcode),
                                   [(0, Generic.Traceback, line)]))

            elif continuation and prompt_len:
                # Set leading spaces with the length of the prompt to be a
                # generic prompt.  This keeps code aligned when prompts are
                # removed, say with some Javascript
                if line.startswith(' ' * prompt_len):
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, line[:prompt_len])]))
                    curcode += line[prompt_len:]
                else:
                    curcode += line

            else:
                # Output line: flush pending code and insertions first.  The
                # insertions are checked too so that a traceback without any
                # preceding code is not lost.
                if curcode or insertions:
                    yield from do_insertions(
                        insertions, mlexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                prompt_len = 0

                yield match.start(), Generic.Output, line

            # Does not allow continuation if a comment is included after the ellipses.
            # Continues any line that ends with ..., even comments (lines that start with %)
            continuation = line.strip().endswith('...')

        if curcode or insertions:
            yield from do_insertions(
                insertions, mlexer.get_tokens_unprocessed(curcode))
251
252
class OctaveLexer(RegexLexer):
    """
    For GNU Octave source code.

    The ``root`` state handles ordinary code, ``string`` a single-quoted
    string and ``deffunc`` the remainder of a ``function`` declaration line.

    .. versionadded:: 1.5
    """
    name = 'Octave'
    aliases = ['octave']
    filenames = ['*.m']
    mimetypes = ['text/octave']

    # These lists are generated automatically.
    # Run the following in bash shell:
    #
    # First dump all of the Octave manual into a plain text file:
    #
    #   $ info octave --subnodes -o octave-manual
    #
    # Now grep through it:

    # for i in \
    #     "Built-in Function" "Command" "Function File" \
    #     "Loadable Function" "Mapping Function";
    # do
    #     perl -e '@name = qw('"$i"');
    #              print lc($name[0]),"_kw = [\n"';
    #
    #     perl -n -e 'print "\"$1\",\n" if /-- '"$i"': .* (\w*) \(/;' \
    #         octave-manual | sort | uniq ;
    #     echo "]" ;
    #     echo;
    # done

    # taken from Octave Mercurial changeset 8cc154f45e37 (30-jan-2011)

    builtin_kw = (
        "addlistener", "addpath", "addproperty", "all",
        "and", "any", "argnames", "argv", "assignin",
        "atexit", "autoload",
        "available_graphics_toolkits", "beep_on_error",
        "bitand", "bitmax", "bitor", "bitshift", "bitxor",
        "cat", "cell", "cellstr", "char", "class", "clc",
        "columns", "command_line_path",
        "completion_append_char", "completion_matches",
        "complex", "confirm_recursive_rmdir", "cputime",
        "crash_dumps_octave_core", "ctranspose", "cumprod",
        "cumsum", "debug_on_error", "debug_on_interrupt",
        "debug_on_warning", "default_save_options",
        "dellistener", "diag", "diff", "disp",
        "doc_cache_file", "do_string_escapes", "double",
        "drawnow", "e", "echo_executing_commands", "eps",
        "eq", "errno", "errno_list", "error", "eval",
        "evalin", "exec", "exist", "exit", "eye", "false",
        "fclear", "fclose", "fcntl", "fdisp", "feof",
        "ferror", "feval", "fflush", "fgetl", "fgets",
        "fieldnames", "file_in_loadpath", "file_in_path",
        "filemarker", "filesep", "find_dir_in_path",
        "fixed_point_format", "fnmatch", "fopen", "fork",
        "formula", "fprintf", "fputs", "fread", "freport",
        "frewind", "fscanf", "fseek", "fskipl", "ftell",
        "functions", "fwrite", "ge", "genpath", "get",
        "getegid", "getenv", "geteuid", "getgid",
        "getpgrp", "getpid", "getppid", "getuid", "glob",
        "gt", "gui_mode", "history_control",
        "history_file", "history_size",
        "history_timestamp_format_string", "home",
        "horzcat", "hypot", "ifelse",
        "ignore_function_time_stamp", "inferiorto",
        "info_file", "info_program", "inline", "input",
        "intmax", "intmin", "ipermute",
        "is_absolute_filename", "isargout", "isbool",
        "iscell", "iscellstr", "ischar", "iscomplex",
        "isempty", "isfield", "isfloat", "isglobal",
        "ishandle", "isieee", "isindex", "isinteger",
        "islogical", "ismatrix", "ismethod", "isnull",
        "isnumeric", "isobject", "isreal",
        "is_rooted_relative_filename", "issorted",
        "isstruct", "isvarname", "kbhit", "keyboard",
        "kill", "lasterr", "lasterror", "lastwarn",
        "ldivide", "le", "length", "link", "linspace",
        "logical", "lstat", "lt", "make_absolute_filename",
        "makeinfo_program", "max_recursion_depth", "merge",
        "methods", "mfilename", "minus", "mislocked",
        "mkdir", "mkfifo", "mkstemp", "mldivide", "mlock",
        "mouse_wheel_zoom", "mpower", "mrdivide", "mtimes",
        "munlock", "nargin", "nargout",
        "native_float_format", "ndims", "ne", "nfields",
        "nnz", "norm", "not", "numel", "nzmax",
        "octave_config_info", "octave_core_file_limit",
        "octave_core_file_name",
        "octave_core_file_options", "ones", "or",
        "output_max_field_width", "output_precision",
        "page_output_immediately", "page_screen_output",
        "path", "pathsep", "pause", "pclose", "permute",
        "pi", "pipe", "plus", "popen", "power",
        "print_empty_dimensions", "printf",
        "print_struct_array_contents", "prod",
        "program_invocation_name", "program_name",
        "putenv", "puts", "pwd", "quit", "rats", "rdivide",
        "readdir", "readlink", "read_readline_init_file",
        "realmax", "realmin", "rehash", "rename",
        "repelems", "re_read_readline_init_file", "reset",
        "reshape", "resize", "restoredefaultpath",
        "rethrow", "rmdir", "rmfield", "rmpath", "rows",
        "save_header_format_string", "save_precision",
        "saving_history", "scanf", "set", "setenv",
        "shell_cmd", "sighup_dumps_octave_core",
        "sigterm_dumps_octave_core", "silent_functions",
        "single", "size", "size_equal", "sizemax",
        "sizeof", "sleep", "source", "sparse_auto_mutate",
        "split_long_rows", "sprintf", "squeeze", "sscanf",
        "stat", "stderr", "stdin", "stdout", "strcmp",
        "strcmpi", "string_fill_char", "strncmp",
        "strncmpi", "struct", "struct_levels_to_print",
        "strvcat", "subsasgn", "subsref", "sum", "sumsq",
        "superiorto", "suppress_verbose_help_message",
        "symlink", "system", "tic", "tilde_expand",
        "times", "tmpfile", "tmpnam", "toc", "toupper",
        "transpose", "true", "typeinfo", "umask", "uminus",
        "uname", "undo_string_escapes", "unlink", "uplus",
        "upper", "usage", "usleep", "vec", "vectorize",
        "vertcat", "waitpid", "warning", "warranty",
        "whos_line_format", "yes_or_no", "zeros",
        "inf", "Inf", "nan", "NaN")

    command_kw = ("close", "load", "who", "whos")

    function_kw = (
        "accumarray", "accumdim", "acosd", "acotd",
        "acscd", "addtodate", "allchild", "ancestor",
        "anova", "arch_fit", "arch_rnd", "arch_test",
        "area", "arma_rnd", "arrayfun", "ascii", "asctime",
        "asecd", "asind", "assert", "atand",
        "autoreg_matrix", "autumn", "axes", "axis", "bar",
        "barh", "bartlett", "bartlett_test", "beep",
        "betacdf", "betainv", "betapdf", "betarnd",
        "bicgstab", "bicubic", "binary", "binocdf",
        "binoinv", "binopdf", "binornd", "bitcmp",
        "bitget", "bitset", "blackman", "blanks",
        "blkdiag", "bone", "box", "brighten", "calendar",
        "cast", "cauchy_cdf", "cauchy_inv", "cauchy_pdf",
        "cauchy_rnd", "caxis", "celldisp", "center", "cgs",
        "chisquare_test_homogeneity",
        "chisquare_test_independence", "circshift", "cla",
        "clabel", "clf", "clock", "cloglog", "closereq",
        "colon", "colorbar", "colormap", "colperm",
        "comet", "common_size", "commutation_matrix",
        "compan", "compare_versions", "compass",
        "computer", "cond", "condest", "contour",
        "contourc", "contourf", "contrast", "conv",
        "convhull", "cool", "copper", "copyfile", "cor",
        "corrcoef", "cor_test", "cosd", "cotd", "cov",
        "cplxpair", "cross", "cscd", "cstrcat", "csvread",
        "csvwrite", "ctime", "cumtrapz", "curl", "cut",
        "cylinder", "date", "datenum", "datestr",
        "datetick", "datevec", "dblquad", "deal",
        "deblank", "deconv", "delaunay", "delaunayn",
        "delete", "demo", "detrend", "diffpara", "diffuse",
        "dir", "discrete_cdf", "discrete_inv",
        "discrete_pdf", "discrete_rnd", "display",
        "divergence", "dlmwrite", "dos", "dsearch",
        "dsearchn", "duplication_matrix", "durbinlevinson",
        "ellipsoid", "empirical_cdf", "empirical_inv",
        "empirical_pdf", "empirical_rnd", "eomday",
        "errorbar", "etime", "etreeplot", "example",
        "expcdf", "expinv", "expm", "exppdf", "exprnd",
        "ezcontour", "ezcontourf", "ezmesh", "ezmeshc",
        "ezplot", "ezpolar", "ezsurf", "ezsurfc", "factor",
        "factorial", "fail", "fcdf", "feather", "fftconv",
        "fftfilt", "fftshift", "figure", "fileattrib",
        "fileparts", "fill", "findall", "findobj",
        "findstr", "finv", "flag", "flipdim", "fliplr",
        "flipud", "fpdf", "fplot", "fractdiff", "freqz",
        "freqz_plot", "frnd", "fsolve",
        "f_test_regression", "ftp", "fullfile", "fzero",
        "gamcdf", "gaminv", "gampdf", "gamrnd", "gca",
        "gcbf", "gcbo", "gcf", "genvarname", "geocdf",
        "geoinv", "geopdf", "geornd", "getfield", "ginput",
        "glpk", "gls", "gplot", "gradient",
        "graphics_toolkit", "gray", "grid", "griddata",
        "griddatan", "gtext", "gunzip", "gzip", "hadamard",
        "hamming", "hankel", "hanning", "hggroup",
        "hidden", "hilb", "hist", "histc", "hold", "hot",
        "hotelling_test", "housh", "hsv", "hurst",
        "hygecdf", "hygeinv", "hygepdf", "hygernd",
        "idivide", "ifftshift", "image", "imagesc",
        "imfinfo", "imread", "imshow", "imwrite", "index",
        "info", "inpolygon", "inputname", "interpft",
        "interpn", "intersect", "invhilb", "iqr", "isa",
        "isdefinite", "isdir", "is_duplicate_entry",
        "isequal", "isequalwithequalnans", "isfigure",
        "ishermitian", "ishghandle", "is_leap_year",
        "isletter", "ismac", "ismember", "ispc", "isprime",
        "isprop", "isscalar", "issquare", "isstrprop",
        "issymmetric", "isunix", "is_valid_file_id",
        "isvector", "jet", "kendall",
        "kolmogorov_smirnov_cdf",
        "kolmogorov_smirnov_test", "kruskal_wallis_test",
        "krylov", "kurtosis", "laplace_cdf", "laplace_inv",
        "laplace_pdf", "laplace_rnd", "legend", "legendre",
        "license", "line", "linkprop", "list_primes",
        "loadaudio", "loadobj", "logistic_cdf",
        "logistic_inv", "logistic_pdf", "logistic_rnd",
        "logit", "loglog", "loglogerr", "logm", "logncdf",
        "logninv", "lognpdf", "lognrnd", "logspace",
        "lookfor", "ls_command", "lsqnonneg", "magic",
        "mahalanobis", "manova", "matlabroot",
        "mcnemar_test", "mean", "meansq", "median", "menu",
        "mesh", "meshc", "meshgrid", "meshz", "mexext",
        "mget", "mkpp", "mode", "moment", "movefile",
        "mpoles", "mput", "namelengthmax", "nargchk",
        "nargoutchk", "nbincdf", "nbininv", "nbinpdf",
        "nbinrnd", "nchoosek", "ndgrid", "newplot", "news",
        "nonzeros", "normcdf", "normest", "norminv",
        "normpdf", "normrnd", "now", "nthroot", "null",
        "ocean", "ols", "onenormest", "optimget",
        "optimset", "orderfields", "orient", "orth",
        "pack", "pareto", "parseparams", "pascal", "patch",
        "pathdef", "pcg", "pchip", "pcolor", "pcr",
        "peaks", "periodogram", "perl", "perms", "pie",
        "pink", "planerot", "playaudio", "plot",
        "plotmatrix", "plotyy", "poisscdf", "poissinv",
        "poisspdf", "poissrnd", "polar", "poly",
        "polyaffine", "polyarea", "polyderiv", "polyfit",
        "polygcd", "polyint", "polyout", "polyreduce",
        "polyval", "polyvalm", "postpad", "powerset",
        "ppder", "ppint", "ppjumps", "ppplot", "ppval",
        "pqpnonneg", "prepad", "primes", "print",
        "print_usage", "prism", "probit", "qp", "qqplot",
        "quadcc", "quadgk", "quadl", "quadv", "quiver",
        "qzhess", "rainbow", "randi", "range", "rank",
        "ranks", "rat", "reallog", "realpow", "realsqrt",
        "record", "rectangle_lw", "rectangle_sw",
        "rectint", "refresh", "refreshdata",
        "regexptranslate", "repmat", "residue", "ribbon",
        "rindex", "roots", "rose", "rosser", "rotdim",
        "rref", "run", "run_count", "rundemos", "run_test",
        "runtests", "saveas", "saveaudio", "saveobj",
        "savepath", "scatter", "secd", "semilogx",
        "semilogxerr", "semilogy", "semilogyerr",
        "setaudio", "setdiff", "setfield", "setxor",
        "shading", "shift", "shiftdim", "sign_test",
        "sinc", "sind", "sinetone", "sinewave", "skewness",
        "slice", "sombrero", "sortrows", "spaugment",
        "spconvert", "spdiags", "spearman", "spectral_adf",
        "spectral_xdf", "specular", "speed", "spencer",
        "speye", "spfun", "sphere", "spinmap", "spline",
        "spones", "sprand", "sprandn", "sprandsym",
        "spring", "spstats", "spy", "sqp", "stairs",
        "statistics", "std", "stdnormal_cdf",
        "stdnormal_inv", "stdnormal_pdf", "stdnormal_rnd",
        "stem", "stft", "strcat", "strchr", "strjust",
        "strmatch", "strread", "strsplit", "strtok",
        "strtrim", "strtrunc", "structfun", "studentize",
        "subplot", "subsindex", "subspace", "substr",
        "substruct", "summer", "surf", "surface", "surfc",
        "surfl", "surfnorm", "svds", "swapbytes",
        "sylvester_matrix", "symvar", "synthesis", "table",
        "tand", "tar", "tcdf", "tempdir", "tempname",
        "test", "text", "textread", "textscan", "tinv",
        "title", "toeplitz", "tpdf", "trace", "trapz",
        "treelayout", "treeplot", "triangle_lw",
        "triangle_sw", "tril", "trimesh", "triplequad",
        "triplot", "trisurf", "triu", "trnd", "tsearchn",
        "t_test", "t_test_regression", "type", "unidcdf",
        "unidinv", "unidpdf", "unidrnd", "unifcdf",
        "unifinv", "unifpdf", "unifrnd", "union", "unique",
        "unix", "unmkpp", "unpack", "untabify", "untar",
        "unwrap", "unzip", "u_test", "validatestring",
        "vander", "var", "var_test", "vech", "ver",
        "version", "view", "voronoi", "voronoin",
        "waitforbuttonpress", "wavread", "wavwrite",
        "wblcdf", "wblinv", "wblpdf", "wblrnd", "weekday",
        "welch_test", "what", "white", "whitebg",
        "wienrnd", "wilcoxon_test", "wilkinson", "winter",
        "xlabel", "xlim", "ylabel", "yulewalker", "zip",
        "zlabel", "z_test")

    loadable_kw = (
        "airy", "amd", "balance", "besselh", "besseli",
        "besselj", "besselk", "bessely", "bitpack",
        "bsxfun", "builtin", "ccolamd", "cellfun",
        "cellslices", "chol", "choldelete", "cholinsert",
        "cholinv", "cholshift", "cholupdate", "colamd",
        "colloc", "convhulln", "convn", "csymamd",
        "cummax", "cummin", "daspk", "daspk_options",
        "dasrt", "dasrt_options", "dassl", "dassl_options",
        "dbclear", "dbdown", "dbstack", "dbstatus",
        "dbstop", "dbtype", "dbup", "dbwhere", "det",
        "dlmread", "dmperm", "dot", "eig", "eigs",
        "endgrent", "endpwent", "etree", "fft", "fftn",
        "fftw", "filter", "find", "full", "gcd",
        "getgrent", "getgrgid", "getgrnam", "getpwent",
        "getpwnam", "getpwuid", "getrusage", "givens",
        "gmtime", "gnuplot_binary", "hess", "ifft",
        "ifftn", "inv", "isdebugmode", "issparse", "kron",
        "localtime", "lookup", "lsode", "lsode_options",
        "lu", "luinc", "luupdate", "matrix_type", "max",
        "min", "mktime", "pinv", "qr", "qrdelete",
        "qrinsert", "qrshift", "qrupdate", "quad",
        "quad_options", "qz", "rand", "rande", "randg",
        "randn", "randp", "randperm", "rcond", "regexp",
        "regexpi", "regexprep", "schur", "setgrent",
        "setpwent", "sort", "spalloc", "sparse", "spparms",
        "sprank", "sqrtm", "strfind", "strftime",
        "strptime", "strrep", "svd", "svd_driver", "syl",
        "symamd", "symbfact", "symrcm", "time", "tsearch",
        "typecast", "urlread", "urlwrite")

    mapping_kw = (
        "abs", "acos", "acosh", "acot", "acoth", "acsc",
        "acsch", "angle", "arg", "asec", "asech", "asin",
        "asinh", "atan", "atanh", "beta", "betainc",
        "betaln", "bincoeff", "cbrt", "ceil", "conj", "cos",
        "cosh", "cot", "coth", "csc", "csch", "erf", "erfc",
        "erfcx", "erfinv", "exp", "finite", "fix", "floor",
        "fmod", "gamma", "gammainc", "gammaln", "imag",
        "isalnum", "isalpha", "isascii", "iscntrl",
        "isdigit", "isfinite", "isgraph", "isinf",
        "islower", "isna", "isnan", "isprint", "ispunct",
        "isspace", "isupper", "isxdigit", "lcm", "lgamma",
        "log", "lower", "mod", "real", "rem", "round",
        "roundb", "sec", "sech", "sign", "sin", "sinh",
        "sqrt", "tan", "tanh", "toascii", "tolower", "xor")

    builtin_consts = (
        "EDITOR", "EXEC_PATH", "I", "IMAGE_PATH", "NA",
        "OCTAVE_HOME", "OCTAVE_VERSION", "PAGER",
        "PAGER_FLAGS", "SEEK_CUR", "SEEK_END", "SEEK_SET",
        "SIG", "S_ISBLK", "S_ISCHR", "S_ISDIR", "S_ISFIFO",
        "S_ISLNK", "S_ISREG", "S_ISSOCK", "WCONTINUE",
        "WCOREDUMP", "WEXITSTATUS", "WIFCONTINUED",
        "WIFEXITED", "WIFSIGNALED", "WIFSTOPPED", "WNOHANG",
        "WSTOPSIG", "WTERMSIG", "WUNTRACED")

    tokens = {
        'root': [
            # We should look into multiline comments
            (r'[%#].*$', Comment),
            (r'^\s*function\b', Keyword, 'deffunc'),

            # from 'iskeyword' on hg changeset 8cc154f45e37
            (words((
                '__FILE__', '__LINE__', 'break', 'case', 'catch', 'classdef', 'continue', 'do', 'else',
                'elseif', 'end', 'end_try_catch', 'end_unwind_protect', 'endclassdef',
                'endevents', 'endfor', 'endfunction', 'endif', 'endmethods', 'endproperties',
                'endswitch', 'endwhile', 'events', 'for', 'function', 'get', 'global', 'if', 'methods',
                'otherwise', 'persistent', 'properties', 'return', 'set', 'static', 'switch', 'try',
                'until', 'unwind_protect', 'unwind_protect_cleanup', 'while'), suffix=r'\b'),
             Keyword),

            (words(builtin_kw + command_kw + function_kw + loadable_kw + mapping_kw,
                   suffix=r'\b'),  Name.Builtin),

            (words(builtin_consts, suffix=r'\b'), Name.Constant),

            # operators in Octave but not Matlab:
            (r'-=|!=|!|/=|--', Operator),
            # operators (`<=` and `>=` must be listed before `<` and `>`:
            # the first matching alternative wins, so the shorter forms
            # would otherwise split them into `<` followed by `=`):
            (r'-|==|~=|<=|>=|<|>|&&|&|~|\|\|?', Operator),
            # operators in Octave but not Matlab requiring escape for re:
            (r'\*=|\+=|\^=|\/=|\\=|\*\*|\+\+|\.\*\*', Operator),
            # operators requiring escape for re:
            (r'\.\*|\*|\+|\.\^|\.\\|\.\/|\/|\\', Operator),

            # numbers (must come before punctuation, otherwise the leading
            # dot of `.5` is consumed as punctuation):
            (r'(\d+\.\d*|\d*\.\d+)([eEf][+-]?[0-9]+)?', Number.Float),
            (r'\d+[eEf][+-]?[0-9]+', Number.Float),
            (r'\d+', Number.Integer),

            # punctuation:
            (r'[\[\](){}:@.,]', Punctuation),
            (r'=|:|;', Punctuation),

            (r'"[^"]*"', String),

            # quote can be transpose, instead of string:
            # (not great, but handles common cases...)
            (r'(?<=[\w)\].])\'+', Operator),
            (r'(?<![\w)\].])\'', String, 'string'),

            (r'[a-zA-Z_]\w*', Name),
            (r'.', Text),
        ],
        'string': [
            # rest of a single-quoted string, up to and including the
            # closing quote
            (r"[^']*'", String, '#pop'),
        ],
        'deffunc': [
            # optional "outputs =", then the function name and its
            # parenthesised argument list
            (r'(\s*)(?:(.+)(\s*)(=)(\s*))?(.+)(\()(.*)(\))(\s*)',
             bygroups(Whitespace, Text, Whitespace, Punctuation,
                      Whitespace, Name.Function, Punctuation, Text,
                      Punctuation, Whitespace), '#pop'),
            # function with no args
            (r'(\s*)([a-zA-Z_]\w*)', bygroups(Text, Name.Function), '#pop'),
        ],
    }

    def analyse_text(text):
        """Octave is quite hard to spot, and it looks like Matlab as well."""
        return 0
653
654
class ScilabLexer(RegexLexer):
    """
    For Scilab source code.

    The ``root`` state handles ordinary code, ``string`` a single-quoted
    string and ``deffunc`` the remainder of a ``function`` declaration line.
    Builtin names come from the ``_scilab_builtins`` module.

    .. versionadded:: 1.5
    """
    name = 'Scilab'
    aliases = ['scilab']
    filenames = ['*.sci', '*.sce', '*.tst']
    mimetypes = ['text/scilab']

    tokens = {
        'root': [
            (r'//.*?$', Comment.Single),
            (r'^\s*function\b', Keyword, 'deffunc'),

            # NOTE(review): this keyword list is identical to the one used
            # by OctaveLexer -- confirm it is the intended set for Scilab.
            (words((
                '__FILE__', '__LINE__', 'break', 'case', 'catch', 'classdef', 'continue', 'do', 'else',
                'elseif', 'end', 'end_try_catch', 'end_unwind_protect', 'endclassdef',
                'endevents', 'endfor', 'endfunction', 'endif', 'endmethods', 'endproperties',
                'endswitch', 'endwhile', 'events', 'for', 'function', 'get', 'global', 'if', 'methods',
                'otherwise', 'persistent', 'properties', 'return', 'set', 'static', 'switch', 'try',
                'until', 'unwind_protect', 'unwind_protect_cleanup', 'while'), suffix=r'\b'),
             Keyword),

            (words(_scilab_builtins.functions_kw +
                   _scilab_builtins.commands_kw +
                   _scilab_builtins.macros_kw, suffix=r'\b'), Name.Builtin),

            (words(_scilab_builtins.variables_kw, suffix=r'\b'), Name.Constant),

            # operators (`<=` and `>=` must be listed before `<` and `>`:
            # the first matching alternative wins, so the shorter forms
            # would otherwise split them into `<` followed by `=`):
            (r'-|==|~=|<=|>=|<|>|&&|&|~|\|\|?', Operator),
            # operators requiring escape for re:
            (r'\.\*|\*|\+|\.\^|\.\\|\.\/|\/|\\', Operator),

            # numbers (must come before punctuation, otherwise the leading
            # dot of `.5` is consumed as punctuation):
            (r'(\d+\.\d*|\d*\.\d+)([eEf][+-]?[0-9]+)?', Number.Float),
            (r'\d+[eEf][+-]?[0-9]+', Number.Float),
            (r'\d+', Number.Integer),

            # punctuation:
            (r'[\[\](){}@.,=:;]', Punctuation),

            (r'"[^"]*"', String),

            # quote can be transpose, instead of string:
            # (not great, but handles common cases...)
            (r'(?<=[\w)\].])\'+', Operator),
            (r'(?<![\w)\].])\'', String, 'string'),

            (r'[a-zA-Z_]\w*', Name),
            (r'.', Text),
        ],
        'string': [
            # rest of a single-quoted string, up to and including the
            # closing quote
            (r"[^']*'", String, '#pop'),
            # no closing quote left in the text: consume one character and
            # leave the string state
            (r'.', String, '#pop'),
        ],
        'deffunc': [
            # optional "outputs =", then the function name and its
            # parenthesised argument list
            (r'(\s*)(?:(.+)(\s*)(=)(\s*))?(.+)(\()(.*)(\))(\s*)',
             bygroups(Whitespace, Text, Whitespace, Punctuation,
                      Whitespace, Name.Function, Punctuation, Text,
                      Punctuation, Whitespace), '#pop'),
            # function with no args
            (r'(\s*)([a-zA-Z_]\w*)', bygroups(Text, Name.Function), '#pop'),
        ],
    }
721