# This file is part of darktable, # copyright (c) 2013-2020 tobias ellinghaus. # # darktable is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # darktable is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with darktable. If not, see . package scanner; use strict; use warnings; use Exporter; our @ISA = 'Exporter'; our @EXPORT = qw( @token %comments $P_LINENO $P_FILENAME $P_TYPE $P_VALUE $T_NONE $T_IDENT $T_KEYWORD $T_INTEGER_LITERAL $T_OPERATOR $K_UNSIGNED $K_SIGNED $K_GBOOLEAN $K_CHAR $K_INT8 $K_UINT8 $K_SHORT $K_USHORT $K_INT $K_UINT $K_LONG $K_ULONG $K_FLOAT $K_DOUBLE $K_COMPLEX $K_TYPEDEF $K_STRUCT $K_UNION $K_CONST $K_VOLATILE $K_STATIC $K_ENUM $K_VOID $K_DT_MODULE_INTROSPECTION $O_ASTERISK $O_AMPERSAND $O_SEMICOLON $O_COMMA $O_COLON $O_SLASH $O_LEFTROUND $O_RIGHTROUND $O_LEFTCURLY $O_RIGHTCURLY $O_LEFTSQUARE $O_RIGHTSQUARE $O_EQUAL read_file get_token look_ahead token2string isid isinteger issemicolon istypedef isstruct isunion isenum isleftcurly isrightcurly isleftround isrightround isleftsquare isrightsquare iscomma isasterisk isequal isconst isvolatile isdtmoduleintrospection ); ################# the scanner ################# my %history; # we don't like cyclic includes my $lineno = 1; my $file; our $folder = ""; my @tokens; our @token; our %comments; my @code; # parser layout our $P_LINENO = 0; our $P_FILENAME = 1; our $P_TYPE = 2; our $P_VALUE = 3; my $i = 0; # token types our $T_NONE = $i++; our $T_IDENT = $i++; our $T_KEYWORD = $i++; our $T_INTEGER_LITERAL = $i++; our $T_OPERATOR = $i++; $i = 0; # keywords my @K_readable; our $K_UNSIGNED = $i++; push(@K_readable, 'unsigned'); our $K_SIGNED = $i++; push(@K_readable, 'signed'); our $K_GBOOLEAN = $i++; push(@K_readable, 'gboolean'); our $K_CHAR = $i++; push(@K_readable, 'char'); our $K_INT8 = $i++; push(@K_readable, 'int8_t'); our $K_UINT8 = $i++; push(@K_readable, 'uint8_t'); our $K_SHORT = $i++; push(@K_readable, 'short'); our $K_USHORT = $i++; push(@K_readable, 'ushort'); our $K_INT = $i++; push(@K_readable, 'int'); our $K_UINT = $i++; push(@K_readable, 'uint'); our $K_LONG = $i++; push(@K_readable, 'long'); our $K_ULONG = $i++; push(@K_readable, 'ulong'); our $K_FLOAT = $i++; push(@K_readable, 'float'); our $K_DOUBLE = $i++; push(@K_readable, 'double'); our $K_COMPLEX = $i++; push(@K_readable, 'complex'); our $K_TYPEDEF = $i++; push(@K_readable, 'typedef'); our $K_STRUCT = $i++; push(@K_readable, 'struct'); our $K_UNION = $i++; push(@K_readable, 'union'); our $K_CONST = $i++; push(@K_readable, 'const'); our $K_VOLATILE = $i++; push(@K_readable, 'volatile'); our $K_STATIC = $i++; push(@K_readable, 'static'); our $K_ENUM = $i++; push(@K_readable, 'enum'); our $K_VOID = $i++; push(@K_readable, 'void'); our $K_DT_MODULE_INTROSPECTION = $i++; push(@K_readable, 'DT_MODULE_INTROSPECTION'); my @keywords = ( ['unsigned', $K_UNSIGNED], ['signed', $K_SIGNED], ['gboolean', $K_GBOOLEAN], ['char', $K_CHAR], ['gchar', $K_CHAR], ['int8_t', $K_INT8], ['uint8_t', $K_UINT8], ['short', $K_SHORT], ['int16_t', $K_SHORT], ['uint16_t', $K_USHORT], ['int', $K_INT], ['gint', $K_INT], ['uint', $K_UINT], ['uint32_t', $K_UINT], ['int32_t', $K_INT], ['long', $K_LONG], ['float', $K_FLOAT], ['double', $K_DOUBLE], ['complex', $K_COMPLEX], ['typedef', $K_TYPEDEF], ['struct', $K_STRUCT], ['union', $K_UNION], ['const', $K_CONST], ['volatile', $K_VOLATILE], ['static', $K_STATIC], ['enum', $K_ENUM], ['void', $K_VOID], ['DT_MODULE_INTROSPECTION', $K_DT_MODULE_INTROSPECTION] ); $i = 0; # operators my @O_readable; our $O_ASTERISK = $i++; push(@O_readable, '*'); our $O_AMPERSAND = $i++; push(@O_readable, '&'); our $O_SEMICOLON = $i++; push(@O_readable, ';'); our $O_COMMA = $i++; push(@O_readable, ','); our $O_COLON = $i++; push(@O_readable, ':'); our $O_SLASH = $i++; push(@O_readable, '/'); our $O_LEFTROUND = $i++; push(@O_readable, '('); our $O_RIGHTROUND = $i++; push(@O_readable, ')'); our $O_LEFTCURLY = $i++; push(@O_readable, '{'); our $O_RIGHTCURLY = $i++; push(@O_readable, '}'); our $O_LEFTSQUARE = $i++; push(@O_readable, '['); our $O_RIGHTSQUARE = $i++; push(@O_readable, ']'); our $O_EQUAL = $i++; push(@O_readable, '='); our $O_PLUS = $i++; push(@O_readable, '+'); our $O_MINUS = $i++; push(@O_readable, '-'); our $O_LESS = $i++; push(@O_readable, '<'); our $O_LESSLESS = $i++; push(@O_readable, '<<'); our $O_GREATER = $i++; push(@O_readable, '>'); our $O_GREATERGREATER = $i++; push(@O_readable, '>>'); our $O_PERCENT = $i++; push(@O_readable, '%'); our $O_CIRCUMFLEX = $i++; push(@O_readable, '^'); sub read_file { $file = shift; return if(defined($history{$file})); $history{$file} = 1; open(IN, "<$file") or return; $lineno = 1; my @tmp = ; close(IN); my $result = join('', @tmp); unshift(@code, split(//, $result)); } # TODO: support something else than decimal numbers, i.e., octal and hex sub read_number { my $c = shift(@code); my @buf; while($c =~ /[0-9]/) { push(@buf, $c); $c = shift(@code); } unshift(@code, $c); return join('', @buf); } sub read_string { my $c = shift(@code); my @buf; while(defined($c) && $c =~ /[a-zA-Z_0-9]/) { push(@buf, $c); $c = shift(@code); } unshift(@code, $c); return join('', @buf); } sub handle_comment { my $_lineno = $lineno; shift(@code); my $c = $code[0]; my @buf; if($c eq '/') { # a comment of the form '//'. this goes till the end of the line while(defined($c) && $c ne "\n") { push(@buf, $c); $c = shift(@code); } unshift(@code, $c); $lineno++; } elsif($c eq '*') { # a comment of the form '/*'. this goes till we find '*/' while(defined($c) && ($c ne '*' || $code[0] ne '/')) { $lineno++ if($c eq "\n"); push(@buf, $c); $c = shift(@code); } push(@buf, $c); } else { # can't happen print STDERR "comment error\n"; } my $comment = join('', @buf); push(@{$comments{$file}[$_lineno]{raw}}, $comment); } sub handle_include { my $c = ' '; $c = shift(@code) while($c eq ' '); my $end; if($c eq '"') { $end = '"'; } # elsif($c eq '<') { $end = '>'; } else { unshift(@code, $c); return; } $c = shift(@code); my @buf; while(defined($c) && $c ne $end) { if($c eq "\n") # no idea how to handle this, just ignore it { unshift(@code, $c); ++$lineno; return; } push(@buf, $c); $c = shift(@code); } unshift(@code, $c); return if(!defined($c)); my $filename = join('', @buf); if($filename =~ /^iop|^common/) { # add the current filename and lineno to the code stream so we # can reset these when the included file is scanned # note that all entries in @code coming from the files are single characters, # so we can safely add longer strings unshift(@code, 'undo_include', $file, $lineno); read_file($folder.$filename); } } sub handle_define { # just read until the end of the line my $c = ' '; $c = shift(@code) while(defined($code[0]) && $c ne "\n"); unshift(@code, $c); } sub handle_preprocessor { my $string = read_string(); if($string eq "include") { handle_include(); } elsif($string eq "define") { handle_define(); } unshift(@code, ' '); } sub read_token { for(; defined($code[0]); shift(@code)) { my $c = $code[0]; if($c eq "\n") { ++$lineno;} elsif($c eq " " || $c eq "\t") { next; } elsif($c eq "#") { shift(@code); handle_preprocessor(); next; } elsif($c eq "undo_include") { shift(@code); $file = shift(@code); $lineno = shift(@code); } elsif($c eq "&") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_AMPERSAND); } elsif($c eq "*") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_ASTERISK); } elsif($c eq "/" && ($code[1] eq "/" || $code[1] eq "*" )) { handle_comment(); next; } elsif($c eq ";") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_SEMICOLON); } elsif($c eq ",") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_COMMA); } elsif($c eq "(") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_LEFTROUND); } elsif($c eq ")") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_RIGHTROUND); } elsif($c eq "{") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_LEFTCURLY); } elsif($c eq "}") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_RIGHTCURLY); } elsif($c eq "[") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_LEFTSQUARE); } elsif($c eq "]") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_RIGHTSQUARE); } elsif($c eq ":") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_COLON); } elsif($c eq "=") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_EQUAL); } elsif($c eq "+") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_PLUS); } elsif($c eq "-") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_MINUS); } elsif($c eq "<") { shift(@code); if($code[0] eq "<") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_LESSLESS); } else { return ($lineno, $file, $T_OPERATOR, $O_LESS); } } elsif($c eq ">") { shift(@code); if($code[0] eq ">") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_GREATERGREATER); } else { return ($lineno, $file, $T_OPERATOR, $O_GREATER); } } elsif($c eq "%") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_PERCENT); } elsif($c eq "^") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_CIRCUMFLEX); } elsif($c =~ /^[0-9]$/) { my $number = read_number(); return ($lineno, $file, $T_INTEGER_LITERAL, $number); } elsif($c =~ /^[a-zA-Z_]$/) { my $string = read_string(); foreach(@keywords) { my @entry = @{$_}; if($string eq $entry[0]) { return ($lineno, $file, $T_KEYWORD, $entry[1]); } } return ($lineno, $file, $T_IDENT, "$string"); } else { # we don't care that we can't understand every input symbol, we just read over them until we reach something we know. # everything we see from there on should be handled by the scanner/parser # print STDERR "scanner error: ".$c."\n"; } } return ($lineno, $file, $T_NONE, 0); } sub get_token { my $n_tokens = @tokens; return read_token() if($n_tokens == 0); return @{shift(@tokens)}; } sub look_ahead { my $steps = shift; my $n_tokens = @tokens; return $tokens[$steps-1] if($n_tokens >= $steps); my @token; for(my $i = $n_tokens; $i < $steps; ++$i ) { @token = read_token(); return @token if($token[$P_TYPE] == $T_NONE); # Can't look ahead that far. push(@tokens, [@token]); } return @token; } sub token2string { my $token = shift; my $result; if ($token[$P_TYPE] == $T_NONE) { $result = ''; } elsif($token[$P_TYPE] == $T_IDENT) { $result = $token[$P_VALUE]; } elsif($token[$P_TYPE] == $T_KEYWORD) { $result = $K_readable[$token[$P_VALUE]]; } elsif($token[$P_TYPE] == $T_INTEGER_LITERAL) { $result = $token[$P_VALUE]; } elsif($token[$P_TYPE] == $T_OPERATOR) { $result = $O_readable[$token[$P_VALUE]]; } else { $result = ''; } return $result; } sub issemicolon { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_SEMICOLON); } sub isleftcurly { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_LEFTCURLY); } sub isrightcurly { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_RIGHTCURLY); } sub isleftround { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_LEFTROUND); } sub isrightround { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_RIGHTROUND); } sub isleftsquare { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_LEFTSQUARE); } sub isrightsquare { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_RIGHTSQUARE); } sub iscomma { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_COMMA); } sub isasterisk { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_ASTERISK); } sub isequal { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_EQUAL); } sub isid { my $token = shift; return ($token[$P_TYPE] == $T_IDENT); } sub isinteger { my $token = shift; return ($token[$P_TYPE] == $T_INTEGER_LITERAL); } sub istypedef { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_TYPEDEF); } sub isstruct { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_STRUCT); } sub isunion { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_UNION); } sub isenum { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_ENUM); } sub isconst { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_CONST); } sub isvolatile { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_VOLATILE); } sub isdtmoduleintrospection { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_DT_MODULE_INTROSPECTION); } 1; # modelines: These editor modelines have been set for all relevant files by tools/update_modelines.sh # vim: shiftwidth=2 expandtab tabstop=2 cindent # kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-space on;