1# This file is part of darktable, 2# copyright (c) 2013-2020 tobias ellinghaus. 3# 4# darktable is free software: you can redistribute it and/or modify 5# it under the terms of the GNU General Public License as published by 6# the Free Software Foundation, either version 3 of the License, or 7# (at your option) any later version. 8# 9# darktable is distributed in the hope that it will be useful, 10# but WITHOUT ANY WARRANTY; without even the implied warranty of 11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12# GNU General Public License for more details. 13# 14# You should have received a copy of the GNU General Public License 15# along with darktable. If not, see <http://www.gnu.org/licenses/>. 16 17package scanner; 18 19use strict; 20use warnings; 21 22use Exporter; 23our @ISA = 'Exporter'; 24our @EXPORT = qw( @token %comments 25 $P_LINENO $P_FILENAME $P_TYPE $P_VALUE 26 $T_NONE $T_IDENT $T_KEYWORD $T_INTEGER_LITERAL $T_OPERATOR 27 $K_UNSIGNED $K_SIGNED $K_GBOOLEAN $K_CHAR $K_INT8 $K_UINT8 $K_SHORT $K_USHORT $K_INT $K_UINT $K_LONG $K_ULONG $K_FLOAT $K_DOUBLE $K_COMPLEX $K_TYPEDEF $K_STRUCT $K_UNION $K_CONST $K_VOLATILE $K_STATIC $K_ENUM $K_VOID $K_DT_MODULE_INTROSPECTION 28 $O_ASTERISK $O_AMPERSAND $O_SEMICOLON $O_COMMA $O_COLON $O_SLASH $O_LEFTROUND $O_RIGHTROUND $O_LEFTCURLY $O_RIGHTCURLY $O_LEFTSQUARE $O_RIGHTSQUARE $O_EQUAL 29 read_file get_token look_ahead token2string 30 isid isinteger issemicolon istypedef isstruct isunion isenum isleftcurly isrightcurly isleftround isrightround isleftsquare isrightsquare 31 iscomma isasterisk isequal isconst isvolatile isdtmoduleintrospection 32 ); 33 34 35################# the scanner ################# 36 37my %history; # we don't like cyclic includes 38 39my $lineno = 1; 40my $file; 41our $folder = ""; 42my @tokens; 43our @token; 44our %comments; 45 46my @code; 47 48# parser layout 49our $P_LINENO = 0; 50our $P_FILENAME = 1; 51our $P_TYPE = 2; 52our $P_VALUE = 3; 53 54my $i = 0; 55# token types 56our $T_NONE = $i++; 57our $T_IDENT = $i++; 58our $T_KEYWORD = $i++; 59our $T_INTEGER_LITERAL = $i++; 60our $T_OPERATOR = $i++; 61 62$i = 0; 63# keywords 64my @K_readable; 65our $K_UNSIGNED = $i++; push(@K_readable, 'unsigned'); 66our $K_SIGNED = $i++; push(@K_readable, 'signed'); 67our $K_GBOOLEAN = $i++; push(@K_readable, 'gboolean'); 68our $K_CHAR = $i++; push(@K_readable, 'char'); 69our $K_INT8 = $i++; push(@K_readable, 'int8_t'); 70our $K_UINT8 = $i++; push(@K_readable, 'uint8_t'); 71our $K_SHORT = $i++; push(@K_readable, 'short'); 72our $K_USHORT = $i++; push(@K_readable, 'ushort'); 73our $K_INT = $i++; push(@K_readable, 'int'); 74our $K_UINT = $i++; push(@K_readable, 'uint'); 75our $K_LONG = $i++; push(@K_readable, 'long'); 76our $K_ULONG = $i++; push(@K_readable, 'ulong'); 77our $K_FLOAT = $i++; push(@K_readable, 'float'); 78our $K_DOUBLE = $i++; push(@K_readable, 'double'); 79our $K_COMPLEX = $i++; push(@K_readable, 'complex'); 80our $K_TYPEDEF = $i++; push(@K_readable, 'typedef'); 81our $K_STRUCT = $i++; push(@K_readable, 'struct'); 82our $K_UNION = $i++; push(@K_readable, 'union'); 83our $K_CONST = $i++; push(@K_readable, 'const'); 84our $K_VOLATILE = $i++; push(@K_readable, 'volatile'); 85our $K_STATIC = $i++; push(@K_readable, 'static'); 86our $K_ENUM = $i++; push(@K_readable, 'enum'); 87our $K_VOID = $i++; push(@K_readable, 'void'); 88our $K_DT_MODULE_INTROSPECTION = $i++; push(@K_readable, 'DT_MODULE_INTROSPECTION'); 89my @keywords = ( 90 ['unsigned', $K_UNSIGNED], 91 ['signed', $K_SIGNED], 92 ['gboolean', $K_GBOOLEAN], 93 ['char', $K_CHAR], 94 ['gchar', $K_CHAR], 95 ['int8_t', $K_INT8], 96 ['uint8_t', $K_UINT8], 97 ['short', $K_SHORT], 98 ['int16_t', $K_SHORT], 99 ['uint16_t', $K_USHORT], 100 ['int', $K_INT], 101 ['gint', $K_INT], 102 ['uint', $K_UINT], 103 ['uint32_t', $K_UINT], 104 ['int32_t', $K_INT], 105 ['long', $K_LONG], 106 ['float', $K_FLOAT], 107 ['double', $K_DOUBLE], 108 ['complex', $K_COMPLEX], 109 ['typedef', $K_TYPEDEF], 110 ['struct', $K_STRUCT], 111 ['union', $K_UNION], 112 ['const', $K_CONST], 113 ['volatile', $K_VOLATILE], 114 ['static', $K_STATIC], 115 ['enum', $K_ENUM], 116 ['void', $K_VOID], 117 ['DT_MODULE_INTROSPECTION', $K_DT_MODULE_INTROSPECTION] 118); 119 120$i = 0; 121# operators 122my @O_readable; 123our $O_ASTERISK = $i++; push(@O_readable, '*'); 124our $O_AMPERSAND = $i++; push(@O_readable, '&'); 125our $O_SEMICOLON = $i++; push(@O_readable, ';'); 126our $O_COMMA = $i++; push(@O_readable, ','); 127our $O_COLON = $i++; push(@O_readable, ':'); 128our $O_SLASH = $i++; push(@O_readable, '/'); 129our $O_LEFTROUND = $i++; push(@O_readable, '('); 130our $O_RIGHTROUND = $i++; push(@O_readable, ')'); 131our $O_LEFTCURLY = $i++; push(@O_readable, '{'); 132our $O_RIGHTCURLY = $i++; push(@O_readable, '}'); 133our $O_LEFTSQUARE = $i++; push(@O_readable, '['); 134our $O_RIGHTSQUARE = $i++; push(@O_readable, ']'); 135our $O_EQUAL = $i++; push(@O_readable, '='); 136our $O_PLUS = $i++; push(@O_readable, '+'); 137our $O_MINUS = $i++; push(@O_readable, '-'); 138our $O_LESS = $i++; push(@O_readable, '<'); 139our $O_LESSLESS = $i++; push(@O_readable, '<<'); 140our $O_GREATER = $i++; push(@O_readable, '>'); 141our $O_GREATERGREATER = $i++; push(@O_readable, '>>'); 142our $O_PERCENT = $i++; push(@O_readable, '%'); 143our $O_CIRCUMFLEX = $i++; push(@O_readable, '^'); 144 145sub read_file 146{ 147 $file = shift; 148 149 return if(defined($history{$file})); 150 $history{$file} = 1; 151 152 open(IN, "<$file") or return; 153 $lineno = 1; 154 my @tmp = <IN>; 155 close(IN); 156 my $result = join('', @tmp); 157 unshift(@code, split(//, $result)); 158} 159 160# TODO: support something else than decimal numbers, i.e., octal and hex 161sub read_number 162{ 163 my $c = shift(@code); 164 my @buf; 165 while($c =~ /[0-9]/) 166 { 167 push(@buf, $c); 168 $c = shift(@code); 169 } 170 unshift(@code, $c); 171 return join('', @buf); 172} 173 174sub read_string 175{ 176 my $c = shift(@code); 177 my @buf; 178 while(defined($c) && $c =~ /[a-zA-Z_0-9]/) 179 { 180 push(@buf, $c); 181 $c = shift(@code); 182 } 183 unshift(@code, $c); 184 return join('', @buf); 185} 186 187sub handle_comment 188{ 189 my $_lineno = $lineno; 190 shift(@code); 191 my $c = $code[0]; 192 my @buf; 193 if($c eq '/') 194 { 195 # a comment of the form '//'. this goes till the end of the line 196 while(defined($c) && $c ne "\n") 197 { 198 push(@buf, $c); 199 $c = shift(@code); 200 } 201 unshift(@code, $c); 202 $lineno++; 203 } 204 elsif($c eq '*') 205 { 206 # a comment of the form '/*'. this goes till we find '*/' 207 while(defined($c) && ($c ne '*' || $code[0] ne '/')) 208 { 209 $lineno++ if($c eq "\n"); 210 push(@buf, $c); 211 $c = shift(@code); 212 } 213 push(@buf, $c); 214 } 215 else 216 { 217 # can't happen 218 print STDERR "comment error\n"; 219 } 220 my $comment = join('', @buf); 221 222 push(@{$comments{$file}[$_lineno]{raw}}, $comment); 223} 224 225sub handle_include 226{ 227 my $c = ' '; 228 $c = shift(@code) while($c eq ' '); 229 my $end; 230 if($c eq '"') { $end = '"'; } 231# elsif($c eq '<') { $end = '>'; } 232 else 233 { 234 unshift(@code, $c); 235 return; 236 } 237 $c = shift(@code); 238 my @buf; 239 while(defined($c) && $c ne $end) 240 { 241 if($c eq "\n") # no idea how to handle this, just ignore it 242 { 243 unshift(@code, $c); 244 ++$lineno; 245 return; 246 } 247 push(@buf, $c); 248 $c = shift(@code); 249 } 250 unshift(@code, $c); 251 return if(!defined($c)); 252 253 my $filename = join('', @buf); 254 255 if($filename =~ /^iop|^common/) 256 { 257 # add the current filename and lineno to the code stream so we 258 # can reset these when the included file is scanned 259 # note that all entries in @code coming from the files are single characters, 260 # so we can safely add longer strings 261 unshift(@code, 'undo_include', $file, $lineno); 262 read_file($folder.$filename); 263 } 264} 265 266sub handle_define 267{ 268 # just read until the end of the line 269 my $c = ' '; 270 $c = shift(@code) while(defined($code[0]) && $c ne "\n"); 271 unshift(@code, $c); 272} 273 274sub handle_preprocessor 275{ 276 my $string = read_string(); 277 if($string eq "include") { handle_include(); } 278 elsif($string eq "define") { handle_define(); } 279 unshift(@code, ' '); 280} 281 282sub read_token 283{ 284 for(; defined($code[0]); shift(@code)) 285 { 286 my $c = $code[0]; 287 if($c eq "\n") { ++$lineno;} 288 elsif($c eq " " || $c eq "\t") { next; } 289 elsif($c eq "#") { shift(@code); handle_preprocessor(); next; } 290 elsif($c eq "undo_include") { shift(@code); $file = shift(@code); $lineno = shift(@code); } 291 elsif($c eq "&") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_AMPERSAND); } 292 elsif($c eq "*") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_ASTERISK); } 293 elsif($c eq "/" && ($code[1] eq "/" || $code[1] eq "*" )) 294 { 295 handle_comment(); 296 next; 297 } 298 elsif($c eq ";") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_SEMICOLON); } 299 elsif($c eq ",") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_COMMA); } 300 elsif($c eq "(") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_LEFTROUND); } 301 elsif($c eq ")") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_RIGHTROUND); } 302 elsif($c eq "{") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_LEFTCURLY); } 303 elsif($c eq "}") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_RIGHTCURLY); } 304 elsif($c eq "[") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_LEFTSQUARE); } 305 elsif($c eq "]") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_RIGHTSQUARE); } 306 elsif($c eq ":") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_COLON); } 307 elsif($c eq "=") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_EQUAL); } 308 elsif($c eq "+") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_PLUS); } 309 elsif($c eq "-") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_MINUS); } 310 elsif($c eq "<") 311 { 312 shift(@code); 313 if($code[0] eq "<") 314 { 315 shift(@code); 316 return ($lineno, $file, $T_OPERATOR, $O_LESSLESS); 317 } 318 else 319 { 320 return ($lineno, $file, $T_OPERATOR, $O_LESS); 321 } 322 } 323 elsif($c eq ">") 324 { 325 shift(@code); 326 if($code[0] eq ">") 327 { 328 shift(@code); 329 return ($lineno, $file, $T_OPERATOR, $O_GREATERGREATER); 330 } 331 else 332 { 333 return ($lineno, $file, $T_OPERATOR, $O_GREATER); 334 } 335 } 336 elsif($c eq "%") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_PERCENT); } 337 elsif($c eq "^") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_CIRCUMFLEX); } 338 elsif($c =~ /^[0-9]$/) 339 { 340 my $number = read_number(); 341 return ($lineno, $file, $T_INTEGER_LITERAL, $number); 342 } 343 elsif($c =~ /^[a-zA-Z_]$/) 344 { 345 my $string = read_string(); 346 foreach(@keywords) 347 { 348 my @entry = @{$_}; 349 if($string eq $entry[0]) 350 { 351 return ($lineno, $file, $T_KEYWORD, $entry[1]); 352 } 353 } 354 return ($lineno, $file, $T_IDENT, "$string"); 355 } 356 else { 357 # we don't care that we can't understand every input symbol, we just read over them until we reach something we know. 358 # everything we see from there on should be handled by the scanner/parser 359 # print STDERR "scanner error: ".$c."\n"; 360 } 361 } 362 return ($lineno, $file, $T_NONE, 0); 363} 364 365sub get_token 366{ 367 my $n_tokens = @tokens; 368 return read_token() if($n_tokens == 0); 369 return @{shift(@tokens)}; 370} 371 372sub look_ahead 373{ 374 my $steps = shift; 375 my $n_tokens = @tokens; 376 377 return $tokens[$steps-1] if($n_tokens >= $steps); 378 379 my @token; 380 for(my $i = $n_tokens; $i < $steps; ++$i ) 381 { 382 @token = read_token(); 383 return @token if($token[$P_TYPE] == $T_NONE); # Can't look ahead that far. 384 push(@tokens, [@token]); 385 } 386 return @token; 387} 388 389sub token2string 390{ 391 my $token = shift; 392 my $result; 393 394 if ($token[$P_TYPE] == $T_NONE) { $result = '<EMPTY TOKEN>'; } 395 elsif($token[$P_TYPE] == $T_IDENT) { $result = $token[$P_VALUE]; } 396 elsif($token[$P_TYPE] == $T_KEYWORD) { $result = $K_readable[$token[$P_VALUE]]; } 397 elsif($token[$P_TYPE] == $T_INTEGER_LITERAL) { $result = $token[$P_VALUE]; } 398 elsif($token[$P_TYPE] == $T_OPERATOR) { $result = $O_readable[$token[$P_VALUE]]; } 399 else { $result = '<UNKNOWN TOKEN TYPE>'; } 400 401 return $result; 402} 403 404sub issemicolon { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_SEMICOLON); } 405sub isleftcurly { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_LEFTCURLY); } 406sub isrightcurly { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_RIGHTCURLY); } 407sub isleftround { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_LEFTROUND); } 408sub isrightround { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_RIGHTROUND); } 409sub isleftsquare { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_LEFTSQUARE); } 410sub isrightsquare { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_RIGHTSQUARE); } 411sub iscomma { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_COMMA); } 412sub isasterisk { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_ASTERISK); } 413sub isequal { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_EQUAL); } 414sub isid { my $token = shift; return ($token[$P_TYPE] == $T_IDENT); } 415sub isinteger { my $token = shift; return ($token[$P_TYPE] == $T_INTEGER_LITERAL); } 416sub istypedef { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_TYPEDEF); } 417sub isstruct { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_STRUCT); } 418sub isunion { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_UNION); } 419sub isenum { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_ENUM); } 420sub isconst { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_CONST); } 421sub isvolatile { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_VOLATILE); } 422sub isdtmoduleintrospection { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_DT_MODULE_INTROSPECTION); } 423 4241; 425 426# modelines: These editor modelines have been set for all relevant files by tools/update_modelines.sh 427# vim: shiftwidth=2 expandtab tabstop=2 cindent 428# kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-space on; 429