1#  This file is part of darktable,
2#  copyright (c) 2013-2020 tobias ellinghaus.
3#
4#  darktable is free software: you can redistribute it and/or modify
5#  it under the terms of the GNU General Public License as published by
6#  the Free Software Foundation, either version 3 of the License, or
7#  (at your option) any later version.
8#
9#  darktable is distributed in the hope that it will be useful,
10#  but WITHOUT ANY WARRANTY; without even the implied warranty of
11#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12#  GNU General Public License for more details.
13#
14#  You should have received a copy of the GNU General Public License
15#  along with darktable.  If not, see <http://www.gnu.org/licenses/>.
16
17package scanner;
18
19use strict;
20use warnings;
21
22use Exporter;
23our @ISA = 'Exporter';
24our @EXPORT = qw( @token %comments
25                  $P_LINENO $P_FILENAME $P_TYPE $P_VALUE
26                  $T_NONE $T_IDENT $T_KEYWORD $T_INTEGER_LITERAL $T_OPERATOR
27                  $K_UNSIGNED $K_SIGNED $K_GBOOLEAN $K_CHAR $K_INT8 $K_UINT8 $K_SHORT $K_USHORT $K_INT $K_UINT $K_LONG $K_ULONG $K_FLOAT $K_DOUBLE $K_COMPLEX $K_TYPEDEF $K_STRUCT $K_UNION $K_CONST $K_VOLATILE $K_STATIC $K_ENUM $K_VOID $K_DT_MODULE_INTROSPECTION
28                  $O_ASTERISK $O_AMPERSAND $O_SEMICOLON $O_COMMA $O_COLON $O_SLASH $O_LEFTROUND $O_RIGHTROUND $O_LEFTCURLY $O_RIGHTCURLY $O_LEFTSQUARE $O_RIGHTSQUARE $O_EQUAL
29                  read_file get_token look_ahead token2string
30                  isid isinteger issemicolon istypedef isstruct isunion isenum isleftcurly isrightcurly isleftround isrightround isleftsquare isrightsquare
31                  iscomma isasterisk isequal isconst isvolatile isdtmoduleintrospection
32                );
33
34
35################# the scanner #################
36
37my %history; # we don't like cyclic includes
38
39my $lineno = 1;
40my $file;
41our $folder = "";
42my @tokens;
43our @token;
44our %comments;
45
46my @code;
47
48# parser layout
49our $P_LINENO = 0;
50our $P_FILENAME = 1;
51our $P_TYPE = 2;
52our $P_VALUE = 3;
53
54my $i = 0;
55# token types
56our $T_NONE = $i++;
57our $T_IDENT = $i++;
58our $T_KEYWORD = $i++;
59our $T_INTEGER_LITERAL = $i++;
60our $T_OPERATOR = $i++;
61
62$i = 0;
63# keywords
64my  @K_readable;
65our $K_UNSIGNED = $i++; push(@K_readable, 'unsigned');
66our $K_SIGNED = $i++; push(@K_readable, 'signed');
67our $K_GBOOLEAN = $i++; push(@K_readable, 'gboolean');
68our $K_CHAR = $i++; push(@K_readable, 'char');
69our $K_INT8 = $i++; push(@K_readable, 'int8_t');
70our $K_UINT8 = $i++; push(@K_readable, 'uint8_t');
71our $K_SHORT = $i++; push(@K_readable, 'short');
72our $K_USHORT = $i++; push(@K_readable, 'ushort');
73our $K_INT = $i++; push(@K_readable, 'int');
74our $K_UINT = $i++; push(@K_readable, 'uint');
75our $K_LONG = $i++; push(@K_readable, 'long');
76our $K_ULONG = $i++; push(@K_readable, 'ulong');
77our $K_FLOAT = $i++; push(@K_readable, 'float');
78our $K_DOUBLE = $i++; push(@K_readable, 'double');
79our $K_COMPLEX = $i++; push(@K_readable, 'complex');
80our $K_TYPEDEF = $i++; push(@K_readable, 'typedef');
81our $K_STRUCT = $i++; push(@K_readable, 'struct');
82our $K_UNION = $i++; push(@K_readable, 'union');
83our $K_CONST = $i++; push(@K_readable, 'const');
84our $K_VOLATILE = $i++; push(@K_readable, 'volatile');
85our $K_STATIC = $i++; push(@K_readable, 'static');
86our $K_ENUM = $i++; push(@K_readable, 'enum');
87our $K_VOID = $i++; push(@K_readable, 'void');
88our $K_DT_MODULE_INTROSPECTION = $i++; push(@K_readable, 'DT_MODULE_INTROSPECTION');
89my  @keywords = (
90      ['unsigned', $K_UNSIGNED],
91      ['signed', $K_SIGNED],
92      ['gboolean', $K_GBOOLEAN],
93      ['char', $K_CHAR],
94      ['gchar', $K_CHAR],
95      ['int8_t', $K_INT8],
96      ['uint8_t', $K_UINT8],
97      ['short', $K_SHORT],
98      ['int16_t', $K_SHORT],
99      ['uint16_t', $K_USHORT],
100      ['int', $K_INT],
101      ['gint', $K_INT],
102      ['uint', $K_UINT],
103      ['uint32_t', $K_UINT],
104      ['int32_t', $K_INT],
105      ['long', $K_LONG],
106      ['float', $K_FLOAT],
107      ['double', $K_DOUBLE],
108      ['complex', $K_COMPLEX],
109      ['typedef', $K_TYPEDEF],
110      ['struct', $K_STRUCT],
111      ['union', $K_UNION],
112      ['const', $K_CONST],
113      ['volatile', $K_VOLATILE],
114      ['static', $K_STATIC],
115      ['enum', $K_ENUM],
116      ['void', $K_VOID],
117      ['DT_MODULE_INTROSPECTION', $K_DT_MODULE_INTROSPECTION]
118);
119
120$i = 0;
121# operators
122my  @O_readable;
123our $O_ASTERISK = $i++; push(@O_readable, '*');
124our $O_AMPERSAND = $i++; push(@O_readable, '&');
125our $O_SEMICOLON = $i++; push(@O_readable, ';');
126our $O_COMMA = $i++; push(@O_readable, ',');
127our $O_COLON = $i++; push(@O_readable, ':');
128our $O_SLASH = $i++; push(@O_readable, '/');
129our $O_LEFTROUND = $i++; push(@O_readable, '(');
130our $O_RIGHTROUND = $i++; push(@O_readable, ')');
131our $O_LEFTCURLY = $i++; push(@O_readable, '{');
132our $O_RIGHTCURLY = $i++; push(@O_readable, '}');
133our $O_LEFTSQUARE = $i++; push(@O_readable, '[');
134our $O_RIGHTSQUARE = $i++; push(@O_readable, ']');
135our $O_EQUAL = $i++; push(@O_readable, '=');
136our $O_PLUS = $i++; push(@O_readable, '+');
137our $O_MINUS = $i++; push(@O_readable, '-');
138our $O_LESS = $i++; push(@O_readable, '<');
139our $O_LESSLESS = $i++; push(@O_readable, '<<');
140our $O_GREATER = $i++; push(@O_readable, '>');
141our $O_GREATERGREATER = $i++; push(@O_readable, '>>');
142our $O_PERCENT = $i++; push(@O_readable, '%');
143our $O_CIRCUMFLEX = $i++; push(@O_readable, '^');
144
145sub read_file
146{
147  $file = shift;
148
149  return if(defined($history{$file}));
150  $history{$file} = 1;
151
152  open(IN, "<$file") or return;
153  $lineno = 1;
154  my @tmp = <IN>;
155  close(IN);
156  my $result = join('', @tmp);
157  unshift(@code, split(//, $result));
158}
159
160# TODO: support something else than decimal numbers, i.e., octal and hex
161sub read_number
162{
163  my $c = shift(@code);
164  my @buf;
165  while($c =~ /[0-9]/)
166  {
167    push(@buf, $c);
168    $c = shift(@code);
169  }
170  unshift(@code, $c);
171  return join('', @buf);
172}
173
174sub read_string
175{
176  my $c = shift(@code);
177  my @buf;
178  while(defined($c) && $c =~ /[a-zA-Z_0-9]/)
179  {
180    push(@buf, $c);
181    $c = shift(@code);
182  }
183  unshift(@code, $c);
184  return join('', @buf);
185}
186
187sub handle_comment
188{
189  my $_lineno = $lineno;
190  shift(@code);
191  my $c = $code[0];
192  my @buf;
193  if($c eq '/')
194  {
195    # a comment of the form '//'. this goes till the end of the line
196    while(defined($c) && $c ne "\n")
197    {
198      push(@buf, $c);
199      $c = shift(@code);
200    }
201    unshift(@code, $c);
202    $lineno++;
203  }
204  elsif($c eq '*')
205  {
206    # a comment of the form '/*'. this goes till we find '*/'
207    while(defined($c) && ($c ne '*' || $code[0] ne '/'))
208    {
209      $lineno++ if($c eq "\n");
210      push(@buf, $c);
211      $c = shift(@code);
212    }
213    push(@buf, $c);
214  }
215  else
216  {
217    # can't happen
218    print STDERR "comment error\n";
219  }
220  my $comment = join('', @buf);
221
222  push(@{$comments{$file}[$_lineno]{raw}}, $comment);
223}
224
225sub handle_include
226{
227  my $c = ' ';
228  $c = shift(@code) while($c eq ' ');
229  my $end;
230  if($c eq '"') { $end = '"'; }
231#   elsif($c eq '<') { $end = '>'; }
232  else
233  {
234    unshift(@code, $c);
235    return;
236  }
237  $c = shift(@code);
238  my @buf;
239  while(defined($c) && $c ne $end)
240  {
241    if($c eq "\n") # no idea how to handle this, just ignore it
242    {
243      unshift(@code, $c);
244      ++$lineno;
245      return;
246    }
247    push(@buf, $c);
248    $c = shift(@code);
249  }
250  unshift(@code, $c);
251  return if(!defined($c));
252
253  my $filename = join('', @buf);
254
255  if($filename =~ /^iop|^common/)
256  {
257    # add the current filename and lineno to the code stream so we
258    # can reset these when the included file is scanned
259    # note that all entries in @code coming from the files are single characters,
260    # so we can safely add longer strings
261    unshift(@code, 'undo_include', $file, $lineno);
262    read_file($folder.$filename);
263  }
264}
265
266sub handle_define
267{
268  # just read until the end of the line
269  my $c = ' ';
270  $c = shift(@code) while(defined($code[0]) && $c ne "\n");
271  unshift(@code, $c);
272}
273
274sub handle_preprocessor
275{
276  my $string = read_string();
277  if($string eq "include") { handle_include(); }
278  elsif($string eq "define") { handle_define(); }
279  unshift(@code, ' ');
280}
281
282sub read_token
283{
284  for(; defined($code[0]); shift(@code))
285  {
286    my $c = $code[0];
287    if($c eq "\n") { ++$lineno;}
288    elsif($c eq " " || $c eq "\t") { next; }
289    elsif($c eq "#") { shift(@code); handle_preprocessor(); next; }
290    elsif($c eq "undo_include") { shift(@code); $file = shift(@code); $lineno = shift(@code); }
291    elsif($c eq "&") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_AMPERSAND); }
292    elsif($c eq "*") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_ASTERISK); }
293    elsif($c eq "/" && ($code[1] eq "/" || $code[1] eq "*" ))
294    {
295      handle_comment();
296      next;
297    }
298    elsif($c eq ";") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_SEMICOLON); }
299    elsif($c eq ",") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_COMMA); }
300    elsif($c eq "(") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_LEFTROUND); }
301    elsif($c eq ")") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_RIGHTROUND); }
302    elsif($c eq "{") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_LEFTCURLY); }
303    elsif($c eq "}") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_RIGHTCURLY); }
304    elsif($c eq "[") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_LEFTSQUARE); }
305    elsif($c eq "]") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_RIGHTSQUARE); }
306    elsif($c eq ":") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_COLON); }
307    elsif($c eq "=") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_EQUAL); }
308    elsif($c eq "+") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_PLUS); }
309    elsif($c eq "-") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_MINUS); }
310    elsif($c eq "<")
311    {
312      shift(@code);
313      if($code[0] eq "<")
314      {
315        shift(@code);
316        return ($lineno, $file, $T_OPERATOR, $O_LESSLESS);
317      }
318      else
319      {
320        return ($lineno, $file, $T_OPERATOR, $O_LESS);
321      }
322    }
323    elsif($c eq ">")
324    {
325      shift(@code);
326      if($code[0] eq ">")
327      {
328        shift(@code);
329        return ($lineno, $file, $T_OPERATOR, $O_GREATERGREATER);
330      }
331      else
332      {
333        return ($lineno, $file, $T_OPERATOR, $O_GREATER);
334      }
335    }
336    elsif($c eq "%") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_PERCENT); }
337    elsif($c eq "^") { shift(@code); return ($lineno, $file, $T_OPERATOR, $O_CIRCUMFLEX); }
338    elsif($c =~ /^[0-9]$/)
339    {
340      my $number = read_number();
341      return ($lineno, $file, $T_INTEGER_LITERAL, $number);
342    }
343    elsif($c =~ /^[a-zA-Z_]$/)
344    {
345      my $string = read_string();
346      foreach(@keywords)
347      {
348        my @entry = @{$_};
349        if($string eq $entry[0])
350        {
351          return ($lineno, $file, $T_KEYWORD, $entry[1]);
352        }
353      }
354      return ($lineno, $file, $T_IDENT, "$string");
355    }
356    else {
357      # we don't care that we can't understand every input symbol, we just read over them until we reach something we know.
358      # everything we see from there on should be handled by the scanner/parser
359      # print STDERR "scanner error: ".$c."\n";
360    }
361  }
362  return ($lineno, $file, $T_NONE, 0);
363}
364
365sub get_token
366{
367  my $n_tokens = @tokens;
368  return read_token() if($n_tokens == 0);
369  return @{shift(@tokens)};
370}
371
372sub look_ahead
373{
374  my $steps = shift;
375  my $n_tokens = @tokens;
376
377  return $tokens[$steps-1] if($n_tokens >= $steps);
378
379  my @token;
380  for(my $i = $n_tokens; $i < $steps; ++$i )
381  {
382    @token = read_token();
383    return @token if($token[$P_TYPE] == $T_NONE);              # Can't look ahead that far.
384    push(@tokens, [@token]);
385  }
386  return @token;
387}
388
389sub token2string
390{
391  my $token = shift;
392  my $result;
393
394  if   ($token[$P_TYPE] == $T_NONE)            { $result = '<EMPTY TOKEN>'; }
395  elsif($token[$P_TYPE] == $T_IDENT)           { $result = $token[$P_VALUE]; }
396  elsif($token[$P_TYPE] == $T_KEYWORD)         { $result = $K_readable[$token[$P_VALUE]]; }
397  elsif($token[$P_TYPE] == $T_INTEGER_LITERAL) { $result = $token[$P_VALUE]; }
398  elsif($token[$P_TYPE] == $T_OPERATOR)        { $result = $O_readable[$token[$P_VALUE]]; }
399  else                                         { $result = '<UNKNOWN TOKEN TYPE>'; }
400
401  return $result;
402}
403
404sub issemicolon { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_SEMICOLON); }
405sub isleftcurly { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_LEFTCURLY); }
406sub isrightcurly { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_RIGHTCURLY); }
407sub isleftround { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_LEFTROUND); }
408sub isrightround { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_RIGHTROUND); }
409sub isleftsquare { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_LEFTSQUARE); }
410sub isrightsquare { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_RIGHTSQUARE); }
411sub iscomma { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_COMMA); }
412sub isasterisk { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_ASTERISK); }
413sub isequal { my $token = shift; return ($token[$P_TYPE] == $T_OPERATOR && $token[$P_VALUE] == $O_EQUAL); }
414sub isid { my $token = shift; return ($token[$P_TYPE] == $T_IDENT); }
415sub isinteger { my $token = shift; return ($token[$P_TYPE] == $T_INTEGER_LITERAL); }
416sub istypedef { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_TYPEDEF); }
417sub isstruct { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_STRUCT); }
418sub isunion { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_UNION); }
419sub isenum { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_ENUM); }
420sub isconst { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_CONST); }
421sub isvolatile { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_VOLATILE); }
422sub isdtmoduleintrospection { my $token = shift; return ($token[$P_TYPE] == $T_KEYWORD && $token[$P_VALUE] == $K_DT_MODULE_INTROSPECTION); }
423
4241;
425
426# modelines: These editor modelines have been set for all relevant files by tools/update_modelines.sh
427# vim: shiftwidth=2 expandtab tabstop=2 cindent
428# kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-space on;
429