1%% 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 1997-2016. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19%% 20%% 21-module(icscan). 22 23 24-export([scan/2]). 25 26-include("ic.hrl"). 27 28 29%%---------------------------------------------------------------------- 30%%---------------------------------------------------------------------- 31 32-import(lists, [reverse/1]). 33 34 35scan(G, File) -> 36 PL = call_preproc(G, File), 37 call_scan(G, PL). 38 39call_preproc(G, File) -> 40 case ic_options:get_opt(G, use_preproc) of 41 true -> 42 icpreproc:preproc(G, File); 43 false -> 44 case catch file:read_file(File) of 45 {ok, Bin} -> 46 binary_to_list(Bin); 47 Other -> 48 exit(Other) 49 end 50 end. 51 52call_scan(G, PL) -> 53 BE = ic_options:get_opt(G, be), 54 RSL = scan(G, BE, PL, 1, []), 55 lists:reverse(RSL). 56 57 58%% Guard macros used at top scan functions only 59-define(is_number(X), X >= $0 , X =< $9). 60-define(is_upper(X), X >= $A , X =< $Z). 61-define(is_lower(X), X >= $a, X =< $z). 62-define(is_hex_uc(X), X >= $A , X =< $F). 63-define(is_hex_lc(X), X >= $a , X =< $f). 64-define(is_octal(X), X >=$0, X =< $7). 65 66%% Handle: 67%% const wchar aWChar = L'X'; 68scan(G, BE, [$L, $'|Str], Line, Out) -> 69 scan_const(G, BE, wchar, Str, [], Line, Out); 70scan(G, BE, [$L, $"|Str], Line, Out) -> 71 scan_const(G, BE, wstring, Str, [], Line, Out); 72scan(G, BE, [$_, X|Str], Line, Out) when ?is_upper(X) -> 73 scan_name(G, BE, Str, [X], false, Line, Out); 74scan(G, BE, [$_, X|Str], Line, Out) when ?is_lower(X) -> 75 scan_name(G, BE, Str, [X], false, Line, Out); 76scan(G, BE, [X|Str], Line, Out) when ?is_upper(X) -> 77 scan_name(G, BE, Str, [X], true, Line, Out); 78scan(G, BE, [X|Str], Line, Out) when ?is_lower(X) -> 79 scan_name(G, BE, Str, [X], true, Line, Out); 80scan(G, BE, [X|Str], Line, Out) when ?is_number(X) -> 81 scan_number(G, BE, Str, [X], Line, Out); 82scan(G, BE, [9| T], Line, Out) -> scan(G, BE, T, Line, Out); 83scan(G, BE, [32| T], Line, Out) -> scan(G, BE, T, Line, Out); 84scan(G, BE, [$\r|Str], Line, Out) -> 85 scan(G, BE, Str, Line, Out); 86scan(G, BE, [$\n|Str], Line, Out) -> 87 scan(G, BE, Str, Line+1, Out); 88scan(G, BE, [$:, $: | Str], Line, Out) -> 89 scan(G, BE, Str, Line, [{'::', Line} | Out]); 90scan(G, BE, [$/, $/ | Str], Line, Out) -> 91 Rest = skip_to_nl(Str), 92 scan(G, BE, Rest, Line, Out); 93scan(G, BE, [$/, $* | Str], Line, Out) -> 94 Rest = skip_comment(Str), 95 scan(G, BE, Rest, Line, Out); 96scan(G, BE, [$", $\\|Str], Line, Out) -> 97 scan_const(G, BE, string, [$\\|Str], [], Line, Out); 98scan(G, BE, [$"|Str], Line, Out) -> 99 scan_const(G, BE, string, Str, [], Line, Out); 100scan(G, BE, [$', $\\|Str], Line, Out) -> 101 scan_const(G, BE, char, [$\\|Str], [], Line, Out); 102scan(G, BE, [$'|Str], Line, Out) -> 103 scan_const(G, BE, char, Str, [], Line, Out); 104scan(G, BE, [$\\|Str], Line, Out) -> 105 scan_const(G, BE, escaped, [$\\|Str], [], Line, Out); 106scan(G, BE, [$. | Str], Line, Out) -> 107 scan_frac(G, BE, Str, [$.], Line, Out); 108scan(G, BE, [$# | Str], Line, Out) -> 109 scan_preproc(G, BE, Str, Line, Out); 110scan(G, BE, [$<, $< | Str], Line, Out) -> 111 scan(G, BE, Str, Line, [{'<<', Line} | Out]); 112scan(G, BE, [$>, $> | Str], Line, Out) -> 113 scan(G, BE, Str, Line, [{'>>', Line} | Out]); 114scan(G, BE, [C|Str], Line, Out) -> 115 scan(G, BE, Str, Line, [{list_to_atom([C]), Line} | Out]); 116 117scan(_G, _BE, [], _Line, Out) -> 118 Out. 119 120 121scan_number(G, BE, [X|Str], [$0], Line, Out) when X == $X ; X ==$x -> 122 case Str of 123 [D|_TmpStr] when ?is_number(D); ?is_hex_uc(D); ?is_hex_lc(D) -> 124 {Num,Rest} = scan_hex_number(Str,0), 125 scan(G, BE, Rest, Line, [{'<integer_literal>', Line, 126 integer_to_list(Num)} | Out]); 127 [D|TmpStr] -> 128 scan(G, BE, TmpStr, Line, [{list_to_atom([D]), Line} | Out]) 129 end; 130scan_number(G, BE, Str, [$0], Line, Out) -> 131 %% If an integer literal starts with a 0 it may indicate that 132 %% it is represented as an octal number. But, it can also be a fixed 133 %% type which must use padding to match a fixed typedef. For example: 134 %% typedef fixed<5,2> fixed52; 135 %% 123.45d, 123.00d and 023.00d is all valid fixed values. 136 %% Naturally, a float can be defined as 0.14 or 00.14. 137 case pre_scan_number(Str, [], octal) of 138 octal -> 139 {Num, Rest} = scan_octal_number(Str,0), 140 scan(G, BE, Rest, Line, [{'<integer_literal>', Line, 141 integer_to_list(Num)} | Out]); 142 {fixed, Fixed, Rest} -> 143 scan(G, BE, Rest, Line, [{'<fixed_pt_literal>', Line, Fixed} | Out]); 144 float -> 145 %% Not very likely that someone defines a constant as 00.14 but ... 146 NewStr = remove_leading_zeroes(Str), 147 scan(G, BE, NewStr, Line, Out) 148 end; 149scan_number(G, BE, [X|Str], Accum, Line, Out) when ?is_number(X) -> 150 scan_number(G, BE, Str, [X|Accum], Line, Out); 151scan_number(G, BE, [X|Str], Accum, Line, Out) when X==$. -> 152 scan_frac(G, BE, Str, [X|Accum], Line, Out); 153scan_number(G, BE, [X|Str], Accum, Line, Out) when X==$e ; X==$e -> 154 scan_exp(G, BE, Str, [X|Accum], Line, Out); 155scan_number(G, BE, [X|Str], Accum, Line, Out) when X==$D ; X==$d -> 156 scan(G, BE, Str, Line, [{'<fixed_pt_literal>', Line, 157 (lists:reverse(Accum))} | Out]); 158scan_number(G, BE, Str, Accum, Line, Out) -> 159 scan(G, BE, Str, Line, [{'<integer_literal>', Line, 160 (lists:reverse(Accum))} | Out]). 161 162 163remove_leading_zeroes([$0|Rest]) -> 164 remove_leading_zeroes(Rest); 165remove_leading_zeroes(L) -> 166 L. 167 168scan_hex_number([X|Rest],Acc) when X >=$a, X =< $f -> 169 scan_hex_number(Rest,(Acc bsl 4) + (X - $a + 10)); 170scan_hex_number([X|Rest],Acc) when X >=$A, X =< $F -> 171 scan_hex_number(Rest,(Acc bsl 4) + (X - $A + 10)); 172scan_hex_number([X|Rest],Acc) when X >=$0, X =< $9 -> 173 scan_hex_number(Rest,(Acc bsl 4) + (X-$0)); 174scan_hex_number(Rest,Acc) -> 175 {Acc,Rest}. 176 177pre_scan_number([$d|Rest], Acc, _) -> 178 {fixed, [$0|lists:reverse(Acc)], Rest}; 179pre_scan_number([$D|Rest], Acc, _) -> 180 {fixed, [$0|lists:reverse(Acc)], Rest}; 181pre_scan_number([$.|Rest], Acc, _) -> 182 %% Actually, we don't know if it's a float since it can be a fixed. 183 pre_scan_number(Rest, [$.|Acc], float); 184pre_scan_number([X|_], _Acc, _) when X == $E ; X ==$e -> 185 %% Now we now it's a float. 186 float; 187pre_scan_number([X|Rest], Acc, Type) when ?is_number(X) -> 188 pre_scan_number(Rest, [X|Acc], Type); 189pre_scan_number(_Rest, _Acc, Type) -> 190 %% At this point we know it's a octal or float. 191 Type. 192 193scan_octal_number([X|Rest],Acc) when ?is_octal(X) -> 194 scan_octal_number(Rest,(Acc bsl 3) + (X-$0)); 195scan_octal_number(Rest,Acc) -> 196 {Acc, Rest}. 197 198%% Floating point number scan. 199%% 200%% Non trivial scan. A float consists of an integral part, a 201%% decimal point, a fraction part, an e or E and a signed integer 202%% exponent. Either the integer part or the fraction part but not 203%% both may be missing, and either the decimal point or the 204%% exponent part but not both may be missing. The exponent part 205%% must consist of an e or E and a possibly signed exponent. 206%% 207%% Analysis shows that "1." ".7" "1e2" ".5e-3" "1.7e2" "1.7e-2" 208%% is allowed and "1" ".e9" is not. The sign is only allowed just 209%% after an e or E. The scanner reads a number as an integer 210%% until it encounters a "." so the integer part only error case 211%% will not be caught in the scanner (but rather in expression 212%% evaluation) 213 214scan_frac(G, _BE, [$e | _Str], [$.], Line, _Out) -> 215 ic_error:fatal_error(G, {illegal_float, Line}); 216scan_frac(G, _BE, [$E | _Str], [$.], Line, _Out) -> 217 ic_error:fatal_error(G, {illegal_float, Line}); 218scan_frac(G, BE, Str, Accum, Line, Out) -> 219 scan_frac2(G, BE, Str, Accum, Line, Out). 220 221scan_frac2(G, BE, [X|Str], Accum, Line, Out) when ?is_number(X) -> 222 scan_frac2(G, BE, Str, [X|Accum], Line, Out); 223scan_frac2(G, BE, [X|Str], Accum, Line, Out) when X==$e ; X==$E -> 224 scan_exp(G, BE, Str, [X|Accum], Line, Out); 225%% The following case is for fixed (e.g. 123.45d). 226scan_frac2(G, BE, [X|Str], Accum, Line, Out) when X==$d ; X==$D -> 227 scan(G, BE, Str, Line, [{'<fixed_pt_literal>', Line, 228 (lists:reverse(Accum))} | Out]); 229scan_frac2(G, BE, Str, Accum, Line, Out) -> 230 scan(G, BE, Str, Line, [{'<floating_pt_literal>', Line, 231 (lists:reverse(Accum))} | Out]). 232 233scan_exp(G, BE, [X|Str], Accum, Line, Out) when X==$- -> 234 scan_exp2(G, BE, Str, [X|Accum], Line, Out); 235scan_exp(G, BE, Str, Accum, Line, Out) -> 236 scan_exp2(G, BE, Str, Accum, Line, Out). 237 238scan_exp2(G, BE, [X|Str], Accum, Line, Out) when ?is_number(X) -> 239 scan_exp2(G, BE, Str, [X|Accum], Line, Out); 240scan_exp2(G, BE, Str, Accum, Line, Out) -> 241 scan(G, BE, Str, Line, [{'<floating_pt_literal>', Line, 242 (lists:reverse(Accum))} | Out]). 243 244 245scan_name(G, BE, [X|Str], Accum, TypeCheck, Line, Out) when ?is_upper(X) -> 246 scan_name(G, BE, Str, [X|Accum], TypeCheck, Line, Out); 247scan_name(G, BE, [X|Str], Accum, TypeCheck, Line, Out) when ?is_lower(X) -> 248 scan_name(G, BE, Str, [X|Accum], TypeCheck, Line, Out); 249scan_name(G, BE, [X|Str], Accum, TypeCheck, Line, Out) when ?is_number(X) -> 250 scan_name(G, BE, Str, [X|Accum], TypeCheck, Line, Out); 251scan_name(G, BE, [$_|Str], Accum, TypeCheck, Line, Out) -> 252 scan_name(G, BE, Str, [$_|Accum], TypeCheck, Line, Out); 253scan_name(G, BE, S, Accum, false, Line, Out) -> 254 %% The CORBA 2.3 specification allows the user to override typechecking: 255 %% typedef string _native; 256 %% interface i { 257 %% void foo(in _native VT); 258 %% }; 259 %% BUT, the IFR-id remains the same ("IDL:native:1.0") etc. The reason for 260 %% this is that one don't have to re-write a large chunk of IDL- and 261 %% application-code. 262 scan(G, BE, S, Line, [{'<identifier>', Line, lists:reverse(Accum)} | Out]); 263scan_name(G, BE, S, Accum, _, Line, Out) -> 264 L = lists:reverse(Accum), 265 X = case is_reserved(L, BE) of 266 undefined -> 267 {'<identifier>', Line, L}; 268 Yes -> 269 {Yes, Line} 270 end, 271 scan(G, BE, S, Line, [X | Out]). 272 273%% Shall scan a constant 274scan_const(G, BE, string, [$" | Rest], Accum, Line, [{'<string_literal>', _, Str}|Out]) -> 275 scan(G, BE, Rest, Line, 276 [{'<string_literal>', Line, Str ++ lists:reverse(Accum)} | Out]); 277scan_const(G, BE, string, [$" | Rest], Accum, Line, Out) -> 278 scan(G, BE, Rest, Line, 279 [{'<string_literal>', Line, lists:reverse(Accum)} | Out]); 280scan_const(G, BE, wstring, [$" | Rest], Accum, Line, [{'<wstring_literal>', _,Wstr}|Out]) -> %% WSTRING 281 scan(G, BE, Rest, Line, 282 [{'<wstring_literal>', Line, Wstr ++ lists:reverse(Accum)} | Out]); 283scan_const(G, BE, wstring, [$" | Rest], Accum, Line, Out) -> %% WSTRING 284 scan(G, BE, Rest, Line, 285 [{'<wstring_literal>', Line, lists:reverse(Accum)} | Out]); 286scan_const(G, _BE, string, [], _Accum, Line, Out) -> %% Bad string 287 ic_error:error(G, {bad_string, Line}), 288 Out; 289scan_const(G, _BE, wstring, [], _Accum, Line, Out) -> %% Bad WSTRING 290 ic_error:error(G, {bad_string, Line}), 291 Out; 292scan_const(G, BE, char, [$' | Rest], Accum, Line, Out) -> 293 scan(G, BE, Rest, Line, 294 [{'<character_literal>', Line, lists:reverse(Accum)} | Out]); 295scan_const(G, BE, wchar, [$' | Rest], Accum, Line, Out) -> %% WCHAR 296 scan(G, BE, Rest, Line, 297 [{'<wcharacter_literal>', Line, lists:reverse(Accum)} | Out]); 298scan_const(G, BE, Mode, [$\\, C | Rest], Accum, Line, Out) -> 299 case escaped_char(C) of 300 error -> 301 ic_error:error(G, {bad_escape_character, Line, C}), %% Bad escape character 302 scan_const(G, BE, Mode, Rest, [C | Accum], Line, Out); 303 octal -> 304 {Num,Rest2} = scan_octal_number([C|Rest], 0), 305 scan_const(G, BE, Mode, Rest2, [Num|Accum], Line, Out); 306 hexadecimal -> 307 {Num,Rest2} = scan_hex_number(Rest, 0), 308 if 309 Num > 255 -> %% 16#FF 310 ic_error:error(G, {bad_escape_character, Line, C}), 311 scan_const(G, BE, Mode, Rest, [C | Accum], Line, Out); 312 true -> 313 scan_const(G, BE, Mode, Rest2, [Num|Accum], Line, Out) 314 end; 315 unicode -> 316 {Num,Rest2} = scan_hex_number(Rest, 0), 317 if 318 Num > 65535 -> %% 16#FFFF 319 ic_error:error(G, {bad_escape_character, Line, C}), 320 scan_const(G, BE, Mode, Rest, [C | Accum], Line, Out); 321 true -> 322 scan_const(G, BE, Mode, Rest2, [Num|Accum], Line, Out) 323 end; 324 EC -> 325 scan_const(G, BE, Mode, Rest, [EC | Accum], Line, Out) 326 end; 327scan_const(G, BE, Mode, [C | Rest], Accum, Line, Out) -> 328 scan_const(G, BE, Mode, Rest, [C | Accum], Line, Out). 329 330 331%% 332%% Preprocessor output handling 333%% 334%% gcc outputs a line with line number, file name (within \") and 335%% one or more integer flags. The scanner scans the line number, 336%% the id and all integers up to nl. 337%% 338%% NOTE: This will have to be enhanced in order to eat #pragma 339%% 340scan_preproc(G, BE, Str, Line, Out) -> 341 {List, Rest} = scan_to_nl(strip(Str), []), 342 NewLine = get_new_line_nr(strip(List), Line+1, []), 343 case scan_number(G, BE, List, [], Line, [{'#', Line} | Out]) of 344 L when is_list(L) -> 345 scan(G, BE, Rest, NewLine, [{'#', Line} | L]) 346 end. 347 348get_new_line_nr([C|R], Line, Acc) when C>=$0, C=<$9 -> 349 get_new_line_nr(R, Line, [C|Acc]); 350get_new_line_nr(_, Line, []) -> Line; % No line nr found 351get_new_line_nr(_, _, Acc) -> list_to_integer(reverse(Acc)). 352 353scan_to_nl([], Acc) -> {reverse(Acc), []}; 354scan_to_nl([$\n|Str], Acc) -> {reverse(Acc), Str}; 355scan_to_nl([$\r|R], Acc) -> scan_to_nl(R, Acc); 356scan_to_nl([C|R], Acc) -> scan_to_nl(R, [C|Acc]). 357 358strip([$ |R]) -> strip(R); 359strip(L) -> L. 360 361%% Escaped character. Escaped chars are repr as two characters in the 362%% input list of letters and this is translated into one char. 363escaped_char($n) -> $\n; 364escaped_char($t) -> $\t; 365escaped_char($v) -> $\v; 366escaped_char($b) -> $\b; 367escaped_char($r) -> $ ; 368escaped_char($f) -> $\f; 369escaped_char($a) -> $\a; 370escaped_char($\\) -> $\\; 371escaped_char($?) -> $?; 372escaped_char($') -> $'; 373escaped_char($") -> $"; 374escaped_char($x) -> hexadecimal; 375escaped_char($u) -> unicode; 376escaped_char(X) when ?is_octal(X) -> octal; 377%% Error 378escaped_char(_Other) -> error. 379 380skip_to_nl([]) -> []; 381skip_to_nl([$\n | Str]) ->[$\n | Str]; 382skip_to_nl([_|Str]) -> 383 skip_to_nl(Str). 384 385skip_comment([$\\, _ | Str]) -> 386 skip_comment(Str); 387skip_comment([$*, $/ | Str]) -> Str; 388skip_comment([_|Str]) -> 389 skip_comment(Str). 390 391 392%%---------------------------------------------------------------------- 393%% Shall separate keywords from identifiers and numbers 394 395%% Fill in the ets of reserved words 396is_reserved("Object", _) -> 'Object'; 397is_reserved("in", _) -> in; 398is_reserved("interface", _) -> interface; 399is_reserved("case", _) -> 'case'; 400is_reserved("union", _) -> union; 401is_reserved("struct", _) -> struct; 402is_reserved("any", _) -> any; 403is_reserved("long", _) -> long; 404is_reserved("float", _) -> float; 405is_reserved("out", _) -> out; 406is_reserved("enum", _) -> enum; 407is_reserved("double", _) -> double; 408is_reserved("context", _) -> context; 409is_reserved("oneway", _) -> oneway; 410is_reserved("sequence", _) -> sequence; 411is_reserved("FALSE", _) -> 'FALSE'; 412is_reserved("readonly", _) -> readonly; 413is_reserved("char", _) -> char; 414is_reserved("wchar", _) -> wchar; 415is_reserved("void", _) -> void; 416is_reserved("inout", _) -> inout; 417is_reserved("attribute", _) -> attribute; 418is_reserved("octet", _) -> octet; 419is_reserved("TRUE", _) -> 'TRUE'; 420is_reserved("switch", _) -> switch; 421is_reserved("unsigned", _) -> unsigned; 422is_reserved("typedef", _) -> typedef; 423is_reserved("const", _) -> const; 424is_reserved("raises", _) -> raises; 425is_reserved("string", _) -> string; 426is_reserved("wstring", _) -> wstring; 427is_reserved("default", _) -> default; 428is_reserved("short", _) -> short; 429is_reserved("module", _) -> module; 430is_reserved("exception", _) -> exception; 431is_reserved("boolean", _) -> boolean; 432%% --- New keywords Introduced in CORBA-2.3.1 --- 433%% For now we cannot add these for all backends right now since it would cause 434%% some problems for at least one customer. 435is_reserved("fixed", BE) -> check_be(BE, fixed); 436%is_reserved("abstract", BE) -> check_be(BE, abstract); 437%is_reserved("custom", BE) -> check_be(BE, custom); 438%is_reserved("factory", BE) -> check_be(BE, factory); 439%is_reserved("local", BE) -> check_be(BE, local); 440%is_reserved("native", BE) -> check_be(BE, native); 441%is_reserved("private", BE) -> check_be(BE, private); 442%is_reserved("public", BE) -> check_be(BE, public); 443%is_reserved("supports", BE) -> check_be(BE, supports); 444%is_reserved("truncatable", BE) -> check_be(BE, truncatable); 445%is_reserved("ValueBase", BE) -> check_be(BE, 'ValueBase'); 446%is_reserved("valuetype", BE) -> check_be(BE, valuetype); 447is_reserved(_, _) -> undefined. 448 449check_be(erl_corba, KeyWord) -> 450 KeyWord; 451check_be(_, _) -> 452 undefined. 453 454