1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 1997-2016. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20%%
21-module(icscan).
22
23
24-export([scan/2]).
25
26-include("ic.hrl").
27
28
29%%----------------------------------------------------------------------
30%%----------------------------------------------------------------------
31
32-import(lists, [reverse/1]).
33
34
35scan(G, File) ->
36    PL = call_preproc(G, File),
37    call_scan(G, PL).
38
39call_preproc(G, File) ->
40    case ic_options:get_opt(G, use_preproc) of
41	true ->
42	    icpreproc:preproc(G, File);
43	false ->
44	    case catch file:read_file(File) of
45		{ok, Bin} ->
46		    binary_to_list(Bin);
47		Other ->
48		    exit(Other)
49	    end
50    end.
51
52call_scan(G, PL) ->
53    BE = ic_options:get_opt(G, be),
54    RSL = scan(G, BE, PL, 1, []),
55    lists:reverse(RSL).
56
57
58%% Guard macros used at top scan functions only
59-define(is_number(X), X >= $0 , X =< $9).
60-define(is_upper(X), X >= $A , X =< $Z).
61-define(is_lower(X), X >= $a, X =< $z).
62-define(is_hex_uc(X), X >= $A , X =< $F).
63-define(is_hex_lc(X), X >= $a , X =< $f).
64-define(is_octal(X), X >=$0, X =< $7).
65
66%% Handle:
67%%  const wchar aWChar = L'X';
68scan(G, BE, [$L, $'|Str], Line, Out) ->
69    scan_const(G, BE, wchar, Str, [], Line, Out);
70scan(G, BE, [$L, $"|Str], Line, Out) ->
71    scan_const(G, BE, wstring, Str, [], Line, Out);
72scan(G, BE, [$_, X|Str], Line, Out) when ?is_upper(X) ->
73    scan_name(G, BE, Str, [X], false, Line, Out);
74scan(G, BE, [$_, X|Str], Line, Out) when ?is_lower(X) ->
75    scan_name(G, BE, Str, [X], false, Line, Out);
76scan(G, BE, [X|Str], Line, Out) when ?is_upper(X) ->
77    scan_name(G, BE, Str, [X], true, Line, Out);
78scan(G, BE, [X|Str], Line, Out) when ?is_lower(X) ->
79    scan_name(G, BE, Str, [X], true, Line, Out);
80scan(G, BE, [X|Str], Line, Out) when ?is_number(X) ->
81    scan_number(G, BE, Str, [X], Line, Out);
82scan(G, BE, [9| T], Line, Out) -> scan(G, BE, T, Line, Out);
83scan(G, BE, [32| T], Line, Out) -> scan(G, BE, T, Line, Out);
84scan(G, BE, [$\r|Str], Line, Out) ->
85    scan(G, BE, Str, Line, Out);
86scan(G, BE, [$\n|Str], Line, Out) ->
87    scan(G, BE, Str, Line+1, Out);
88scan(G, BE, [$:, $: | Str], Line, Out) ->
89    scan(G, BE, Str, Line, [{'::', Line} | Out]);
90scan(G, BE, [$/, $/ | Str], Line, Out) ->
91    Rest = skip_to_nl(Str),
92    scan(G, BE, Rest, Line, Out);
93scan(G, BE, [$/, $* | Str], Line, Out) ->
94    Rest = skip_comment(Str),
95    scan(G, BE, Rest, Line, Out);
96scan(G, BE, [$", $\\|Str], Line, Out) ->
97    scan_const(G, BE, string, [$\\|Str], [], Line, Out);
98scan(G, BE, [$"|Str], Line, Out) ->
99    scan_const(G, BE, string, Str, [], Line, Out);
100scan(G, BE, [$', $\\|Str], Line, Out) ->
101    scan_const(G, BE, char, [$\\|Str], [], Line, Out);
102scan(G, BE, [$'|Str], Line, Out) ->
103    scan_const(G, BE, char, Str, [], Line, Out);
104scan(G, BE, [$\\|Str], Line, Out) ->
105    scan_const(G, BE, escaped, [$\\|Str], [], Line, Out);
106scan(G, BE, [$. | Str], Line, Out) ->
107    scan_frac(G, BE, Str, [$.], Line, Out);
108scan(G, BE, [$# | Str], Line, Out) ->
109    scan_preproc(G, BE, Str, Line, Out);
110scan(G, BE, [$<, $< | Str], Line, Out) ->
111    scan(G, BE, Str, Line, [{'<<', Line} | Out]);
112scan(G, BE, [$>, $> | Str], Line, Out) ->
113    scan(G, BE, Str, Line, [{'>>', Line} | Out]);
114scan(G, BE, [C|Str], Line, Out) ->
115    scan(G, BE, Str, Line, [{list_to_atom([C]), Line} | Out]);
116
117scan(_G, _BE, [], _Line, Out) ->
118    Out.
119
120
121scan_number(G, BE, [X|Str], [$0], Line, Out) when X == $X ; X ==$x ->
122    case Str of
123	[D|_TmpStr] when ?is_number(D); ?is_hex_uc(D); ?is_hex_lc(D) ->
124	    {Num,Rest} = scan_hex_number(Str,0),
125	    scan(G, BE, Rest, Line, [{'<integer_literal>', Line,
126				  integer_to_list(Num)} | Out]);
127	[D|TmpStr] ->
128	    scan(G, BE, TmpStr, Line, [{list_to_atom([D]), Line} | Out])
129    end;
130scan_number(G, BE, Str, [$0], Line, Out) ->
131    %% If an integer literal starts with a 0 it may indicate that
132    %% it is represented as an octal number. But, it can also be a fixed
133    %% type which must use padding to match a fixed typedef. For example:
134    %% typedef fixed<5,2> fixed52;
135    %% 123.45d, 123.00d and 023.00d is all valid fixed values.
136    %% Naturally, a float can be defined as 0.14 or 00.14.
137    case pre_scan_number(Str, [], octal) of
138	octal ->
139	    {Num, Rest} = scan_octal_number(Str,0),
140	    scan(G, BE, Rest, Line, [{'<integer_literal>', Line,
141				  integer_to_list(Num)} | Out]);
142	{fixed, Fixed, Rest} ->
143	    scan(G, BE, Rest, Line, [{'<fixed_pt_literal>', Line, Fixed} | Out]);
144	float ->
145	    %% Not very likely that someone defines a constant as 00.14 but ...
146	    NewStr = remove_leading_zeroes(Str),
147	    scan(G, BE, NewStr, Line, Out)
148    end;
149scan_number(G, BE, [X|Str], Accum, Line, Out) when ?is_number(X) ->
150    scan_number(G, BE, Str, [X|Accum], Line, Out);
151scan_number(G, BE, [X|Str], Accum, Line, Out) when X==$. ->
152    scan_frac(G, BE, Str, [X|Accum], Line, Out);
153scan_number(G, BE, [X|Str], Accum, Line, Out) when X==$e ; X==$e ->
154    scan_exp(G, BE, Str, [X|Accum], Line, Out);
155scan_number(G, BE, [X|Str], Accum, Line, Out) when X==$D ; X==$d ->
156    scan(G, BE, Str, Line, [{'<fixed_pt_literal>', Line,
157			 (lists:reverse(Accum))} | Out]);
158scan_number(G, BE, Str, Accum, Line, Out) ->
159    scan(G, BE, Str, Line, [{'<integer_literal>', Line,
160			 (lists:reverse(Accum))} | Out]).
161
162
163remove_leading_zeroes([$0|Rest]) ->
164    remove_leading_zeroes(Rest);
165remove_leading_zeroes(L) ->
166    L.
167
168scan_hex_number([X|Rest],Acc) when X >=$a, X =< $f ->
169    scan_hex_number(Rest,(Acc bsl 4) + (X - $a + 10));
170scan_hex_number([X|Rest],Acc) when X >=$A, X =< $F ->
171    scan_hex_number(Rest,(Acc bsl 4) + (X - $A + 10));
172scan_hex_number([X|Rest],Acc) when X >=$0, X =< $9 ->
173    scan_hex_number(Rest,(Acc bsl 4) + (X-$0));
174scan_hex_number(Rest,Acc) ->
175    {Acc,Rest}.
176
177pre_scan_number([$d|Rest], Acc, _) ->
178    {fixed, [$0|lists:reverse(Acc)], Rest};
179pre_scan_number([$D|Rest], Acc, _) ->
180    {fixed, [$0|lists:reverse(Acc)], Rest};
181pre_scan_number([$.|Rest], Acc, _) ->
182    %% Actually, we don't know if it's a float since it can be a fixed.
183    pre_scan_number(Rest, [$.|Acc], float);
184pre_scan_number([X|_], _Acc, _) when X == $E ; X ==$e  ->
185    %% Now we now it's a float.
186    float;
187pre_scan_number([X|Rest], Acc, Type) when ?is_number(X) ->
188    pre_scan_number(Rest, [X|Acc], Type);
189pre_scan_number(_Rest, _Acc, Type) ->
190    %% At this point we know it's a octal or float.
191    Type.
192
193scan_octal_number([X|Rest],Acc) when ?is_octal(X) ->
194    scan_octal_number(Rest,(Acc bsl 3) + (X-$0));
195scan_octal_number(Rest,Acc) ->
196    {Acc, Rest}.
197
198%% Floating point number scan.
199%%
200%%	Non trivial scan. A float consists of an integral part, a
201%%	decimal point, a fraction part, an e or E and a signed integer
202%%	exponent. Either the integer part or the fraction part but not
203%%	both may be missing, and either the decimal point or the
204%%	exponent part but not both may be missing. The exponent part
205%%	must consist of an e or E and a possibly signed exponent.
206%%
207%%	Analysis shows that "1." ".7" "1e2" ".5e-3" "1.7e2" "1.7e-2"
208%%	is allowed and "1" ".e9" is not. The sign is only allowed just
209%%	after an e or E. The scanner reads a number as an integer
210%%	until it encounters a "." so the integer part only error case
211%%	will not be caught in the scanner (but rather in expression
212%%	evaluation)
213
214scan_frac(G, _BE, [$e | _Str], [$.], Line, _Out) ->
215    ic_error:fatal_error(G, {illegal_float, Line});
216scan_frac(G, _BE, [$E | _Str], [$.], Line, _Out) ->
217    ic_error:fatal_error(G, {illegal_float, Line});
218scan_frac(G, BE, Str, Accum, Line, Out) ->
219    scan_frac2(G, BE, Str, Accum, Line, Out).
220
221scan_frac2(G, BE, [X|Str], Accum, Line, Out) when ?is_number(X) ->
222    scan_frac2(G, BE, Str, [X|Accum], Line, Out);
223scan_frac2(G, BE, [X|Str], Accum, Line, Out) when X==$e ; X==$E ->
224    scan_exp(G, BE, Str, [X|Accum], Line, Out);
225%% The following case is for fixed (e.g. 123.45d).
226scan_frac2(G, BE, [X|Str], Accum, Line, Out) when X==$d ; X==$D ->
227    scan(G, BE, Str, Line, [{'<fixed_pt_literal>', Line,
228			 (lists:reverse(Accum))} | Out]);
229scan_frac2(G, BE, Str, Accum, Line, Out) ->
230    scan(G, BE, Str, Line, [{'<floating_pt_literal>', Line,
231			 (lists:reverse(Accum))} | Out]).
232
233scan_exp(G, BE, [X|Str], Accum, Line, Out) when X==$- ->
234    scan_exp2(G, BE, Str, [X|Accum], Line, Out);
235scan_exp(G, BE, Str, Accum, Line, Out) ->
236    scan_exp2(G, BE, Str, Accum, Line, Out).
237
238scan_exp2(G, BE, [X|Str], Accum, Line, Out) when ?is_number(X) ->
239    scan_exp2(G, BE, Str, [X|Accum], Line, Out);
240scan_exp2(G, BE, Str, Accum, Line, Out) ->
241    scan(G, BE, Str, Line, [{'<floating_pt_literal>', Line,
242			 (lists:reverse(Accum))} | Out]).
243
244
245scan_name(G, BE, [X|Str], Accum, TypeCheck, Line, Out) when ?is_upper(X) ->
246    scan_name(G, BE, Str, [X|Accum], TypeCheck, Line, Out);
247scan_name(G, BE, [X|Str], Accum, TypeCheck, Line, Out) when ?is_lower(X) ->
248    scan_name(G, BE, Str, [X|Accum], TypeCheck, Line, Out);
249scan_name(G, BE, [X|Str], Accum, TypeCheck, Line, Out) when ?is_number(X) ->
250    scan_name(G, BE, Str, [X|Accum], TypeCheck, Line, Out);
251scan_name(G, BE, [$_|Str], Accum, TypeCheck, Line, Out) ->
252    scan_name(G, BE, Str, [$_|Accum], TypeCheck, Line, Out);
253scan_name(G, BE, S, Accum, false, Line, Out) ->
254    %% The CORBA 2.3 specification allows the user to override typechecking:
255    %% typedef string _native;
256    %% interface i {
257    %%     void foo(in _native VT);
258    %% };
259    %% BUT, the IFR-id remains the same ("IDL:native:1.0") etc. The reason for
260    %% this is that one don't have to re-write a large chunk of IDL- and
261    %% application-code.
262    scan(G, BE, S, Line, [{'<identifier>', Line, lists:reverse(Accum)} | Out]);
263scan_name(G, BE, S, Accum, _, Line, Out) ->
264    L = lists:reverse(Accum),
265    X = case is_reserved(L, BE) of
266	    undefined ->
267		{'<identifier>', Line, L};
268	    Yes ->
269		{Yes, Line}
270	end,
271    scan(G, BE, S, Line, [X | Out]).
272
273%% Shall scan a constant
274scan_const(G, BE, string, [$" | Rest], Accum, Line, [{'<string_literal>', _, Str}|Out]) ->
275    scan(G, BE, Rest, Line,
276	 [{'<string_literal>', Line, Str ++ lists:reverse(Accum)} | Out]);
277scan_const(G, BE, string, [$" | Rest], Accum, Line, Out) ->
278    scan(G, BE, Rest, Line,
279	 [{'<string_literal>', Line, lists:reverse(Accum)} | Out]);
280scan_const(G, BE, wstring, [$" | Rest], Accum, Line, [{'<wstring_literal>', _,Wstr}|Out]) -> %% WSTRING
281    scan(G, BE, Rest, Line,
282	 [{'<wstring_literal>', Line, Wstr ++ lists:reverse(Accum)} | Out]);
283scan_const(G, BE, wstring, [$" | Rest], Accum, Line, Out) -> %% WSTRING
284    scan(G, BE, Rest, Line,
285	 [{'<wstring_literal>', Line, lists:reverse(Accum)} | Out]);
286scan_const(G, _BE, string, [], _Accum, Line, Out) -> %% Bad string
287    ic_error:error(G, {bad_string, Line}),
288    Out;
289scan_const(G, _BE, wstring, [], _Accum, Line, Out) -> %% Bad WSTRING
290    ic_error:error(G, {bad_string, Line}),
291    Out;
292scan_const(G, BE, char, [$' | Rest], Accum, Line, Out) ->
293    scan(G, BE, Rest, Line,
294	 [{'<character_literal>', Line, lists:reverse(Accum)} | Out]);
295scan_const(G, BE, wchar, [$' | Rest], Accum, Line, Out) -> %% WCHAR
296    scan(G, BE, Rest, Line,
297	 [{'<wcharacter_literal>', Line, lists:reverse(Accum)} | Out]);
298scan_const(G, BE, Mode, [$\\, C | Rest], Accum, Line, Out) ->
299    case escaped_char(C) of
300	error ->
301	    ic_error:error(G, {bad_escape_character, Line, C}), %% Bad escape character
302	    scan_const(G, BE, Mode, Rest, [C | Accum], Line, Out);
303	octal ->
304	    {Num,Rest2} = scan_octal_number([C|Rest], 0),
305	    scan_const(G, BE, Mode, Rest2, [Num|Accum], Line, Out);
306	hexadecimal ->
307	    {Num,Rest2} = scan_hex_number(Rest, 0),
308	    if
309		Num > 255 -> %% 16#FF
310		    ic_error:error(G, {bad_escape_character, Line, C}),
311		    scan_const(G, BE, Mode, Rest, [C | Accum], Line, Out);
312		true ->
313		    scan_const(G, BE, Mode, Rest2, [Num|Accum], Line, Out)
314	    end;
315	unicode ->
316	    {Num,Rest2} = scan_hex_number(Rest, 0),
317	    if
318		Num > 65535 -> %% 16#FFFF
319		    ic_error:error(G, {bad_escape_character, Line, C}),
320		    scan_const(G, BE, Mode, Rest, [C | Accum], Line, Out);
321		true ->
322		    scan_const(G, BE, Mode, Rest2, [Num|Accum], Line, Out)
323	    end;
324	EC ->
325	    scan_const(G, BE, Mode, Rest, [EC | Accum], Line, Out)
326    end;
327scan_const(G, BE, Mode, [C | Rest], Accum, Line, Out) ->
328    scan_const(G, BE, Mode, Rest, [C | Accum], Line, Out).
329
330
331%%
332%% Preprocessor output handling
333%%
334%%	gcc outputs a line with line number, file name (within \") and
335%%	one or more integer flags. The scanner scans the line number,
336%%	the id and all integers up to nl.
337%%
338%% NOTE: This will have to be enhanced in order to eat #pragma
339%%
340scan_preproc(G, BE, Str, Line, Out) ->
341    {List, Rest} = scan_to_nl(strip(Str), []),
342    NewLine = get_new_line_nr(strip(List), Line+1, []),
343    case scan_number(G, BE, List, [], Line, [{'#', Line} | Out]) of
344	L when is_list(L) ->
345	    scan(G, BE, Rest, NewLine, [{'#', Line} | L])
346    end.
347
348get_new_line_nr([C|R], Line, Acc) when C>=$0, C=<$9 ->
349    get_new_line_nr(R, Line, [C|Acc]);
350get_new_line_nr(_, Line, []) -> Line;		% No line nr found
351get_new_line_nr(_, _, Acc) -> list_to_integer(reverse(Acc)).
352
353scan_to_nl([], Acc) -> {reverse(Acc), []};
354scan_to_nl([$\n|Str], Acc) -> {reverse(Acc), Str};
355scan_to_nl([$\r|R], Acc) -> scan_to_nl(R, Acc);
356scan_to_nl([C|R], Acc) -> scan_to_nl(R, [C|Acc]).
357
358strip([$ |R]) -> strip(R);
359strip(L) -> L.
360
361%% Escaped character. Escaped chars are repr as two characters in the
362%% input list of letters and this is translated into one char.
363escaped_char($n) -> $\n;
364escaped_char($t) -> $\t;
365escaped_char($v) -> $\v;
366escaped_char($b) -> $\b;
367escaped_char($r) -> $ ;
368escaped_char($f) -> $\f;
369escaped_char($a) -> $\a;
370escaped_char($\\) -> $\\;
371escaped_char($?) -> $?;
372escaped_char($') -> $';
373escaped_char($") -> $";
374escaped_char($x) -> hexadecimal;
375escaped_char($u) -> unicode;
376escaped_char(X) when ?is_octal(X) -> octal;
377%% Error
378escaped_char(_Other) -> error.
379
380skip_to_nl([]) -> [];
381skip_to_nl([$\n | Str]) ->[$\n | Str];
382skip_to_nl([_|Str]) ->
383    skip_to_nl(Str).
384
385skip_comment([$\\, _ | Str]) ->
386    skip_comment(Str);
387skip_comment([$*, $/ | Str]) -> Str;
388skip_comment([_|Str]) ->
389    skip_comment(Str).
390
391
392%%----------------------------------------------------------------------
393%% Shall separate keywords from identifiers and numbers
394
395%% Fill in the ets of reserved words
396is_reserved("Object", _) ->     'Object';
397is_reserved("in", _) ->          in;
398is_reserved("interface", _) ->   interface;
399is_reserved("case", _) ->       'case';
400is_reserved("union", _) ->       union;
401is_reserved("struct", _) ->      struct;
402is_reserved("any", _) ->         any;
403is_reserved("long", _) ->        long;
404is_reserved("float", _) ->       float;
405is_reserved("out", _) ->         out;
406is_reserved("enum", _) ->        enum;
407is_reserved("double", _) ->      double;
408is_reserved("context", _) ->     context;
409is_reserved("oneway", _) ->      oneway;
410is_reserved("sequence", _) ->    sequence;
411is_reserved("FALSE", _) ->      'FALSE';
412is_reserved("readonly", _) ->    readonly;
413is_reserved("char", _) ->        char;
414is_reserved("wchar", _) ->       wchar;
415is_reserved("void", _) ->        void;
416is_reserved("inout", _) ->       inout;
417is_reserved("attribute", _) ->   attribute;
418is_reserved("octet", _) ->       octet;
419is_reserved("TRUE", _) ->       'TRUE';
420is_reserved("switch", _) ->      switch;
421is_reserved("unsigned", _) ->    unsigned;
422is_reserved("typedef", _) ->     typedef;
423is_reserved("const", _) ->       const;
424is_reserved("raises", _) ->      raises;
425is_reserved("string", _) ->      string;
426is_reserved("wstring", _) ->     wstring;
427is_reserved("default", _) ->     default;
428is_reserved("short", _) ->       short;
429is_reserved("module", _) ->      module;
430is_reserved("exception", _) ->   exception;
431is_reserved("boolean", _) ->     boolean;
432%% --- New keywords Introduced in CORBA-2.3.1 ---
433%% For now we cannot add these for all backends right now since it would cause
434%% some problems for at least one customer.
435is_reserved("fixed", BE) ->       check_be(BE, fixed);
436%is_reserved("abstract", BE) ->    check_be(BE, abstract);
437%is_reserved("custom", BE) ->      check_be(BE, custom);
438%is_reserved("factory", BE) ->     check_be(BE, factory);
439%is_reserved("local", BE) ->       check_be(BE, local);
440%is_reserved("native", BE) ->      check_be(BE, native);
441%is_reserved("private", BE) ->     check_be(BE, private);
442%is_reserved("public", BE) ->      check_be(BE, public);
443%is_reserved("supports", BE) ->    check_be(BE, supports);
444%is_reserved("truncatable", BE) -> check_be(BE, truncatable);
445%is_reserved("ValueBase", BE) ->   check_be(BE, 'ValueBase');
446%is_reserved("valuetype", BE) ->   check_be(BE, valuetype);
447is_reserved(_, _) -> undefined.
448
449check_be(erl_corba, KeyWord) ->
450    KeyWord;
451check_be(_, _) ->
452    undefined.
453
454