1%% ``Licensed under the Apache License, Version 2.0 (the "License");
2%% you may not use this file except in compliance with the License.
3%% You may obtain a copy of the License at
4%%
5%%     http://www.apache.org/licenses/LICENSE-2.0
6%%
7%% Unless required by applicable law or agreed to in writing, software
8%% distributed under the License is distributed on an "AS IS" BASIS,
9%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10%% See the License for the specific language governing permissions and
11%% limitations under the License.
12%%
13%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
14%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
15%% AB. All Rights Reserved.''
16%%
17%%     $Id: asn1ct_tok.erl,v 1.1 2008/12/17 09:53:30 mikpe Exp $
18%%
19-module(asn1ct_tok).
20
21%% Tokenize ASN.1 code (input to parser generated with yecc)
22
23-export([get_name/2,tokenise/2, file/1]).
24
25
26file(File) ->
27    case file:open(File, [read])  of
28	{error, Reason} ->
29	    {error,{File,file:format_error(Reason)}};
30	{ok,Stream} ->
31	    process0(Stream)
32    end.
33
34process0(Stream) ->
35    process(Stream,0,[]).
36
37process(Stream,Lno,R) ->
38    process(io:get_line(Stream, ''), Stream,Lno+1,R).
39
40process(eof, Stream,Lno,R) ->
41    file:close(Stream),
42    lists:flatten(lists:reverse([{'$end',Lno}|R]));
43
44
45process(L, Stream,Lno,R) when list(L) ->
46    %%io:format('read:~s',[L]),
47    case catch tokenise(L,Lno) of
48	{'ERR',Reason} ->
49	    io:format("Tokeniser error on line: ~w ~w~n",[Lno,Reason]),
50	    exit(0);
51	T ->
52	    %%io:format('toks:~w~n',[T]),
53	    process(Stream,Lno,[T|R])
54    end.
55
56
57tokenise([H|T],Lno) when $a =< H , H =< $z ->
58    {X, T1} = get_name(T, [H]),
59    [{identifier,Lno, list_to_atom(X)}|tokenise(T1,Lno)];
60
61tokenise([$&,H|T],Lno) when $A =< H , H =< $Z ->
62    {Y, T1} = get_name(T, [H]),
63    X = list_to_atom(Y),
64    [{typefieldreference, Lno, X} | tokenise(T1, Lno)];
65
66tokenise([$&,H|T],Lno) when $a =< H , H =< $z ->
67    {Y, T1} = get_name(T, [H]),
68    X = list_to_atom(Y),
69    [{valuefieldreference, Lno, X} | tokenise(T1, Lno)];
70
71tokenise([H|T],Lno) when $A =< H , H =< $Z ->
72    {Y, T1} = get_name(T, [H]),
73    X = list_to_atom(Y),
74    case reserved_word(X) of
75	true ->
76	    [{X,Lno}|tokenise(T1,Lno)];
77	false ->
78	    [{typereference,Lno,X}|tokenise(T1,Lno)];
79	rstrtype ->
80	    [{restrictedcharacterstringtype,Lno,X}|tokenise(T1,Lno)]
81    end;
82
83tokenise([$-,H|T],Lno) when $0 =< H , H =< $9 ->
84    {X, T1} = get_number(T, [H]),
85    [{number,Lno,-1 * list_to_integer(X)}|tokenise(T1,Lno)];
86
87tokenise([H|T],Lno) when $0 =< H , H =< $9 ->
88    {X, T1} = get_number(T, [H]),
89    [{number,Lno,list_to_integer(X)}|tokenise(T1,Lno)];
90
91tokenise([$-,$-|T],Lno) ->
92    tokenise(skip_comment(T),Lno);
93tokenise([$:,$:,$=|T],Lno) ->
94    [{'::=',Lno}|tokenise(T,Lno)];
95
96tokenise([$'|T],Lno) ->
97    case catch collect_quoted(T,Lno,[]) of
98         {'ERR',_} ->
99             throw({'ERR','bad_quote'});
100         {Thing, T1} ->
101             [Thing|tokenise(T1,Lno)]
102    end;
103
104tokenise([$"|T],Lno) ->
105    collect_string(T,Lno);
106
107tokenise([${|T],Lno) ->
108    [{'{',Lno}|tokenise(T,Lno)];
109
110tokenise([$}|T],Lno) ->
111    [{'}',Lno}|tokenise(T,Lno)];
112
113tokenise([$]|T],Lno) ->
114    [{']',Lno}|tokenise(T,Lno)];
115
116tokenise([$[|T],Lno) ->
117    [{'[',Lno}|tokenise(T,Lno)];
118
119tokenise([$,|T],Lno) ->
120    [{',',Lno}|tokenise(T,Lno)];
121
122tokenise([$(|T],Lno) ->
123    [{'(',Lno}|tokenise(T,Lno)];
124tokenise([$)|T],Lno) ->
125    [{')',Lno}|tokenise(T,Lno)];
126
127tokenise([$.,$.,$.|T],Lno) ->
128    [{'...',Lno}|tokenise(T,Lno)];
129
130tokenise([$.,$.|T],Lno) ->
131    [{'..',Lno}|tokenise(T,Lno)];
132
133tokenise([$.|T],Lno) ->
134    [{'.',Lno}|tokenise(T,Lno)];
135tokenise([$^|T],Lno) ->
136    [{'^',Lno}|tokenise(T,Lno)];
137tokenise([$!|T],Lno) ->
138    [{'!',Lno}|tokenise(T,Lno)];
139tokenise([$||T],Lno) ->
140    [{'|',Lno}|tokenise(T,Lno)];
141
142
143tokenise([H|T],Lno) ->
144    case white_space(H) of
145	true ->
146	    tokenise(T,Lno);
147	false ->
148	    [{list_to_atom([H]),Lno}|tokenise(T,Lno)]
149    end;
150tokenise([],_) ->
151    [].
152
153
154collect_string(L,Lno) ->
155    collect_string(L,Lno,[]).
156
157collect_string([],_,_) ->
158    throw({'ERR','bad_quote found eof'});
159
160collect_string([H|T],Lno,Str) ->
161    case H of
162	$" ->
163           [{cstring,1,lists:reverse(Str)}|tokenise(T,Lno)];
164        Ch ->
165           collect_string(T,Lno,[Ch|Str])
166    end.
167
168
169
170% <name> is letters digits hyphens
171% hypen is not the last character. Hypen hyphen is NOT allowed
172%
173% <identifier> ::= <lowercase> <name>
174
175get_name([$-,Char|T], L) ->
176    case isalnum(Char) of
177	true ->
178	    get_name(T,[Char,$-|L]);
179	false ->
180	    {lists:reverse(L),[$-,Char|T]}
181    end;
182get_name([$-|T], L) ->
183    {lists:reverse(L),[$-|T]};
184get_name([Char|T], L) ->
185    case isalnum(Char) of
186	true ->
187	    get_name(T,[Char|L]);
188	false ->
189	    {lists:reverse(L),[Char|T]}
190    end;
191get_name([], L) ->
192    {lists:reverse(L), []}.
193
194
195isalnum(H) when $A =< H , H =< $Z ->
196    true;
197isalnum(H) when $a =< H , H =< $z ->
198    true;
199isalnum(H) when $0 =< H , H =< $9 ->
200    true;
201isalnum(_) ->
202    false.
203
204isdigit(H) when $0 =< H , H =< $9 ->
205    true;
206isdigit(_) ->
207    false.
208
209white_space(9) -> true;
210white_space(10) -> true;
211white_space(13) -> true;
212white_space(32) -> true;
213white_space(_) -> false.
214
215
216get_number([H|T], L) ->
217    case isdigit(H) of
218	true ->
219	    get_number(T, [H|L]);
220	false ->
221	    {lists:reverse(L), [H|T]}
222    end;
223get_number([], L) ->
224    {lists:reverse(L), []}.
225
226skip_comment([]) ->
227    [];
228skip_comment([$-,$-|T]) ->
229    T;
230skip_comment([_|T]) ->
231    skip_comment(T).
232
233collect_quoted([$',$B|T],Lno, L) ->
234    case check_bin(L) of
235        true ->
236            {{bstring,Lno, lists:reverse(L)}, T};
237        false ->
238            throw({'ERR',{invalid_binary_number, lists:reverse(L)}})
239    end;
240collect_quoted([$',$H|T],Lno, L) ->
241    case check_hex(L) of
242        true ->
243            {{hstring,Lno, lists:reverse(L)}, T};
244        false ->
245            throw({'ERR',{invalid_binary_number, lists:reverse(L)}})
246    end;
247collect_quoted([H|T], Lno, L) ->
248    collect_quoted(T, Lno,[H|L]);
249collect_quoted([], _, _) ->        % This should be allowed FIX later
250    throw({'ERR',{eol_in_token}}).
251
252check_bin([$0|T]) ->
253    check_bin(T);
254check_bin([$1|T]) ->
255    check_bin(T);
256check_bin([]) ->
257    true;
258check_bin(_) ->
259    false.
260
261check_hex([H|T]) when $0 =< H , H =< $9 ->
262    check_hex(T);
263check_hex([H|T])  when $A =< H , H =< $F ->
264    check_hex(T);
265check_hex([]) ->
266    true;
267check_hex(_) ->
268    false.
269
270
271%% reserved_word(A) -> true|false|rstrtype
272%% A = atom()
273%% returns true if A is a reserved ASN.1 word
274%% returns false if A is not a reserved word
275%% returns rstrtype if A is a reserved word in the group
276%% 	RestrictedCharacterStringType
277reserved_word('ABSENT') -> true;
278%reserved_word('ABSTRACT-SYNTAX') -> true; % impl as predef item
279reserved_word('ALL') -> true;
280reserved_word('ANY') -> true;
281reserved_word('APPLICATION') -> true;
282reserved_word('AUTOMATIC') -> true;
283reserved_word('BEGIN') -> true;
284reserved_word('BIT') -> true;
285reserved_word('BMPString') -> rstrtype;
286reserved_word('BOOLEAN') -> true;
287reserved_word('BY') -> true;
288reserved_word('CHARACTER') -> true;
289reserved_word('CHOICE') -> true;
290reserved_word('CLASS') -> true;
291reserved_word('COMPONENT') -> true;
292reserved_word('COMPONENTS') -> true;
293reserved_word('CONSTRAINED') -> true;
294reserved_word('DEFAULT') -> true;
295reserved_word('DEFINED') -> true;
296reserved_word('DEFINITIONS') -> true;
297reserved_word('EMBEDDED') -> true;
298reserved_word('END') -> true;
299reserved_word('ENUMERATED') -> true;
300reserved_word('EXCEPT') -> true;
301reserved_word('EXPLICIT') -> true;
302reserved_word('EXPORTS') -> true;
303reserved_word('EXTERNAL') -> true;
304reserved_word('FALSE') -> true;
305reserved_word('FROM') -> true;
306reserved_word('GeneralizedTime') -> true;
307reserved_word('GeneralString') -> rstrtype;
308reserved_word('GraphicString') -> rstrtype;
309reserved_word('IA5String') -> rstrtype;
310% reserved_word('TYPE-IDENTIFIER') -> true; % impl as predef item
311reserved_word('IDENTIFIER') -> true;
312reserved_word('IMPLICIT') -> true;
313reserved_word('IMPORTS') -> true;
314reserved_word('INCLUDES') -> true;
315reserved_word('INSTANCE') -> true;
316reserved_word('INTEGER') -> true;
317reserved_word('INTERSECTION') -> true;
318reserved_word('ISO646String') -> rstrtype;
319reserved_word('MAX') -> true;
320reserved_word('MIN') -> true;
321reserved_word('MINUS-INFINITY') -> true;
322reserved_word('NULL') -> true;
323reserved_word('NumericString') -> rstrtype;
324reserved_word('OBJECT') -> true;
325reserved_word('ObjectDescriptor') -> true;
326reserved_word('OCTET') -> true;
327reserved_word('OF') -> true;
328reserved_word('OPTIONAL') -> true;
329reserved_word('PDV') -> true;
330reserved_word('PLUS-INFINITY') -> true;
331reserved_word('PRESENT') -> true;
332reserved_word('PrintableString') -> rstrtype;
333reserved_word('PRIVATE') -> true;
334reserved_word('REAL') -> true;
335reserved_word('SEQUENCE') -> true;
336reserved_word('SET') -> true;
337reserved_word('SIZE') -> true;
338reserved_word('STRING') -> true;
339reserved_word('SYNTAX') -> true;
340reserved_word('T61String') -> rstrtype;
341reserved_word('TAGS') -> true;
342reserved_word('TeletexString') -> rstrtype;
343reserved_word('TRUE') -> true;
344reserved_word('UNION') -> true;
345reserved_word('UNIQUE') -> true;
346reserved_word('UNIVERSAL') -> true;
347reserved_word('UniversalString') -> rstrtype;
348reserved_word('UTCTime') -> true;
349reserved_word('VideotexString') -> rstrtype;
350reserved_word('VisibleString') -> rstrtype;
351reserved_word('WITH') -> true;
352reserved_word(_) -> false.
353