1%% ``Licensed under the Apache License, Version 2.0 (the "License"); 2%% you may not use this file except in compliance with the License. 3%% You may obtain a copy of the License at 4%% 5%% http://www.apache.org/licenses/LICENSE-2.0 6%% 7%% Unless required by applicable law or agreed to in writing, software 8%% distributed under the License is distributed on an "AS IS" BASIS, 9%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10%% See the License for the specific language governing permissions and 11%% limitations under the License. 12%% 13%% The Initial Developer of the Original Code is Ericsson Utvecklings AB. 14%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings 15%% AB. All Rights Reserved.'' 16%% 17%% $Id: asn1ct_tok.erl,v 1.1 2008/12/17 09:53:30 mikpe Exp $ 18%% 19-module(asn1ct_tok). 20 21%% Tokenize ASN.1 code (input to parser generated with yecc) 22 23-export([get_name/2,tokenise/2, file/1]). 24 25 26file(File) -> 27 case file:open(File, [read]) of 28 {error, Reason} -> 29 {error,{File,file:format_error(Reason)}}; 30 {ok,Stream} -> 31 process0(Stream) 32 end. 33 34process0(Stream) -> 35 process(Stream,0,[]). 36 37process(Stream,Lno,R) -> 38 process(io:get_line(Stream, ''), Stream,Lno+1,R). 39 40process(eof, Stream,Lno,R) -> 41 file:close(Stream), 42 lists:flatten(lists:reverse([{'$end',Lno}|R])); 43 44 45process(L, Stream,Lno,R) when list(L) -> 46 %%io:format('read:~s',[L]), 47 case catch tokenise(L,Lno) of 48 {'ERR',Reason} -> 49 io:format("Tokeniser error on line: ~w ~w~n",[Lno,Reason]), 50 exit(0); 51 T -> 52 %%io:format('toks:~w~n',[T]), 53 process(Stream,Lno,[T|R]) 54 end. 55 56 57tokenise([H|T],Lno) when $a =< H , H =< $z -> 58 {X, T1} = get_name(T, [H]), 59 [{identifier,Lno, list_to_atom(X)}|tokenise(T1,Lno)]; 60 61tokenise([$&,H|T],Lno) when $A =< H , H =< $Z -> 62 {Y, T1} = get_name(T, [H]), 63 X = list_to_atom(Y), 64 [{typefieldreference, Lno, X} | tokenise(T1, Lno)]; 65 66tokenise([$&,H|T],Lno) when $a =< H , H =< $z -> 67 {Y, T1} = get_name(T, [H]), 68 X = list_to_atom(Y), 69 [{valuefieldreference, Lno, X} | tokenise(T1, Lno)]; 70 71tokenise([H|T],Lno) when $A =< H , H =< $Z -> 72 {Y, T1} = get_name(T, [H]), 73 X = list_to_atom(Y), 74 case reserved_word(X) of 75 true -> 76 [{X,Lno}|tokenise(T1,Lno)]; 77 false -> 78 [{typereference,Lno,X}|tokenise(T1,Lno)]; 79 rstrtype -> 80 [{restrictedcharacterstringtype,Lno,X}|tokenise(T1,Lno)] 81 end; 82 83tokenise([$-,H|T],Lno) when $0 =< H , H =< $9 -> 84 {X, T1} = get_number(T, [H]), 85 [{number,Lno,-1 * list_to_integer(X)}|tokenise(T1,Lno)]; 86 87tokenise([H|T],Lno) when $0 =< H , H =< $9 -> 88 {X, T1} = get_number(T, [H]), 89 [{number,Lno,list_to_integer(X)}|tokenise(T1,Lno)]; 90 91tokenise([$-,$-|T],Lno) -> 92 tokenise(skip_comment(T),Lno); 93tokenise([$:,$:,$=|T],Lno) -> 94 [{'::=',Lno}|tokenise(T,Lno)]; 95 96tokenise([$'|T],Lno) -> 97 case catch collect_quoted(T,Lno,[]) of 98 {'ERR',_} -> 99 throw({'ERR','bad_quote'}); 100 {Thing, T1} -> 101 [Thing|tokenise(T1,Lno)] 102 end; 103 104tokenise([$"|T],Lno) -> 105 collect_string(T,Lno); 106 107tokenise([${|T],Lno) -> 108 [{'{',Lno}|tokenise(T,Lno)]; 109 110tokenise([$}|T],Lno) -> 111 [{'}',Lno}|tokenise(T,Lno)]; 112 113tokenise([$]|T],Lno) -> 114 [{']',Lno}|tokenise(T,Lno)]; 115 116tokenise([$[|T],Lno) -> 117 [{'[',Lno}|tokenise(T,Lno)]; 118 119tokenise([$,|T],Lno) -> 120 [{',',Lno}|tokenise(T,Lno)]; 121 122tokenise([$(|T],Lno) -> 123 [{'(',Lno}|tokenise(T,Lno)]; 124tokenise([$)|T],Lno) -> 125 [{')',Lno}|tokenise(T,Lno)]; 126 127tokenise([$.,$.,$.|T],Lno) -> 128 [{'...',Lno}|tokenise(T,Lno)]; 129 130tokenise([$.,$.|T],Lno) -> 131 [{'..',Lno}|tokenise(T,Lno)]; 132 133tokenise([$.|T],Lno) -> 134 [{'.',Lno}|tokenise(T,Lno)]; 135tokenise([$^|T],Lno) -> 136 [{'^',Lno}|tokenise(T,Lno)]; 137tokenise([$!|T],Lno) -> 138 [{'!',Lno}|tokenise(T,Lno)]; 139tokenise([$||T],Lno) -> 140 [{'|',Lno}|tokenise(T,Lno)]; 141 142 143tokenise([H|T],Lno) -> 144 case white_space(H) of 145 true -> 146 tokenise(T,Lno); 147 false -> 148 [{list_to_atom([H]),Lno}|tokenise(T,Lno)] 149 end; 150tokenise([],_) -> 151 []. 152 153 154collect_string(L,Lno) -> 155 collect_string(L,Lno,[]). 156 157collect_string([],_,_) -> 158 throw({'ERR','bad_quote found eof'}); 159 160collect_string([H|T],Lno,Str) -> 161 case H of 162 $" -> 163 [{cstring,1,lists:reverse(Str)}|tokenise(T,Lno)]; 164 Ch -> 165 collect_string(T,Lno,[Ch|Str]) 166 end. 167 168 169 170% <name> is letters digits hyphens 171% hypen is not the last character. Hypen hyphen is NOT allowed 172% 173% <identifier> ::= <lowercase> <name> 174 175get_name([$-,Char|T], L) -> 176 case isalnum(Char) of 177 true -> 178 get_name(T,[Char,$-|L]); 179 false -> 180 {lists:reverse(L),[$-,Char|T]} 181 end; 182get_name([$-|T], L) -> 183 {lists:reverse(L),[$-|T]}; 184get_name([Char|T], L) -> 185 case isalnum(Char) of 186 true -> 187 get_name(T,[Char|L]); 188 false -> 189 {lists:reverse(L),[Char|T]} 190 end; 191get_name([], L) -> 192 {lists:reverse(L), []}. 193 194 195isalnum(H) when $A =< H , H =< $Z -> 196 true; 197isalnum(H) when $a =< H , H =< $z -> 198 true; 199isalnum(H) when $0 =< H , H =< $9 -> 200 true; 201isalnum(_) -> 202 false. 203 204isdigit(H) when $0 =< H , H =< $9 -> 205 true; 206isdigit(_) -> 207 false. 208 209white_space(9) -> true; 210white_space(10) -> true; 211white_space(13) -> true; 212white_space(32) -> true; 213white_space(_) -> false. 214 215 216get_number([H|T], L) -> 217 case isdigit(H) of 218 true -> 219 get_number(T, [H|L]); 220 false -> 221 {lists:reverse(L), [H|T]} 222 end; 223get_number([], L) -> 224 {lists:reverse(L), []}. 225 226skip_comment([]) -> 227 []; 228skip_comment([$-,$-|T]) -> 229 T; 230skip_comment([_|T]) -> 231 skip_comment(T). 232 233collect_quoted([$',$B|T],Lno, L) -> 234 case check_bin(L) of 235 true -> 236 {{bstring,Lno, lists:reverse(L)}, T}; 237 false -> 238 throw({'ERR',{invalid_binary_number, lists:reverse(L)}}) 239 end; 240collect_quoted([$',$H|T],Lno, L) -> 241 case check_hex(L) of 242 true -> 243 {{hstring,Lno, lists:reverse(L)}, T}; 244 false -> 245 throw({'ERR',{invalid_binary_number, lists:reverse(L)}}) 246 end; 247collect_quoted([H|T], Lno, L) -> 248 collect_quoted(T, Lno,[H|L]); 249collect_quoted([], _, _) -> % This should be allowed FIX later 250 throw({'ERR',{eol_in_token}}). 251 252check_bin([$0|T]) -> 253 check_bin(T); 254check_bin([$1|T]) -> 255 check_bin(T); 256check_bin([]) -> 257 true; 258check_bin(_) -> 259 false. 260 261check_hex([H|T]) when $0 =< H , H =< $9 -> 262 check_hex(T); 263check_hex([H|T]) when $A =< H , H =< $F -> 264 check_hex(T); 265check_hex([]) -> 266 true; 267check_hex(_) -> 268 false. 269 270 271%% reserved_word(A) -> true|false|rstrtype 272%% A = atom() 273%% returns true if A is a reserved ASN.1 word 274%% returns false if A is not a reserved word 275%% returns rstrtype if A is a reserved word in the group 276%% RestrictedCharacterStringType 277reserved_word('ABSENT') -> true; 278%reserved_word('ABSTRACT-SYNTAX') -> true; % impl as predef item 279reserved_word('ALL') -> true; 280reserved_word('ANY') -> true; 281reserved_word('APPLICATION') -> true; 282reserved_word('AUTOMATIC') -> true; 283reserved_word('BEGIN') -> true; 284reserved_word('BIT') -> true; 285reserved_word('BMPString') -> rstrtype; 286reserved_word('BOOLEAN') -> true; 287reserved_word('BY') -> true; 288reserved_word('CHARACTER') -> true; 289reserved_word('CHOICE') -> true; 290reserved_word('CLASS') -> true; 291reserved_word('COMPONENT') -> true; 292reserved_word('COMPONENTS') -> true; 293reserved_word('CONSTRAINED') -> true; 294reserved_word('DEFAULT') -> true; 295reserved_word('DEFINED') -> true; 296reserved_word('DEFINITIONS') -> true; 297reserved_word('EMBEDDED') -> true; 298reserved_word('END') -> true; 299reserved_word('ENUMERATED') -> true; 300reserved_word('EXCEPT') -> true; 301reserved_word('EXPLICIT') -> true; 302reserved_word('EXPORTS') -> true; 303reserved_word('EXTERNAL') -> true; 304reserved_word('FALSE') -> true; 305reserved_word('FROM') -> true; 306reserved_word('GeneralizedTime') -> true; 307reserved_word('GeneralString') -> rstrtype; 308reserved_word('GraphicString') -> rstrtype; 309reserved_word('IA5String') -> rstrtype; 310% reserved_word('TYPE-IDENTIFIER') -> true; % impl as predef item 311reserved_word('IDENTIFIER') -> true; 312reserved_word('IMPLICIT') -> true; 313reserved_word('IMPORTS') -> true; 314reserved_word('INCLUDES') -> true; 315reserved_word('INSTANCE') -> true; 316reserved_word('INTEGER') -> true; 317reserved_word('INTERSECTION') -> true; 318reserved_word('ISO646String') -> rstrtype; 319reserved_word('MAX') -> true; 320reserved_word('MIN') -> true; 321reserved_word('MINUS-INFINITY') -> true; 322reserved_word('NULL') -> true; 323reserved_word('NumericString') -> rstrtype; 324reserved_word('OBJECT') -> true; 325reserved_word('ObjectDescriptor') -> true; 326reserved_word('OCTET') -> true; 327reserved_word('OF') -> true; 328reserved_word('OPTIONAL') -> true; 329reserved_word('PDV') -> true; 330reserved_word('PLUS-INFINITY') -> true; 331reserved_word('PRESENT') -> true; 332reserved_word('PrintableString') -> rstrtype; 333reserved_word('PRIVATE') -> true; 334reserved_word('REAL') -> true; 335reserved_word('SEQUENCE') -> true; 336reserved_word('SET') -> true; 337reserved_word('SIZE') -> true; 338reserved_word('STRING') -> true; 339reserved_word('SYNTAX') -> true; 340reserved_word('T61String') -> rstrtype; 341reserved_word('TAGS') -> true; 342reserved_word('TeletexString') -> rstrtype; 343reserved_word('TRUE') -> true; 344reserved_word('UNION') -> true; 345reserved_word('UNIQUE') -> true; 346reserved_word('UNIVERSAL') -> true; 347reserved_word('UniversalString') -> rstrtype; 348reserved_word('UTCTime') -> true; 349reserved_word('VideotexString') -> rstrtype; 350reserved_word('VisibleString') -> rstrtype; 351reserved_word('WITH') -> true; 352reserved_word(_) -> false. 353