1%% 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 1997-2016. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19%% 20%% 21-module(asn1ct_tok). 22 23%% Tokenize ASN.1 code (input to parser generated with yecc) 24 25-export([file/1,format_error/1]). 26 27file(File0) -> 28 case file:open(File0, [read]) of 29 {error, Reason} -> 30 {error,{File0,file:format_error(Reason)}}; 31 {ok,Stream} -> 32 try 33 process(Stream, 1, []) 34 catch 35 throw:{error,Line,Reason} -> 36 File = filename:basename(File0), 37 Error = {structured_error,{File,Line},?MODULE,Reason}, 38 {error,[Error]} 39 end 40 end. 41 42process(Stream, Lno, R) -> 43 process(io:get_line(Stream, ''), Stream, Lno, R). 44 45process(eof, Stream, Lno, Acc) -> 46 ok = file:close(Stream), 47 lists:reverse([{'$end',Lno}|Acc]); 48process(L, Stream, Lno0, Acc) when is_list(L) -> 49 try tokenise(Stream, L, Lno0, []) of 50 {Lno,[]} -> 51 process(Stream, Lno, Acc); 52 {Lno,Ts} -> 53 process(Stream, Lno, Ts++Acc) 54 catch 55 throw:{error,Reason} -> 56 throw({error,Lno0,Reason}) 57 end. 58 59format_error(eof_in_comment) -> 60 "premature end of file in multi-line comment"; 61format_error(eol_in_token) -> 62 "end of line in token"; 63format_error({invalid_binary_number,Str}) -> 64 io_lib:format("invalid binary number: '~s'", [Str]); 65format_error({invalid_hex_number,Str}) -> 66 io_lib:format("invalid hex number: '~s'", [Str]); 67format_error(Other) -> 68 io_lib:format("~p", [Other]). 69 70tokenise(Stream, [$&,H|T], Lno, R) when $A =< H , H =< $Z -> 71 {X,T1} = get_name(T, [H]), 72 tokenise(Stream, T1, Lno, [{typefieldreference,Lno,X}|R]); 73tokenise(Stream, [$&,H|T], Lno, R) when $a =< H , H =< $z -> 74 {X,T1} = get_name(T, [H]), 75 tokenise(Stream, T1, Lno, [{valuefieldreference,Lno,X}|R]); 76 77tokenise(Stream, "--"++T, Lno, R) -> 78 tokenise(Stream, skip_comment(T), Lno, R); 79 80tokenise(Stream, [$-,H|T], Lno, R) when $0 =< H , H =< $9 -> 81 {X, T1} = get_number(T, [H]), 82 tokenise(Stream, T1, Lno, [{number,Lno,-list_to_integer(X)}|R]); 83 84tokenise(Stream, "/*"++T, Lno0, R) -> 85 {Lno,T1} = skip_multiline_comment(Stream, T, Lno0, 0), 86 tokenise(Stream, T1, Lno, R); 87 88tokenise(Stream, "::="++T, Lno, R) -> 89 tokenise(Stream, T, Lno, [{'::=',Lno}|R]); 90tokenise(Stream, ":"++T, Lno, R) -> 91 tokenise(Stream, T, Lno, [{':',Lno}|R]); 92 93tokenise(Stream, "'"++T0, Lno, R) -> 94 {Thing, T1} = collect_quoted(T0, Lno, []), 95 tokenise(Stream, T1, Lno, [Thing|R]); 96tokenise(Stream,[$"|T],Lno,R) -> 97 {Str,T1} = collect_string(T,Lno), 98 tokenise(Stream,T1,Lno,[Str|R]); 99 100tokenise(Stream, "{"++T, Lno, R) -> 101 tokenise(Stream, T, Lno, [{'{',Lno}|R]); 102tokenise(Stream, "}"++T, Lno, R) -> 103 tokenise(Stream, T, Lno, [{'}',Lno}|R]); 104 105%% Even though x.680 specify '[[' and ']]' as lexical items 106%% it does not work to have them as such since the single '[' and ']' can 107%% be used beside each other in 'WITH SYNTAX' in x.681. 108%% The solution chosen here, i.e. to have them as separate lexical items 109%% will not detect the cases where there is white space between them 110%% which would be an error in the use in ExtensionAdditionGroups. 111 112tokenise(Stream, "]"++T, Lno, R) -> 113 tokenise(Stream, T, Lno, [{']',Lno}|R]); 114tokenise(Stream, "["++T,Lno,R) -> 115 tokenise(Stream, T, Lno, [{'[',Lno}|R]); 116 117tokenise(Stream, ","++T,Lno,R) -> 118 tokenise(Stream, T, Lno, [{',',Lno}|R]); 119 120tokenise(Stream, "("++T, Lno, R) -> 121 tokenise(Stream, T, Lno, [{'(',Lno}|R]); 122tokenise(Stream, ")"++T, Lno, R) -> 123 tokenise(Stream, T, Lno, [{')',Lno}|R]); 124 125tokenise(Stream, "..."++T,Lno,R) -> 126 tokenise(Stream, T, Lno, [{'...',Lno}|R]); 127tokenise(Stream, ".."++T, Lno, R) -> 128 tokenise(Stream, T, Lno, [{'..',Lno}|R]); 129tokenise(Stream, "."++T, Lno, R) -> 130 tokenise(Stream, T, Lno, [{'.',Lno}|R]); 131 132tokenise(Stream, "|"++T, Lno, R) -> 133 tokenise(Stream, T, Lno, [{'|',Lno}|R]); 134 135tokenise(Stream, [H|T], Lno, R) when $A =< H , H =< $Z -> 136 {X,T1} = get_name(T, [H]), 137 case reserved_word(X) of 138 true -> 139 tokenise(Stream, T1, Lno, [{X,Lno}|R]); 140 false -> 141 tokenise(Stream, T1, Lno, [{typereference,Lno,X}|R]); 142 rstrtype -> 143 tokenise(Stream, T1, Lno, [{restrictedcharacterstringtype,Lno,X}|R]) 144 end; 145 146tokenise(Stream, [H|T], Lno, R) when $a =< H , H =< $z -> 147 {X, T1} = get_name(T, [H]), 148 tokenise(Stream, T1, Lno, [{identifier,Lno,X}|R]); 149 150tokenise(Stream, [H|T], Lno, R) when $0 =< H , H =< $9 -> 151 {X, T1} = get_number(T, [H]), 152 tokenise(Stream, T1, Lno, [{number,Lno,list_to_integer(X)}|R]); 153 154tokenise(Stream, [H|T], Lno, R) when H =< $\s -> 155 tokenise(Stream, T, Lno, R); 156 157tokenise(Stream, [H|T], Lno, R) -> 158 tokenise(Stream, T, Lno, [{list_to_atom([H]),Lno}|R]); 159tokenise(_Stream, [], Lno, R) -> 160 {Lno+1,R}. 161 162 163collect_string(L, Lno) -> 164 collect_string(L, Lno, []). 165 166collect_string([$"|T], _Lno, Str) -> 167 {{cstring,1,lists:reverse(Str)},T}; 168collect_string([H|T], Lno, Str) -> 169 collect_string(T, Lno, [H|Str]); 170collect_string([], _, _) -> 171 throw({error,missing_quote_at_eof}). 172 173%% <name> is letters digits hyphens. 174%% Hypen is not the last character. Hypen hyphen is NOT allowed. 175%% 176%% <identifier> ::= <lowercase> <name> 177 178get_name([$-,Char|T]=T0, Acc) -> 179 case isalnum(Char) of 180 true -> 181 get_name(T, [Char,$-|Acc]); 182 false -> 183 {list_to_atom(lists:reverse(Acc)),T0} 184 end; 185get_name([$-|_]=T, Acc) -> 186 {list_to_atom(lists:reverse(Acc)),T}; 187get_name([Char|T]=T0, Acc) -> 188 case isalnum(Char) of 189 true -> 190 get_name(T, [Char|Acc]); 191 false -> 192 {list_to_atom(lists:reverse(Acc)),T0} 193 end; 194get_name([], Acc) -> 195 {list_to_atom(lists:reverse(Acc)),[]}. 196 197isalnum(H) when $A =< H , H =< $Z -> 198 true; 199isalnum(H) when $a =< H , H =< $z -> 200 true; 201isalnum(H) when $0 =< H , H =< $9 -> 202 true; 203isalnum(_) -> 204 false. 205 206isdigit(H) when $0 =< H , H =< $9 -> 207 true; 208isdigit(_) -> 209 false. 210 211get_number([H|T]=T0, L) -> 212 case isdigit(H) of 213 true -> 214 get_number(T, [H|L]); 215 false -> 216 {lists:reverse(L), T0} 217 end; 218get_number([], L) -> 219 {lists:reverse(L), []}. 220 221skip_comment([]) -> []; 222skip_comment("--"++T) -> T; 223skip_comment([_|T]) -> skip_comment(T). 224 225skip_multiline_comment(Stream, [], Lno, Level) -> 226 case io:get_line(Stream, '') of 227 eof -> 228 throw({error,eof_in_comment}); 229 Line -> 230 skip_multiline_comment(Stream, Line, Lno+1, Level) 231 end; 232skip_multiline_comment(_Stream, "*/"++T, Lno, 0) -> 233 {Lno,T}; 234skip_multiline_comment(Stream, "*/"++T, Lno, Level) -> 235 skip_multiline_comment(Stream, T, Lno, Level - 1); 236skip_multiline_comment(Stream, "/*"++T, Lno, Level) -> 237 skip_multiline_comment(Stream, T, Lno, Level + 1); 238skip_multiline_comment(Stream, [_|T], Lno, Level) -> 239 skip_multiline_comment(Stream, T, Lno, Level). 240 241collect_quoted("'B"++T, Lno, L) -> 242 case validate_bin(L) of 243 {ok, Bin} -> 244 {{bstring,Lno,Bin}, T}; 245 false -> 246 throw({error,{invalid_binary_number,lists:reverse(L)}}) 247 end; 248collect_quoted("'H"++T, Lno, L) -> 249 case validate_hex(L) of 250 {ok, Hex} -> 251 {{hstring,Lno,Hex}, T}; 252 false -> 253 throw({error,{invalid_hex_number,lists:reverse(L)}}) 254 end; 255collect_quoted([H|T], Lno, L) -> 256 collect_quoted(T, Lno,[H|L]); 257collect_quoted([], _, _) -> % This should be allowed FIX later 258 throw({error,eol_in_token}). 259 260validate_bin(L) -> 261 validate_bin(L,[]). 262 263validate_bin([H|T], A) when H =:= $0; H =:= $1 -> 264 validate_bin(T, [H|A]); 265validate_bin([$\s|T], A) -> 266 validate_bin(T, A); 267validate_bin([_|_], _) -> 268 false; 269validate_bin([], A) -> 270 {ok, A}. 271 272validate_hex(L) -> 273 validate_hex(L,[]). 274 275validate_hex([H|T], A) when $0 =< H , H =< $9 -> 276 validate_hex(T, [H|A]); 277validate_hex([H|T], A) when $A =< H , H =< $F -> 278 validate_hex(T, [H|A]); 279validate_hex([$\s|T], A) -> 280 validate_hex(T, A); 281validate_hex([_|_], _) -> 282 false; 283validate_hex([], A) -> 284 {ok, A}. 285 286%% reserved_word(A) -> true|false|rstrtype 287%% A = atom() 288%% returns true if A is a reserved ASN.1 word 289%% returns false if A is not a reserved word 290%% returns rstrtype if A is a reserved word in the group 291%% RestrictedCharacterStringType 292reserved_word('ABSENT') -> true; 293reserved_word('ALL') -> true; 294reserved_word('ANY') -> true; 295reserved_word('APPLICATION') -> true; 296reserved_word('AUTOMATIC') -> true; 297reserved_word('BEGIN') -> true; 298reserved_word('BIT') -> true; 299reserved_word('BMPString') -> rstrtype; 300reserved_word('BOOLEAN') -> true; 301reserved_word('BY') -> true; 302reserved_word('CHARACTER') -> true; 303reserved_word('CHOICE') -> true; 304reserved_word('CLASS') -> true; 305reserved_word('COMPONENT') -> true; 306reserved_word('COMPONENTS') -> true; 307reserved_word('CONSTRAINED') -> true; 308reserved_word('CONTAINING') -> true; 309reserved_word('DEFAULT') -> true; 310reserved_word('DEFINED') -> true; % not present in X.680 07/2002 311reserved_word('DEFINITIONS') -> true; 312reserved_word('EMBEDDED') -> true; 313reserved_word('ENCODED') -> true; 314reserved_word('END') -> true; 315reserved_word('ENUMERATED') -> true; 316reserved_word('EXCEPT') -> true; 317reserved_word('EXPLICIT') -> true; 318reserved_word('EXPORTS') -> true; 319reserved_word('EXTENSIBILITY') -> true; 320reserved_word('EXTERNAL') -> true; 321reserved_word('FALSE') -> true; 322reserved_word('FROM') -> true; 323reserved_word('GeneralizedTime') -> true; 324reserved_word('GeneralString') -> rstrtype; 325reserved_word('GraphicString') -> rstrtype; 326reserved_word('IA5String') -> rstrtype; 327reserved_word('IDENTIFIER') -> true; 328reserved_word('IMPLICIT') -> true; 329reserved_word('IMPLIED') -> true; 330reserved_word('IMPORTS') -> true; 331reserved_word('INCLUDES') -> true; 332reserved_word('INSTANCE') -> true; 333reserved_word('INTEGER') -> true; 334reserved_word('INTERSECTION') -> true; 335reserved_word('MAX') -> true; 336reserved_word('MIN') -> true; 337reserved_word('MINUS-INFINITY') -> true; 338reserved_word('NULL') -> true; 339reserved_word('NumericString') -> rstrtype; 340reserved_word('OBJECT') -> true; 341reserved_word('ObjectDescriptor') -> true; 342reserved_word('OCTET') -> true; 343reserved_word('OF') -> true; 344reserved_word('OPTIONAL') -> true; 345reserved_word('PATTERN') -> true; 346reserved_word('PDV') -> true; 347reserved_word('PLUS-INFINITY') -> true; 348reserved_word('PRESENT') -> true; 349reserved_word('PrintableString') -> rstrtype; 350reserved_word('PRIVATE') -> true; 351reserved_word('REAL') -> true; 352reserved_word('RELATIVE-OID') -> true; 353reserved_word('SEQUENCE') -> true; 354reserved_word('SET') -> true; 355reserved_word('SIZE') -> true; 356reserved_word('STRING') -> true; 357reserved_word('SYNTAX') -> true; 358reserved_word('T61String') -> rstrtype; 359reserved_word('TAGS') -> true; 360reserved_word('TeletexString') -> rstrtype; 361reserved_word('TRUE') -> true; 362reserved_word('UNION') -> true; 363reserved_word('UNIQUE') -> true; 364reserved_word('UNIVERSAL') -> true; 365reserved_word('UniversalString') -> rstrtype; 366reserved_word('UTCTime') -> true; 367reserved_word('UTF8String') -> rstrtype; 368reserved_word('VideotexString') -> rstrtype; 369reserved_word('VisibleString') -> rstrtype; 370reserved_word('WITH') -> true; 371reserved_word(_) -> false. 372