1%% 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 2010-2016. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19%% 20 21-module(diameter_dict_scanner). 22 23%% 24%% A scanner for dictionary files of the form expected by yecc. 25%% 26 27-export([scan/1, 28 format_error/1]). 29 30-export([is_name/1]). 31 32%% ----------------------------------------------------------- 33%% # scan/1 34%% ----------------------------------------------------------- 35 36-spec scan(string() | binary()) 37 -> {ok, [Token]} 38 | {error, {string(), string(), Lineno}} 39 when Token :: {word, Lineno, string()} 40 | {number, Lineno, non_neg_integer()} 41 | {Symbol, Lineno}, 42 Lineno :: pos_integer(), 43 Symbol :: '{' | '}' | '<' | '>' | '[' | ']' 44 | '*' | '::=' | ':' | ',' | '-' 45 | avp_types 46 | avp_vendor_id 47 | codecs 48 | custom_types 49 | define 50 | grouped 51 | id 52 | inherits 53 | messages 54 | name 55 | prefix 56 | vendor 57 | '$end' 58 | code 59 | 'answer-message' 60 | 'AVP' 61 | 'AVP-Header' 62 | 'Diameter' 63 | 'Diameter-Header' 64 | 'Header' 65 | 'REQ' 66 | 'PXY' 67 | 'ERR'. 68 69scan(B) 70 when is_binary(B) -> 71 scan(binary_to_list(B)); 72scan(S) -> 73 scan(S, {1, []}). 74 75scan(S, {Lineno, Acc}) -> 76 case split(S) of 77 '$end' = E -> 78 {ok, lists:reverse([{E, Lineno} | Acc])}; 79 {Tok, Rest} -> 80 scan(Rest, acc(Tok, Lineno, Acc)); 81 Reason when is_list(Reason) -> 82 {error, {Reason, S, Lineno}} 83 end. 84 85%% format_error/1 86 87format_error({Reason, Input, Lineno}) -> 88 io_lib:format("~s at line ~p: ~s", 89 [Reason, Lineno, head(Input, [], 20, true)]). 90 91%% is_name/1 92 93is_name([H|T]) -> 94 is_alphanum(H) andalso lists:all(fun is_name_ch/1, T). 95 96%% =========================================================================== 97 98head(Str, Acc, N, _) 99 when [] == Str; 100 0 == N; 101 $\r == hd(Str); 102 $\n == hd(Str) -> 103 lists:reverse(Acc); 104head([C|Rest], Acc, N, true = T) %% skip leading whitespace 105 when C == $\s; 106 C == $\t; 107 C == $\f; 108 C == $\v -> 109 head(Rest, Acc, N, T); 110head([C|Rest], Acc, N, _) -> 111 head(Rest, [C|Acc], N-1, false). 112 113acc(endline, Lineno, Acc) -> 114 {Lineno + 1, Acc}; 115acc(T, Lineno, Acc) -> 116 {Lineno, [tok(T, Lineno) | Acc]}. 117 118tok({Cat, Sym}, Lineno) -> 119 {Cat, Lineno, Sym}; 120tok(Sym, Lineno) -> 121 {Sym, Lineno}. 122 123%% # split/1 124%% 125%% Output: {Token, Rest} | atom() 126 127%% Finito. 128split("") -> 129 '$end'; 130 131%% Skip comments. This precludes using semicolon for any other purpose. 132split([$;|T]) -> 133 split(lists:dropwhile(fun(C) -> not is_eol_ch(C) end, T)); 134 135%% Beginning of a section. 136split([$@|T]) -> 137 {Name, Rest} = lists:splitwith(fun is_name_ch/1, T), 138 case section(Name) of 139 false -> 140 "Unknown section"; 141 'end' -> 142 '$end'; 143 A -> 144 {A, Rest} 145 end; 146 147split("::=" ++ T) -> 148 {'::=', T}; 149 150split([H|T]) 151 when H == ${; H == $}; 152 H == $<; H == $>; 153 H == $[; H == $]; 154 H == $*; H == $:; H == $,; H == $- -> 155 {list_to_atom([H]), T}; 156 157%% RFC 3588 requires various names to begin with a letter but 3GPP (for 158%% one) abuses this. (eg 3GPP-Charging-Id in TS32.299.) 159split([H|_] = L) when $0 =< H, H =< $9 -> 160 {P, Rest} = splitwith(fun is_name_ch/1, L), 161 Tok = try 162 {number, read_int(P)} 163 catch 164 error:_ -> 165 word(P) 166 end, 167 {Tok, Rest}; 168 169split([H|_] = L) when $a =< H, H =< $z; 170 $A =< H, H =< $Z -> 171 {P, Rest} = splitwith(fun is_name_ch/1, L), 172 {word(P), Rest}; 173 174split([$'|T]) -> 175 case lists:splitwith(fun(C) -> not lists:member(C, "'\r\n") end, T) of 176 {[_|_] = A, [$'|Rest]} -> 177 {{word, A}, Rest}; 178 {[], [$'|_]} -> 179 "Empty string"; 180 _ -> %% not terminated on same line 181 "Unterminated string" 182 end; 183 184%% Line ending of various forms. 185split([$\r,$\n|T]) -> 186 {endline, T}; 187split([C|T]) 188 when C == $\r; 189 C == $\n -> 190 {endline, T}; 191 192%% Ignore whitespace. 193split([C|T]) 194 when C == $\s; 195 C == $\t; 196 C == $\f; 197 C == $\v -> 198 split(T); 199 200split(_) -> 201 "Unexpected character". 202 203%% word/1 204 205%% Reserved words significant in parsing ... 206word(S) 207 when S == "answer-message"; 208 S == "code"; 209 S == "AVP"; 210 S == "AVP-Header"; 211 S == "Diameter"; 212 S == "Diameter-Header"; 213 S == "Header"; 214 S == "REQ"; 215 S == "PXY"; 216 S == "ERR" -> 217 list_to_atom(S); 218 219%% ... or not. 220word(S) -> 221 {word, S}. 222 223%% section/1 224 225section(N) 226 when N == "avp_types"; 227 N == "avp_vendor_id"; 228 N == "codecs"; 229 N == "custom_types"; 230 N == "define"; 231 N == "end"; 232 N == "enum"; 233 N == "grouped"; 234 N == "id"; 235 N == "inherits"; 236 N == "messages"; 237 N == "name"; 238 N == "prefix"; 239 N == "vendor" -> 240 list_to_atom(N); 241section(_) -> 242 false. 243 244%% read_int/1 245 246read_int([$0,X|S]) 247 when X == $X; 248 X == $x -> 249 {ok, [N], []} = io_lib:fread("~16u", S), 250 N; 251 252read_int(S) -> 253 list_to_integer(S). 254 255%% splitwith/3 256 257splitwith(Fun, [H|T]) -> 258 {SH, ST} = lists:splitwith(Fun, T), 259 {[H|SH], ST}. 260 261is_eol_ch(C) -> 262 C == $\n orelse C == $\r. 263 264is_name_ch(C) -> 265 is_alphanum(C) orelse C == $- orelse C == $_. 266 267is_alphanum(C) -> 268 is_lower(C) orelse is_upper(C) orelse is_digit(C). 269 270is_lower(C) -> 271 $a =< C andalso C =< $z. 272 273is_upper(C) -> 274 $A =< C andalso C =< $Z. 275 276is_digit(C) -> 277 $0 =< C andalso C =< $9. 278