1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 2010-2016. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20
21-module(diameter_dict_scanner).
22
23%%
24%% A scanner for dictionary files of the form expected by yecc.
25%%
26
27-export([scan/1,
28         format_error/1]).
29
30-export([is_name/1]).
31
32%% -----------------------------------------------------------
33%% # scan/1
34%% -----------------------------------------------------------
35
36-spec scan(string() | binary())
37   -> {ok, [Token]}
38    | {error, {string(), string(), Lineno}}
39 when Token  :: {word, Lineno, string()}
40              | {number, Lineno, non_neg_integer()}
41              | {Symbol, Lineno},
42      Lineno   :: pos_integer(),
43      Symbol   :: '{' | '}'   | '<' | '>' | '[' | ']'
44                | '*' | '::=' | ':' | ',' | '-'
45                | avp_types
46                | avp_vendor_id
47                | codecs
48                | custom_types
49                | define
50                | grouped
51                | id
52                | inherits
53                | messages
54                | name
55                | prefix
56                | vendor
57                | '$end'
58                | code
59                | 'answer-message'
60                | 'AVP'
61                | 'AVP-Header'
62                | 'Diameter'
63                | 'Diameter-Header'
64                | 'Header'
65                | 'REQ'
66                | 'PXY'
67                | 'ERR'.
68
69scan(B)
70  when is_binary(B) ->
71    scan(binary_to_list(B));
72scan(S) ->
73    scan(S, {1, []}).
74
75scan(S, {Lineno, Acc}) ->
76    case split(S) of
77        '$end' = E ->
78            {ok, lists:reverse([{E, Lineno} | Acc])};
79        {Tok, Rest} ->
80            scan(Rest, acc(Tok, Lineno, Acc));
81        Reason when is_list(Reason) ->
82            {error, {Reason, S, Lineno}}
83    end.
84
85%% format_error/1
86
87format_error({Reason, Input, Lineno}) ->
88    io_lib:format("~s at line ~p: ~s",
89                  [Reason, Lineno, head(Input, [], 20, true)]).
90
91%% is_name/1
92
93is_name([H|T]) ->
94    is_alphanum(H) andalso lists:all(fun is_name_ch/1, T).
95
96%% ===========================================================================
97
98head(Str, Acc, N, _)
99  when [] == Str;
100       0 == N;
101       $\r == hd(Str);
102       $\n == hd(Str) ->
103    lists:reverse(Acc);
104head([C|Rest], Acc, N, true = T)  %% skip leading whitespace
105  when C == $\s;
106       C == $\t;
107       C == $\f;
108       C == $\v ->
109    head(Rest, Acc, N, T);
110head([C|Rest], Acc, N, _) ->
111    head(Rest, [C|Acc], N-1, false).
112
113acc(endline, Lineno, Acc) ->
114    {Lineno + 1, Acc};
115acc(T, Lineno, Acc) ->
116    {Lineno, [tok(T, Lineno) | Acc]}.
117
118tok({Cat, Sym}, Lineno) ->
119    {Cat, Lineno, Sym};
120tok(Sym, Lineno) ->
121    {Sym, Lineno}.
122
123%% # split/1
124%%
125%% Output: {Token, Rest} | atom()
126
127%% Finito.
128split("") ->
129    '$end';
130
131%% Skip comments. This precludes using semicolon for any other purpose.
132split([$;|T]) ->
133    split(lists:dropwhile(fun(C) -> not is_eol_ch(C) end, T));
134
135%% Beginning of a section.
136split([$@|T]) ->
137    {Name, Rest} = lists:splitwith(fun is_name_ch/1, T),
138    case section(Name) of
139        false ->
140            "Unknown section";
141        'end' ->
142            '$end';
143        A ->
144            {A, Rest}
145    end;
146
147split("::=" ++ T) ->
148    {'::=', T};
149
150split([H|T])
151  when H == ${; H == $};
152       H == $<; H == $>;
153       H == $[; H == $];
154       H == $*; H == $:; H == $,; H == $- ->
155    {list_to_atom([H]), T};
156
157%% RFC 3588 requires various names to begin with a letter but 3GPP (for
158%% one) abuses this. (eg 3GPP-Charging-Id in TS32.299.)
159split([H|_] = L) when $0 =< H, H =< $9 ->
160    {P, Rest} = splitwith(fun is_name_ch/1, L),
161    Tok = try
162              {number, read_int(P)}
163          catch
164              error:_ ->
165                  word(P)
166          end,
167    {Tok, Rest};
168
169split([H|_] = L) when $a =< H, H =< $z;
170                      $A =< H, H =< $Z ->
171    {P, Rest} = splitwith(fun is_name_ch/1, L),
172    {word(P), Rest};
173
174split([$'|T]) ->
175    case lists:splitwith(fun(C) -> not lists:member(C, "'\r\n") end, T) of
176        {[_|_] = A, [$'|Rest]} ->
177            {{word, A}, Rest};
178        {[], [$'|_]} ->
179            "Empty string";
180        _ -> %% not terminated on same line
181            "Unterminated string"
182    end;
183
184%% Line ending of various forms.
185split([$\r,$\n|T]) ->
186    {endline, T};
187split([C|T])
188  when C == $\r;
189       C == $\n ->
190    {endline, T};
191
192%% Ignore whitespace.
193split([C|T])
194  when C == $\s;
195       C == $\t;
196       C == $\f;
197       C == $\v ->
198    split(T);
199
200split(_) ->
201    "Unexpected character".
202
203%% word/1
204
205%% Reserved words significant in parsing ...
206word(S)
207  when S == "answer-message";
208       S == "code";
209       S == "AVP";
210       S == "AVP-Header";
211       S == "Diameter";
212       S == "Diameter-Header";
213       S == "Header";
214       S == "REQ";
215       S == "PXY";
216       S == "ERR" ->
217    list_to_atom(S);
218
219%% ... or not.
220word(S) ->
221    {word, S}.
222
223%% section/1
224
225section(N)
226  when N == "avp_types";
227       N == "avp_vendor_id";
228       N == "codecs";
229       N == "custom_types";
230       N == "define";
231       N == "end";
232       N == "enum";
233       N == "grouped";
234       N == "id";
235       N == "inherits";
236       N == "messages";
237       N == "name";
238       N == "prefix";
239       N == "vendor" ->
240    list_to_atom(N);
241section(_) ->
242    false.
243
244%% read_int/1
245
246read_int([$0,X|S])
247  when X == $X;
248       X == $x ->
249    {ok, [N], []} = io_lib:fread("~16u", S),
250    N;
251
252read_int(S) ->
253    list_to_integer(S).
254
255%% splitwith/3
256
257splitwith(Fun, [H|T]) ->
258    {SH, ST} = lists:splitwith(Fun, T),
259    {[H|SH], ST}.
260
261is_eol_ch(C) ->
262    C == $\n orelse C == $\r.
263
264is_name_ch(C) ->
265    is_alphanum(C) orelse C == $- orelse C == $_.
266
267is_alphanum(C) ->
268    is_lower(C) orelse is_upper(C) orelse is_digit(C).
269
270is_lower(C) ->
271    $a =< C andalso C =< $z.
272
273is_upper(C) ->
274    $A =< C andalso C =< $Z.
275
276is_digit(C) ->
277    $0 =< C andalso C =< $9.
278