1%%% -*- erlang -*-
2%%%
3%%% This file is part of hackney_lib released under the Apache 2 license.
4%%% See the NOTICE for more information.
5%%%
6-module(hackney_bstr).
7
8-export([to_binary/1,
9  to_lower/1, to_upper/1,
10  char_to_lower/1, char_to_upper/1,
11  join/2,
12  to_hex/1,
13  token_ci/2, token/2,
14  list/2,
15  nonempty_list/2,
16  params/2,
17  parameterized_tokens/1,
18  whitespace/2,
19  digits/1, digits/2, digits/3,
20  alpha/2,
21  word/2,
22  trim/1]).
23
24-export([quoted_string/2]).
25
%% @doc Coerce a term into a binary.
%%
%% Binaries are returned untouched, integers are rendered in base 10,
%% atoms are encoded as latin1 and lists are converted with
%% `list_to_binary/1'. Any other term raises `function_clause'.
-spec to_binary(iodata() | atom() | integer()) -> binary().
to_binary(Bin) when is_binary(Bin) ->
  Bin;
to_binary(Int) when is_integer(Int) ->
  list_to_binary(integer_to_list(Int));
to_binary(Atom) when is_atom(Atom) ->
  atom_to_binary(Atom, latin1);
to_binary(List) when is_list(List) ->
  list_to_binary(List).
34
%% @doc Lowercase the ASCII letters of a binary string.
%%
%% Input that is not a binary (list, atom, ...) is first converted with
%% `to_binary/1'. Each byte is mapped through `char_to_lower/1'.
-spec to_lower(binary()|atom()|list()) -> binary().
to_lower(Other) when not is_binary(Other) ->
  to_lower(to_binary(Other));
to_lower(Bin) ->
  list_to_binary([char_to_lower(Byte) || << Byte >> <= Bin]).
41
%% @doc Uppercase the ASCII letters of a binary string.
%%
%% Input that is not a binary (list, atom, ...) is first converted with
%% `to_binary/1'. Each byte is mapped through `char_to_upper/1'.
-spec to_upper(binary()|atom()|list()) -> binary().
to_upper(Other) when not is_binary(Other) ->
  to_upper(to_binary(Other));
to_upper(Bin) ->
  list_to_binary([char_to_upper(Byte) || << Byte >> <= Bin]).
47
48
%% @doc Map an uppercase ASCII letter (`A'..`Z') to its lowercase form.
%%
%% Any other value is returned unchanged.
%% @end
%% Every letter is matched as a literal on purpose: matching each value
%% directly was found to give a noticeable speed gain.
-spec char_to_lower(char()) -> char().
char_to_lower(Ch) ->
  case Ch of
    $A -> $a; $B -> $b; $C -> $c; $D -> $d; $E -> $e; $F -> $f;
    $G -> $g; $H -> $h; $I -> $i; $J -> $j; $K -> $k; $L -> $l;
    $M -> $m; $N -> $n; $O -> $o; $P -> $p; $Q -> $q; $R -> $r;
    $S -> $s; $T -> $t; $U -> $u; $V -> $v; $W -> $w; $X -> $x;
    $Y -> $y; $Z -> $z;
    _ -> Ch
  end.
80
%% @doc Map a lowercase ASCII letter (`a'..`z') to its uppercase form.
%%
%% Any other value is returned unchanged.
%% @end
%% Every letter is matched as a literal, mirroring char_to_lower/1.
-spec char_to_upper(char()) -> char().
char_to_upper(Ch) ->
  case Ch of
    $a -> $A; $b -> $B; $c -> $C; $d -> $D; $e -> $E; $f -> $F;
    $g -> $G; $h -> $H; $i -> $I; $j -> $J; $k -> $K; $l -> $L;
    $m -> $M; $n -> $N; $o -> $O; $p -> $P; $q -> $Q; $r -> $R;
    $s -> $S; $t -> $T; $u -> $U; $v -> $V; $w -> $W; $x -> $X;
    $y -> $Y; $z -> $Z;
    _ -> Ch
  end.
110
%% @doc Join a list of iodata elements into a single binary, inserting
%% `Separator' between consecutive elements.
%%
%% An empty list yields `<<>>'. The result is always a binary, including
%% for a single-element list, so the return type does not depend on how
%% many elements were given.
-spec join([iodata() | byte()], iodata() | byte()) -> binary().
join([], _Separator) ->
  <<>>;
join([First | Others], Separator) ->
  %% Prefix every element but the first with the separator; the nested
  %% iolist is flattened exactly once by iolist_to_binary/1.
  iolist_to_binary([First | [[Separator, Item] || Item <- Others]]).
124
%% @doc Render bytes as a lowercase hexadecimal string.
%%
%% Accepts a binary or a list of byte values and returns a flat list
%% holding two hex digits per input byte (high nibble first).
to_hex(Bin) when is_binary(Bin) ->
  to_hex(binary_to_list(Bin));
to_hex(Bytes) when is_list(Bytes) ->
  [Digit || Byte <- Bytes,
            Digit <- [to_digit(Byte div 16), to_digit(Byte rem 16)]].

%% Convert a nibble value to its hex character: 0-9 map to `0'..`9',
%% larger values are offset from `a'.
to_digit(N) when N >= 10 -> N - 10 + $a;
to_digit(N)              -> N + $0.
134
135
136
%% @doc Parse a case-insensitive token.
%%
%% Same as `token/2', except that the collected characters are passed
%% through `char_to_lower/1'.
-spec token_ci(binary(), fun()) -> any().
token_ci(Data, Fun) ->
  token(Data, Fun, ci, <<>>).

%% @doc Parse a token.
%%
%% Collects bytes from the start of `Data' up to (not including) the
%% first separator or control character, or up to the end of input, and
%% then returns `Fun(Rest, Token)'. `Token' is empty when `Data' starts
%% with such a character or is itself empty.
-spec token(binary(), fun()) -> any().
token(Data, Fun) ->
  token(Data, Fun, cs, <<>>).

-spec token(binary(), fun(), ci | cs, binary()) -> any().
token(<<>>, Fun, _Case, Acc) ->
  Fun(<<>>, Acc);
token(Data = << C, Rest/binary >>, Fun, Case, Acc) ->
  case is_token_end(C) of
    true ->
      %% The terminating byte is left in the remaining data.
      Fun(Data, Acc);
    false when Case =:= ci ->
      token(Rest, Fun, Case, << Acc/binary, (char_to_lower(C)) >>);
    false ->
      token(Rest, Fun, Case, << Acc/binary, C >>)
  end.

%% True for bytes that terminate a token: control characters (0-31,
%% which covers the horizontal tab, and 127) plus the separators
%% ( ) < > @ , ; : \ " / [ ] ? = { } and space.
is_token_end(C) when C < 32; C =:= 127 -> true;
is_token_end(C) -> lists:member(C, "()<>@,;:\\\"/[]?={} ").
164
165
%% @doc Parse a non-empty list of the given type.
%%
%% Same as `list/2', except that a list holding no element at all is
%% rejected with `{error, badarg}'.
-spec nonempty_list(binary(), fun()) -> [any(), ...] | {error, badarg}.
nonempty_list(Data, Fun) ->
  case list(Data, Fun, []) of
    {error, badarg} -> {error, badarg};
    [] -> {error, badarg};
    L -> lists:reverse(L)
  end.

%% @doc Parse a comma-separated list of the given type.
%%
%% `Fun(Rest, Cont)' is called at the start of each element; it is
%% expected to parse one element and continue with `Cont(Rest2, Item)'.
%% Items are returned in input order. `{error, badarg}' is returned when
%% an element is followed by anything other than whitespace, a comma or
%% the end of input; whatever `Fun' itself returns (for instance
%% `{error, badarg}') is passed through unchanged.
-spec list(binary(), fun()) -> list() | {error, badarg}.
list(Data, Fun) ->
  case list(Data, Fun, []) of
    {error, badarg} -> {error, badarg};
    L -> lists:reverse(L)
  end.

%% The accumulator holds whatever items `Fun' produces (for example the
%% `{Token, Params}' tuples built by parameterized_tokens/1), most recent
%% first, so it is typed as a list of arbitrary terms.
-spec list(binary(), fun(), [any()]) -> [any()] | {error, badarg}.
%% From RFC 2616, section 2.1:
%% <blockquote>Wherever this construct is used, null elements are allowed,
%% but do not contribute to the count of elements present.
%% That is, "(element), , (element) " is permitted, but counts
%% as only two elements. Therefore, where at least one element is required,
%% at least one non-null element MUST be present.</blockquote>
list(Data, Fun, Acc) ->
  whitespace(Data,
    fun (<<>>) -> Acc;
        %% Null element: skip the comma without adding anything.
        (<< $,, Rest/binary >>) -> list(Rest, Fun, Acc);
        (Rest) ->
          Fun(Rest, fun (D, I) -> list_after_item(D, Fun, [I|Acc]) end)
    end).

%% Continue after one parsed item: only optional whitespace followed by
%% the end of input or a comma may come next.
list_after_item(Data, Fun, Acc) ->
  whitespace(Data,
    fun (<<>>) -> Acc;
        (<< $,, Rest/binary >>) -> list(Rest, Fun, Acc);
        (_Any) -> {error, badarg}
    end).
203
204
%% @doc Parse a list of parameters, each introduced by `;' (;a=b;c=d).
%%
%% Once the remaining data no longer starts with `;' (after optional
%% whitespace), returns `Fun(Rest, Params)' where `Params' is the list
%% of `{Attr, Value}' pairs in input order. A malformed parameter yields
%% `{error, badarg}'.
-spec params(binary(), fun()) -> any().
params(Data, Fun) ->
  params(Data, Fun, []).

-spec params(binary(), fun(), [{binary(), binary()}]) -> any().
params(Data, Fun, Acc) ->
  whitespace(Data, fun (Rest) -> params_next(Rest, Fun, Acc) end).

%% Either parse one more `;'-prefixed parameter or hand the collected
%% pairs over to `Fun'.
params_next(<< $;, Rest/binary >>, Fun, Acc) ->
  param(Rest,
    fun (Rest2, Attr, Value) ->
      params(Rest2, Fun, [{Attr, Value} | Acc])
    end);
params_next(Rest, Fun, Acc) ->
  Fun(Rest, lists:reverse(Acc)).

%% Parse a single `attr=value' parameter. The attribute is a
%% case-insensitive token, the value a token or quoted string; on success
%% `Fun(Rest, Attr, Value)' is returned.
-spec param(binary(), fun()) -> any().
param(Data, Fun) ->
  whitespace(Data,
    fun (Rest) ->
      token_ci(Rest,
        fun (Tail, Attr) -> param_value(Tail, Attr, Fun) end)
    end).

%% An empty attribute name or a missing `=' is an error.
param_value(_Tail, <<>>, _Fun) ->
  {error, badarg};
param_value(<< $=, Tail/binary >>, Attr, Fun) ->
  word(Tail, fun (Rest, Value) -> Fun(Rest, Attr, Value) end);
param_value(_Tail, _Attr, _Fun) ->
  {error, badarg}.
236
%% @doc Parse a non-empty list of tokens, each optionally followed by
%% `;'-separated parameters.
%%
%% Every list item is returned as `{Token, Params}', where each entry of
%% `Params' is either a bare `Attr' or an `{Attr, Value}' pair. Returns
%% `{error, badarg}' on malformed input.
-spec parameterized_tokens(binary()) -> any().
parameterized_tokens(Data) ->
  nonempty_list(Data, fun parameterized_token/2).

%% Parse one list item: a non-empty token followed by its parameters,
%% then continue with `Cont(Rest, {Token, Params})'.
parameterized_token(Data, Cont) ->
  token(Data,
    fun (_Rest, <<>>) ->
          {error, badarg};
        (Rest, Token) ->
          WithParams = fun (Rest2, Params) -> Cont(Rest2, {Token, Params}) end,
          parameterized_tokens_params(Rest, WithParams, [])
    end).

-spec parameterized_tokens_params(binary(), fun(),
  [binary() | {binary(), binary()}]) -> any().
parameterized_tokens_params(Data, Fun, Acc) ->
  whitespace(Data,
    fun (Rest) ->
      case Rest of
        << $;, AfterSemi/binary >> ->
          parameterized_tokens_param(AfterSemi,
            fun (Rest2, Param) ->
              parameterized_tokens_params(Rest2, Fun, [Param | Acc])
            end);
        _ ->
          %% No further parameter: hand them over in input order.
          Fun(Rest, lists:reverse(Acc))
      end
    end).

-spec parameterized_tokens_param(binary(), fun()) -> any().
parameterized_tokens_param(Data, Fun) ->
  whitespace(Data,
    fun (Rest) ->
      token(Rest,
        fun (Tail, Attr) ->
          parameterized_tokens_value(Tail, Attr, Fun)
        end)
    end).

%% An empty attribute is an error; `attr=value' yields `{Attr, Value}';
%% an attribute without `=' is passed on as a bare `Attr'.
parameterized_tokens_value(_Tail, <<>>, _Fun) ->
  {error, badarg};
parameterized_tokens_value(<< $=, Tail/binary >>, Attr, Fun) ->
  word(Tail, fun (Rest, Value) -> Fun(Rest, {Attr, Value}) end);
parameterized_tokens_value(Tail, Attr, Fun) ->
  Fun(Tail, Attr).
282
%% @doc Skip leading spaces and horizontal tabs, then return the result
%% of calling `Fun' with the remaining data.
-spec whitespace(binary(), fun()) -> any().
whitespace(<< $\s, Rest/binary >>, Fun) ->
  whitespace(Rest, Fun);
whitespace(<< $\t, Rest/binary >>, Fun) ->
  whitespace(Rest, Fun);
whitespace(Data, Fun) ->
  Fun(Data).
290
%% @doc Parse a list of digits as a non negative integer.
%%
%% The digits may be followed by spaces or tabs only; any other trailing
%% data, or input that does not start with a digit, gives
%% `{error, badarg}'.
-spec digits(binary()) -> non_neg_integer() | {error, badarg}.
digits(Data) ->
  digits(Data, fun only_trailing_whitespace/2).

%% Accept the parsed integer only when nothing but whitespace follows it.
only_trailing_whitespace(Rest, Int) ->
  whitespace(Rest,
    fun (<<>>) -> Int;
        (_Leftover) -> {error, badarg}
    end).
303
%% Parse one or more leading decimal digits and return
%% `Fun(Rest, Integer)'; input that does not start with a digit gives
%% `{error, badarg}'.
-spec digits(binary(), fun()) -> any().
digits(Data, Fun) ->
  case Data of
    << D, Tail/binary >> when $0 =< D, D =< $9 ->
      digits(Tail, Fun, D - $0);
    _ ->
      {error, badarg}
  end.

%% Keep consuming digits, folding each one into the running value `Acc';
%% at the first non-digit (or end of input) return `Fun(Rest, Acc)'.
-spec digits(binary(), fun(), non_neg_integer()) -> any().
digits(Data, Fun, Acc) ->
  case Data of
    << D, Tail/binary >> when $0 =< D, D =< $9 ->
      digits(Tail, Fun, Acc * 10 + (D - $0));
    _ ->
      Fun(Data, Acc)
  end.
317
318
%% @doc Parse a list of case-insensitive alpha characters.
%%
%% Collects the leading run of ASCII letters, lowercased, and returns
%% `Fun(Rest, Letters)'. `Letters' is empty when the data does not start
%% with a letter.
-spec alpha(binary(), fun()) -> any().
alpha(Data, Fun) ->
  alpha(Data, Fun, <<>>).

-spec alpha(binary(), fun(), binary()) -> any().
alpha(<< C, Rest/binary >>, Fun, Acc) when C >= $A, C =< $Z ->
  %% Uppercase letter: store its lowercase counterpart.
  alpha(Rest, Fun, << Acc/binary, (C - $A + $a) >>);
alpha(<< C, Rest/binary >>, Fun, Acc) when C >= $a, C =< $z ->
  alpha(Rest, Fun, << Acc/binary, C >>);
alpha(Data, Fun, Acc) ->
  %% End of input or first non-letter.
  Fun(Data, Acc).
336
%% @doc Parse either a token or a quoted string.
%%
%% Data starting with a double quote is handed to `quoted_string/2';
%% anything else must start with a non-empty token, otherwise
%% `{error, badarg}' is returned. On success returns `Fun(Rest, Word)'.
-spec word(binary(), fun()) -> any().
word(<< $", _/binary >> = Quoted, Fun) ->
  quoted_string(Quoted, Fun);
word(Data, Fun) ->
  token(Data,
    fun (Rest, Token) ->
      case Token of
        <<>> -> {error, badarg};
        _ -> Fun(Rest, Token)
      end
    end).
346
%% @doc Remove leading and trailing whitespace (as matched by the regular
%% expression class `\s') from a binary.
-spec trim(binary()) -> binary().
trim(Data) ->
  %% One alternation covers both ends; `global' lets both be replaced.
  Edges = "^\\s+|\\s+$",
  Options = [global, {return, binary}],
  re:replace(Data, Edges, "", Options).
350
%% @doc Parse a quoted string.
%%
%% `Data' must start with a double quote. Returns `Fun(Rest, String)',
%% where `String' is the content between the quotes with backslash
%% escapes resolved, or `{error, badarg}' when the closing quote is
%% missing.
-spec quoted_string(binary(), fun()) -> any().
quoted_string(<< $", Body/binary >>, Fun) ->
  quoted_string(Body, Fun, <<>>).

-spec quoted_string(binary(), fun(), binary()) -> any().
%% Closing quote: done.
quoted_string(<< $", Tail/binary >>, Fun, Acc) ->
  Fun(Tail, Acc);
%% Backslash escape: keep the escaped byte, drop the backslash.
quoted_string(<< $\\, Escaped, Tail/binary >>, Fun, Acc) ->
  quoted_string(Tail, Fun, << Acc/binary, Escaped >>);
quoted_string(<< Byte, Tail/binary >>, Fun, Acc) ->
  quoted_string(Tail, Fun, << Acc/binary, Byte >>);
%% Input ended before the closing quote.
quoted_string(<<>>, _Fun, _Acc) ->
  {error, badarg}.
364