1%% Copyright (c) 2019, Loïc Hoguin <essen@ninenines.eu>
2%%
3%% Permission to use, copy, modify, and/or distribute this software for any
4%% purpose with or without fee is hereby granted, provided that the above
5%% copyright notice and this permission notice appear in all copies.
6%%
7%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15%% The mapping between Erlang and structured headers types is as follow:
16%%
17%% List: list()
18%% Dictionary: map()
19%% Bare item: one bare_item() that can be of type:
20%% Integer: integer()
21%% Float: float()
22%% String: {string, binary()}
23%% Token: {token, binary()}
24%% Byte sequence: {binary, binary()}
25%% Boolean: boolean()
26%% And finally:
27%% Type with Parameters: {with_params, Type, Parameters}
28%% Parameters: [{binary(), bare_item()}]
29
30-module(cow_http_struct_hd).
31
32-export([parse_dictionary/1]).
33-export([parse_item/1]).
34-export([parse_list/1]).
35-export([dictionary/1]).
36-export([item/1]).
37-export([list/1]).
38
39-include("cow_parse.hrl").
40
41-type sh_list() :: [sh_item() | sh_inner_list()].
42-type sh_inner_list() :: sh_with_params([sh_item()]).
43-type sh_params() :: #{binary() => sh_bare_item() | undefined}.
44-type sh_dictionary() :: {#{binary() => sh_item() | sh_inner_list()}, [binary()]}.
45-type sh_item() :: sh_with_params(sh_bare_item()).
46-type sh_bare_item() :: integer() | float() | boolean()
47	| {string | token | binary, binary()}.
48-type sh_with_params(Type) :: {with_params, Type, sh_params()}.
49
50-define(IS_LC_ALPHA(C),
51	(C =:= $a) or (C =:= $b) or (C =:= $c) or (C =:= $d) or (C =:= $e) or
52	(C =:= $f) or (C =:= $g) or (C =:= $h) or (C =:= $i) or (C =:= $j) or
53	(C =:= $k) or (C =:= $l) or (C =:= $m) or (C =:= $n) or (C =:= $o) or
54	(C =:= $p) or (C =:= $q) or (C =:= $r) or (C =:= $s) or (C =:= $t) or
55	(C =:= $u) or (C =:= $v) or (C =:= $w) or (C =:= $x) or (C =:= $y) or
56	(C =:= $z)
57).
58
59%% Parsing.
60
61-spec parse_dictionary(binary()) -> sh_dictionary().
62parse_dictionary(<<>>) ->
63	{#{}, []};
64parse_dictionary(<<C,R/bits>>) when ?IS_LC_ALPHA(C) ->
65	{Dict, Order, <<>>} = parse_dict_key(R, #{}, [], <<C>>),
66	{Dict, Order}.
67
68parse_dict_key(<<$=,$(,R0/bits>>, Acc, Order, K) ->
69	false = maps:is_key(K, Acc),
70	{Item, R} = parse_inner_list(R0, []),
71	parse_dict_before_sep(R, Acc#{K => Item}, [K|Order]);
72parse_dict_key(<<$=,R0/bits>>, Acc, Order, K) ->
73	false = maps:is_key(K, Acc),
74	{Item, R} = parse_item1(R0),
75	parse_dict_before_sep(R, Acc#{K => Item}, [K|Order]);
76parse_dict_key(<<C,R/bits>>, Acc, Order, K)
77		when ?IS_LC_ALPHA(C) or ?IS_DIGIT(C)
78			or (C =:= $_) or (C =:= $-) or (C =:= $*) ->
79	parse_dict_key(R, Acc, Order, <<K/binary,C>>).
80
81parse_dict_before_sep(<<C,R/bits>>, Acc, Order) when ?IS_WS(C) ->
82	parse_dict_before_sep(R, Acc, Order);
83parse_dict_before_sep(<<C,R/bits>>, Acc, Order) when C =:= $, ->
84	parse_dict_before_member(R, Acc, Order);
85parse_dict_before_sep(<<>>, Acc, Order) ->
86	{Acc, lists:reverse(Order), <<>>}.
87
88parse_dict_before_member(<<C,R/bits>>, Acc, Order) when ?IS_WS(C) ->
89	parse_dict_before_member(R, Acc, Order);
90parse_dict_before_member(<<C,R/bits>>, Acc, Order) when ?IS_LC_ALPHA(C) ->
91	parse_dict_key(R, Acc, Order, <<C>>).
92
93-spec parse_item(binary()) -> sh_item().
94parse_item(Bin) ->
95	{Item, <<>>} = parse_item1(Bin),
96	Item.
97
98parse_item1(Bin) ->
99	case parse_bare_item(Bin) of
100		{Item, <<$;,R/bits>>} ->
101			{Params, Rest} = parse_before_param(R, #{}),
102			{{with_params, Item, Params}, Rest};
103		{Item, Rest} ->
104			{{with_params, Item, #{}}, Rest}
105	end.
106
107-spec parse_list(binary()) -> sh_list().
108parse_list(<<>>) ->
109	[];
110parse_list(Bin) ->
111	parse_list_before_member(Bin, []).
112
113parse_list_member(<<$(,R0/bits>>, Acc) ->
114	{Item, R} = parse_inner_list(R0, []),
115	parse_list_before_sep(R, [Item|Acc]);
116parse_list_member(R0, Acc) ->
117	{Item, R} = parse_item1(R0),
118	parse_list_before_sep(R, [Item|Acc]).
119
120parse_list_before_sep(<<C,R/bits>>, Acc) when ?IS_WS(C) ->
121	parse_list_before_sep(R, Acc);
122parse_list_before_sep(<<$,,R/bits>>, Acc) ->
123	parse_list_before_member(R, Acc);
124parse_list_before_sep(<<>>, Acc) ->
125	lists:reverse(Acc).
126
127parse_list_before_member(<<C,R/bits>>, Acc) when ?IS_WS(C) ->
128	parse_list_before_member(R, Acc);
129parse_list_before_member(R, Acc) ->
130	parse_list_member(R, Acc).
131
132%% Internal.
133
134parse_inner_list(<<C,R/bits>>, Acc) when ?IS_WS(C) ->
135	parse_inner_list(R, Acc);
136parse_inner_list(<<$),$;,R0/bits>>, Acc) ->
137	{Params, R} = parse_before_param(R0, #{}),
138	{{with_params, lists:reverse(Acc), Params}, R};
139parse_inner_list(<<$),R/bits>>, Acc) ->
140	{{with_params, lists:reverse(Acc), #{}}, R};
141parse_inner_list(R0, Acc) ->
142	{Item, R = <<C,_/bits>>} = parse_item1(R0),
143	true = (C =:= $\s) orelse (C =:= $)),
144	parse_inner_list(R, [Item|Acc]).
145
146parse_before_param(<<C,R/bits>>, Acc) when ?IS_WS(C) ->
147	parse_before_param(R, Acc);
148parse_before_param(<<C,R/bits>>, Acc) when ?IS_LC_ALPHA(C) ->
149	parse_param(R, Acc, <<C>>).
150
151parse_param(<<$;,R/bits>>, Acc, K) ->
152	parse_before_param(R, Acc#{K => undefined});
153parse_param(<<$=,R0/bits>>, Acc, K) ->
154	case parse_bare_item(R0) of
155		{Item, <<$;,R/bits>>} ->
156			false = maps:is_key(K, Acc),
157			parse_before_param(R, Acc#{K => Item});
158		{Item, R} ->
159			false = maps:is_key(K, Acc),
160			{Acc#{K => Item}, R}
161	end;
162parse_param(<<C,R/bits>>, Acc, K)
163		when ?IS_LC_ALPHA(C) or ?IS_DIGIT(C)
164			or (C =:= $_) or (C =:= $-) or (C =:= $*) ->
165	parse_param(R, Acc, <<K/binary,C>>);
166parse_param(R, Acc, K) ->
167	false = maps:is_key(K, Acc),
168	{Acc#{K => undefined}, R}.
169
170%% Integer or float.
171parse_bare_item(<<$-,R/bits>>) -> parse_number(R, 0, <<$->>);
172parse_bare_item(<<C,R/bits>>) when ?IS_DIGIT(C) -> parse_number(R, 1, <<C>>);
173%% String.
174parse_bare_item(<<$",R/bits>>) -> parse_string(R, <<>>);
175%% Token.
176parse_bare_item(<<C,R/bits>>) when ?IS_ALPHA(C) -> parse_token(R, <<C>>);
177%% Byte sequence.
178parse_bare_item(<<$*,R/bits>>) -> parse_binary(R, <<>>);
179%% Boolean.
180parse_bare_item(<<"?0",R/bits>>) -> {false, R};
181parse_bare_item(<<"?1",R/bits>>) -> {true, R}.
182
183parse_number(<<C,R/bits>>, L, Acc) when ?IS_DIGIT(C) ->
184	parse_number(R, L+1, <<Acc/binary,C>>);
185parse_number(<<C,R/bits>>, L, Acc) when C =:= $. ->
186	parse_float(R, L, 0, <<Acc/binary,C>>);
187parse_number(R, L, Acc) when L =< 15 ->
188	{binary_to_integer(Acc), R}.
189
190parse_float(<<C,R/bits>>, L1, L2, Acc) when ?IS_DIGIT(C) ->
191	parse_float(R, L1, L2+1, <<Acc/binary,C>>);
192parse_float(R, L1, L2, Acc) when
193		L1 =< 9, L2 =< 6;
194		L1 =< 10, L2 =< 5;
195		L1 =< 11, L2 =< 4;
196		L1 =< 12, L2 =< 3;
197		L1 =< 13, L2 =< 2;
198		L1 =< 14, L2 =< 1 ->
199	{binary_to_float(Acc), R}.
200
201parse_string(<<$\\,$",R/bits>>, Acc) ->
202	parse_string(R, <<Acc/binary,$">>);
203parse_string(<<$\\,$\\,R/bits>>, Acc) ->
204	parse_string(R, <<Acc/binary,$\\>>);
205parse_string(<<$",R/bits>>, Acc) ->
206	{{string, Acc}, R};
207parse_string(<<C,R/bits>>, Acc) when
208		C >= 16#20, C =< 16#21;
209		C >= 16#23, C =< 16#5b;
210		C >= 16#5d, C =< 16#7e ->
211	parse_string(R, <<Acc/binary,C>>).
212
213parse_token(<<C,R/bits>>, Acc) when ?IS_TOKEN(C) or (C =:= $:) or (C =:= $/) ->
214	parse_token(R, <<Acc/binary,C>>);
215parse_token(R, Acc) ->
216	{{token, Acc}, R}.
217
218parse_binary(<<$*,R/bits>>, Acc) ->
219	{{binary, base64:decode(Acc)}, R};
220parse_binary(<<C,R/bits>>, Acc) when ?IS_ALPHANUM(C) or (C =:= $+) or (C =:= $/) or (C =:= $=) ->
221	parse_binary(R, <<Acc/binary,C>>).
222
223-ifdef(TEST).
224parse_struct_hd_test_() ->
225	Files = filelib:wildcard("deps/structured-header-tests/*.json"),
226	lists:flatten([begin
227		{ok, JSON} = file:read_file(File),
228		Tests = jsx:decode(JSON, [return_maps]),
229		[
230			{iolist_to_binary(io_lib:format("~s: ~s", [filename:basename(File), Name])), fun() ->
231				%% The implementation is strict. We fail whenever we can.
232				CanFail = maps:get(<<"can_fail">>, Test, false),
233				MustFail = maps:get(<<"must_fail">>, Test, false),
234				Expected = case MustFail of
235					true -> undefined;
236					false -> expected_to_term(maps:get(<<"expected">>, Test))
237				end,
238				Raw = raw_to_binary(Raw0),
239				case HeaderType of
240					<<"dictionary">> when MustFail; CanFail ->
241						{'EXIT', _} = (catch parse_dictionary(Raw));
242					%% The test "binary.json: non-zero pad bits" does not fail
243					%% due to our reliance on Erlang/OTP's base64 module.
244					<<"item">> when CanFail ->
245						case (catch parse_item(Raw)) of
246							{'EXIT', _} -> ok;
247							Expected -> ok
248						end;
249					<<"item">> when MustFail ->
250						{'EXIT', _} = (catch parse_item(Raw));
251					<<"list">> when MustFail; CanFail ->
252						{'EXIT', _} = (catch parse_list(Raw));
253					<<"dictionary">> ->
254						{Expected, _Order} = (catch parse_dictionary(Raw));
255					<<"item">> ->
256						Expected = (catch parse_item(Raw));
257					<<"list">> ->
258						Expected = (catch parse_list(Raw))
259				end
260			end}
261		|| Test=#{
262			<<"name">> := Name,
263			<<"header_type">> := HeaderType,
264			<<"raw">> := Raw0
265		} <- Tests]
266	end || File <- Files]).
267
268%% Item.
269expected_to_term(E=[_, Params]) when is_map(Params) ->
270	e2t(E);
271%% Outer list.
272expected_to_term(Expected) when is_list(Expected) ->
273	[e2t(E) || E <- Expected];
274expected_to_term(Expected) ->
275	e2t(Expected).
276
277%% Dictionary.
278e2t(Dict) when is_map(Dict) ->
279	maps:map(fun(_, V) -> e2t(V) end, Dict);
280%% Inner list.
281e2t([List, Params]) when is_list(List) ->
282	{with_params, [e2t(E) || E <- List],
283		maps:map(fun(_, P) -> e2tb(P) end, Params)};
284%% Item.
285e2t([Bare, Params]) ->
286	{with_params, e2tb(Bare),
287		maps:map(fun(_, P) -> e2tb(P) end, Params)}.
288
289%% Bare item.
290e2tb(#{<<"__type">> := <<"token">>, <<"value">> := V}) ->
291	{token, V};
292e2tb(#{<<"__type">> := <<"binary">>, <<"value">> := V}) ->
293	{binary, base32:decode(V)};
294e2tb(V) when is_binary(V) ->
295	{string, V};
296e2tb(null) ->
297	undefined;
298e2tb(V) ->
299	V.
300
301%% The Cowlib parsers currently do not support resuming parsing
302%% in the case of multiple headers. To make tests work we modify
303%% the raw value the same way Cowboy does when encountering
304%% multiple headers: by adding a comma and space in between.
305%%
306%% Similarly, the Cowlib parsers expect the leading and trailing
307%% whitespace to be removed before calling the parser.
308raw_to_binary(RawList) ->
309	trim_ws(iolist_to_binary(lists:join(<<", ">>, RawList))).
310
311trim_ws(<<C,R/bits>>) when ?IS_WS(C) -> trim_ws(R);
312trim_ws(R) -> trim_ws_end(R, byte_size(R) - 1).
313
314trim_ws_end(_, -1) ->
315	<<>>;
316trim_ws_end(Value, N) ->
317	case binary:at(Value, N) of
318		$\s -> trim_ws_end(Value, N - 1);
319		$\t -> trim_ws_end(Value, N - 1);
320		_ ->
321			S = N + 1,
322			<< Value2:S/binary, _/bits >> = Value,
323			Value2
324	end.
325-endif.
326
327%% Building.
328
329-spec dictionary(#{binary() => sh_item() | sh_inner_list()}
330		| [{binary(), sh_item() | sh_inner_list()}])
331	-> iolist().
332%% @todo Also accept this? dictionary({Map, Order}) ->
333dictionary(Map) when is_map(Map) ->
334	dictionary(maps:to_list(Map));
335dictionary(KVList) when is_list(KVList) ->
336	lists:join(<<", ">>, [
337		[Key, $=, item_or_inner_list(Value)]
338	|| {Key, Value} <- KVList]).
339
340-spec item(sh_item()) -> iolist().
341item({with_params, BareItem, Params}) ->
342	[bare_item(BareItem), params(Params)].
343
344-spec list(sh_list()) -> iolist().
345list(List) ->
346	lists:join(<<", ">>, [item_or_inner_list(Value) || Value <- List]).
347
348item_or_inner_list(Value={with_params, List, _}) when is_list(List) ->
349	inner_list(Value);
350item_or_inner_list(Value) ->
351	item(Value).
352
353inner_list({with_params, List, Params}) ->
354	[$(, lists:join($\s, [item(Value) || Value <- List]), $), params(Params)].
355
356bare_item({string, String}) ->
357	[$", escape_string(String, <<>>), $"];
358bare_item({token, Token}) ->
359	Token;
360bare_item({binary, Binary}) ->
361	[$*, base64:encode(Binary), $*];
362bare_item(Integer) when is_integer(Integer) ->
363	integer_to_binary(Integer);
364%% In order to properly reproduce the float as a string we
365%% must first determine how many decimals we want in the
366%% fractional component, otherwise rounding errors may occur.
367bare_item(Float) when is_float(Float) ->
368	Decimals = case trunc(Float) of
369		I when I >= 10000000000000 -> 1;
370		I when I >= 1000000000000 -> 2;
371		I when I >= 100000000000 -> 3;
372		I when I >= 10000000000 -> 4;
373		I when I >= 1000000000 -> 5;
374		_ -> 6
375	end,
376	float_to_binary(Float, [{decimals, Decimals}, compact]);
377bare_item(true) ->
378	<<"?1">>;
379bare_item(false) ->
380	<<"?0">>.
381
382escape_string(<<>>, Acc) -> Acc;
383escape_string(<<$\\,R/bits>>, Acc) -> escape_string(R, <<Acc/binary,$\\,$\\>>);
384escape_string(<<$",R/bits>>, Acc) -> escape_string(R, <<Acc/binary,$\\,$">>);
385escape_string(<<C,R/bits>>, Acc) -> escape_string(R, <<Acc/binary,C>>).
386
387params(Params) ->
388	maps:fold(fun
389		(Key, undefined, Acc) ->
390			[[$;, Key]|Acc];
391		(Key, Value, Acc) ->
392			[[$;, Key, $=, bare_item(Value)]|Acc]
393	end, [], Params).
394
395-ifdef(TEST).
396struct_hd_identity_test_() ->
397	Files = filelib:wildcard("deps/structured-header-tests/*.json"),
398	lists:flatten([begin
399		{ok, JSON} = file:read_file(File),
400		Tests = jsx:decode(JSON, [return_maps]),
401		[
402			{iolist_to_binary(io_lib:format("~s: ~s", [filename:basename(File), Name])), fun() ->
403				Expected = expected_to_term(Expected0),
404				case HeaderType of
405					<<"dictionary">> ->
406						{Expected, _Order} = parse_dictionary(iolist_to_binary(dictionary(Expected)));
407					<<"item">> ->
408						Expected = parse_item(iolist_to_binary(item(Expected)));
409					<<"list">> ->
410						Expected = parse_list(iolist_to_binary(list(Expected)))
411				end
412			end}
413		|| #{
414			<<"name">> := Name,
415			<<"header_type">> := HeaderType,
416			%% We only run tests that must not fail.
417			<<"expected">> := Expected0
418		} <- Tests]
419	end || File <- Files]).
420-endif.
421