1%% Copyright (c) 2019, Loïc Hoguin <essen@ninenines.eu> 2%% 3%% Permission to use, copy, modify, and/or distribute this software for any 4%% purpose with or without fee is hereby granted, provided that the above 5%% copyright notice and this permission notice appear in all copies. 6%% 7%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 15%% The mapping between Erlang and structured headers types is as follow: 16%% 17%% List: list() 18%% Dictionary: map() 19%% Bare item: one bare_item() that can be of type: 20%% Integer: integer() 21%% Float: float() 22%% String: {string, binary()} 23%% Token: {token, binary()} 24%% Byte sequence: {binary, binary()} 25%% Boolean: boolean() 26%% And finally: 27%% Type with Parameters: {with_params, Type, Parameters} 28%% Parameters: [{binary(), bare_item()}] 29 30-module(cow_http_struct_hd). 31 32-export([parse_dictionary/1]). 33-export([parse_item/1]). 34-export([parse_list/1]). 35-export([dictionary/1]). 36-export([item/1]). 37-export([list/1]). 38 39-include("cow_parse.hrl"). 40 41-type sh_list() :: [sh_item() | sh_inner_list()]. 42-type sh_inner_list() :: sh_with_params([sh_item()]). 43-type sh_params() :: #{binary() => sh_bare_item() | undefined}. 44-type sh_dictionary() :: {#{binary() => sh_item() | sh_inner_list()}, [binary()]}. 45-type sh_item() :: sh_with_params(sh_bare_item()). 46-type sh_bare_item() :: integer() | float() | boolean() 47 | {string | token | binary, binary()}. 48-type sh_with_params(Type) :: {with_params, Type, sh_params()}. 49 50-define(IS_LC_ALPHA(C), 51 (C =:= $a) or (C =:= $b) or (C =:= $c) or (C =:= $d) or (C =:= $e) or 52 (C =:= $f) or (C =:= $g) or (C =:= $h) or (C =:= $i) or (C =:= $j) or 53 (C =:= $k) or (C =:= $l) or (C =:= $m) or (C =:= $n) or (C =:= $o) or 54 (C =:= $p) or (C =:= $q) or (C =:= $r) or (C =:= $s) or (C =:= $t) or 55 (C =:= $u) or (C =:= $v) or (C =:= $w) or (C =:= $x) or (C =:= $y) or 56 (C =:= $z) 57). 58 59%% Parsing. 60 61-spec parse_dictionary(binary()) -> sh_dictionary(). 62parse_dictionary(<<>>) -> 63 {#{}, []}; 64parse_dictionary(<<C,R/bits>>) when ?IS_LC_ALPHA(C) -> 65 {Dict, Order, <<>>} = parse_dict_key(R, #{}, [], <<C>>), 66 {Dict, Order}. 67 68parse_dict_key(<<$=,$(,R0/bits>>, Acc, Order, K) -> 69 false = maps:is_key(K, Acc), 70 {Item, R} = parse_inner_list(R0, []), 71 parse_dict_before_sep(R, Acc#{K => Item}, [K|Order]); 72parse_dict_key(<<$=,R0/bits>>, Acc, Order, K) -> 73 false = maps:is_key(K, Acc), 74 {Item, R} = parse_item1(R0), 75 parse_dict_before_sep(R, Acc#{K => Item}, [K|Order]); 76parse_dict_key(<<C,R/bits>>, Acc, Order, K) 77 when ?IS_LC_ALPHA(C) or ?IS_DIGIT(C) 78 or (C =:= $_) or (C =:= $-) or (C =:= $*) -> 79 parse_dict_key(R, Acc, Order, <<K/binary,C>>). 80 81parse_dict_before_sep(<<C,R/bits>>, Acc, Order) when ?IS_WS(C) -> 82 parse_dict_before_sep(R, Acc, Order); 83parse_dict_before_sep(<<C,R/bits>>, Acc, Order) when C =:= $, -> 84 parse_dict_before_member(R, Acc, Order); 85parse_dict_before_sep(<<>>, Acc, Order) -> 86 {Acc, lists:reverse(Order), <<>>}. 87 88parse_dict_before_member(<<C,R/bits>>, Acc, Order) when ?IS_WS(C) -> 89 parse_dict_before_member(R, Acc, Order); 90parse_dict_before_member(<<C,R/bits>>, Acc, Order) when ?IS_LC_ALPHA(C) -> 91 parse_dict_key(R, Acc, Order, <<C>>). 92 93-spec parse_item(binary()) -> sh_item(). 94parse_item(Bin) -> 95 {Item, <<>>} = parse_item1(Bin), 96 Item. 97 98parse_item1(Bin) -> 99 case parse_bare_item(Bin) of 100 {Item, <<$;,R/bits>>} -> 101 {Params, Rest} = parse_before_param(R, #{}), 102 {{with_params, Item, Params}, Rest}; 103 {Item, Rest} -> 104 {{with_params, Item, #{}}, Rest} 105 end. 106 107-spec parse_list(binary()) -> sh_list(). 108parse_list(<<>>) -> 109 []; 110parse_list(Bin) -> 111 parse_list_before_member(Bin, []). 112 113parse_list_member(<<$(,R0/bits>>, Acc) -> 114 {Item, R} = parse_inner_list(R0, []), 115 parse_list_before_sep(R, [Item|Acc]); 116parse_list_member(R0, Acc) -> 117 {Item, R} = parse_item1(R0), 118 parse_list_before_sep(R, [Item|Acc]). 119 120parse_list_before_sep(<<C,R/bits>>, Acc) when ?IS_WS(C) -> 121 parse_list_before_sep(R, Acc); 122parse_list_before_sep(<<$,,R/bits>>, Acc) -> 123 parse_list_before_member(R, Acc); 124parse_list_before_sep(<<>>, Acc) -> 125 lists:reverse(Acc). 126 127parse_list_before_member(<<C,R/bits>>, Acc) when ?IS_WS(C) -> 128 parse_list_before_member(R, Acc); 129parse_list_before_member(R, Acc) -> 130 parse_list_member(R, Acc). 131 132%% Internal. 133 134parse_inner_list(<<C,R/bits>>, Acc) when ?IS_WS(C) -> 135 parse_inner_list(R, Acc); 136parse_inner_list(<<$),$;,R0/bits>>, Acc) -> 137 {Params, R} = parse_before_param(R0, #{}), 138 {{with_params, lists:reverse(Acc), Params}, R}; 139parse_inner_list(<<$),R/bits>>, Acc) -> 140 {{with_params, lists:reverse(Acc), #{}}, R}; 141parse_inner_list(R0, Acc) -> 142 {Item, R = <<C,_/bits>>} = parse_item1(R0), 143 true = (C =:= $\s) orelse (C =:= $)), 144 parse_inner_list(R, [Item|Acc]). 145 146parse_before_param(<<C,R/bits>>, Acc) when ?IS_WS(C) -> 147 parse_before_param(R, Acc); 148parse_before_param(<<C,R/bits>>, Acc) when ?IS_LC_ALPHA(C) -> 149 parse_param(R, Acc, <<C>>). 150 151parse_param(<<$;,R/bits>>, Acc, K) -> 152 parse_before_param(R, Acc#{K => undefined}); 153parse_param(<<$=,R0/bits>>, Acc, K) -> 154 case parse_bare_item(R0) of 155 {Item, <<$;,R/bits>>} -> 156 false = maps:is_key(K, Acc), 157 parse_before_param(R, Acc#{K => Item}); 158 {Item, R} -> 159 false = maps:is_key(K, Acc), 160 {Acc#{K => Item}, R} 161 end; 162parse_param(<<C,R/bits>>, Acc, K) 163 when ?IS_LC_ALPHA(C) or ?IS_DIGIT(C) 164 or (C =:= $_) or (C =:= $-) or (C =:= $*) -> 165 parse_param(R, Acc, <<K/binary,C>>); 166parse_param(R, Acc, K) -> 167 false = maps:is_key(K, Acc), 168 {Acc#{K => undefined}, R}. 169 170%% Integer or float. 171parse_bare_item(<<$-,R/bits>>) -> parse_number(R, 0, <<$->>); 172parse_bare_item(<<C,R/bits>>) when ?IS_DIGIT(C) -> parse_number(R, 1, <<C>>); 173%% String. 174parse_bare_item(<<$",R/bits>>) -> parse_string(R, <<>>); 175%% Token. 176parse_bare_item(<<C,R/bits>>) when ?IS_ALPHA(C) -> parse_token(R, <<C>>); 177%% Byte sequence. 178parse_bare_item(<<$*,R/bits>>) -> parse_binary(R, <<>>); 179%% Boolean. 180parse_bare_item(<<"?0",R/bits>>) -> {false, R}; 181parse_bare_item(<<"?1",R/bits>>) -> {true, R}. 182 183parse_number(<<C,R/bits>>, L, Acc) when ?IS_DIGIT(C) -> 184 parse_number(R, L+1, <<Acc/binary,C>>); 185parse_number(<<C,R/bits>>, L, Acc) when C =:= $. -> 186 parse_float(R, L, 0, <<Acc/binary,C>>); 187parse_number(R, L, Acc) when L =< 15 -> 188 {binary_to_integer(Acc), R}. 189 190parse_float(<<C,R/bits>>, L1, L2, Acc) when ?IS_DIGIT(C) -> 191 parse_float(R, L1, L2+1, <<Acc/binary,C>>); 192parse_float(R, L1, L2, Acc) when 193 L1 =< 9, L2 =< 6; 194 L1 =< 10, L2 =< 5; 195 L1 =< 11, L2 =< 4; 196 L1 =< 12, L2 =< 3; 197 L1 =< 13, L2 =< 2; 198 L1 =< 14, L2 =< 1 -> 199 {binary_to_float(Acc), R}. 200 201parse_string(<<$\\,$",R/bits>>, Acc) -> 202 parse_string(R, <<Acc/binary,$">>); 203parse_string(<<$\\,$\\,R/bits>>, Acc) -> 204 parse_string(R, <<Acc/binary,$\\>>); 205parse_string(<<$",R/bits>>, Acc) -> 206 {{string, Acc}, R}; 207parse_string(<<C,R/bits>>, Acc) when 208 C >= 16#20, C =< 16#21; 209 C >= 16#23, C =< 16#5b; 210 C >= 16#5d, C =< 16#7e -> 211 parse_string(R, <<Acc/binary,C>>). 212 213parse_token(<<C,R/bits>>, Acc) when ?IS_TOKEN(C) or (C =:= $:) or (C =:= $/) -> 214 parse_token(R, <<Acc/binary,C>>); 215parse_token(R, Acc) -> 216 {{token, Acc}, R}. 217 218parse_binary(<<$*,R/bits>>, Acc) -> 219 {{binary, base64:decode(Acc)}, R}; 220parse_binary(<<C,R/bits>>, Acc) when ?IS_ALPHANUM(C) or (C =:= $+) or (C =:= $/) or (C =:= $=) -> 221 parse_binary(R, <<Acc/binary,C>>). 222 223-ifdef(TEST). 224parse_struct_hd_test_() -> 225 Files = filelib:wildcard("deps/structured-header-tests/*.json"), 226 lists:flatten([begin 227 {ok, JSON} = file:read_file(File), 228 Tests = jsx:decode(JSON, [return_maps]), 229 [ 230 {iolist_to_binary(io_lib:format("~s: ~s", [filename:basename(File), Name])), fun() -> 231 %% The implementation is strict. We fail whenever we can. 232 CanFail = maps:get(<<"can_fail">>, Test, false), 233 MustFail = maps:get(<<"must_fail">>, Test, false), 234 Expected = case MustFail of 235 true -> undefined; 236 false -> expected_to_term(maps:get(<<"expected">>, Test)) 237 end, 238 Raw = raw_to_binary(Raw0), 239 case HeaderType of 240 <<"dictionary">> when MustFail; CanFail -> 241 {'EXIT', _} = (catch parse_dictionary(Raw)); 242 %% The test "binary.json: non-zero pad bits" does not fail 243 %% due to our reliance on Erlang/OTP's base64 module. 244 <<"item">> when CanFail -> 245 case (catch parse_item(Raw)) of 246 {'EXIT', _} -> ok; 247 Expected -> ok 248 end; 249 <<"item">> when MustFail -> 250 {'EXIT', _} = (catch parse_item(Raw)); 251 <<"list">> when MustFail; CanFail -> 252 {'EXIT', _} = (catch parse_list(Raw)); 253 <<"dictionary">> -> 254 {Expected, _Order} = (catch parse_dictionary(Raw)); 255 <<"item">> -> 256 Expected = (catch parse_item(Raw)); 257 <<"list">> -> 258 Expected = (catch parse_list(Raw)) 259 end 260 end} 261 || Test=#{ 262 <<"name">> := Name, 263 <<"header_type">> := HeaderType, 264 <<"raw">> := Raw0 265 } <- Tests] 266 end || File <- Files]). 267 268%% Item. 269expected_to_term(E=[_, Params]) when is_map(Params) -> 270 e2t(E); 271%% Outer list. 272expected_to_term(Expected) when is_list(Expected) -> 273 [e2t(E) || E <- Expected]; 274expected_to_term(Expected) -> 275 e2t(Expected). 276 277%% Dictionary. 278e2t(Dict) when is_map(Dict) -> 279 maps:map(fun(_, V) -> e2t(V) end, Dict); 280%% Inner list. 281e2t([List, Params]) when is_list(List) -> 282 {with_params, [e2t(E) || E <- List], 283 maps:map(fun(_, P) -> e2tb(P) end, Params)}; 284%% Item. 285e2t([Bare, Params]) -> 286 {with_params, e2tb(Bare), 287 maps:map(fun(_, P) -> e2tb(P) end, Params)}. 288 289%% Bare item. 290e2tb(#{<<"__type">> := <<"token">>, <<"value">> := V}) -> 291 {token, V}; 292e2tb(#{<<"__type">> := <<"binary">>, <<"value">> := V}) -> 293 {binary, base32:decode(V)}; 294e2tb(V) when is_binary(V) -> 295 {string, V}; 296e2tb(null) -> 297 undefined; 298e2tb(V) -> 299 V. 300 301%% The Cowlib parsers currently do not support resuming parsing 302%% in the case of multiple headers. To make tests work we modify 303%% the raw value the same way Cowboy does when encountering 304%% multiple headers: by adding a comma and space in between. 305%% 306%% Similarly, the Cowlib parsers expect the leading and trailing 307%% whitespace to be removed before calling the parser. 308raw_to_binary(RawList) -> 309 trim_ws(iolist_to_binary(lists:join(<<", ">>, RawList))). 310 311trim_ws(<<C,R/bits>>) when ?IS_WS(C) -> trim_ws(R); 312trim_ws(R) -> trim_ws_end(R, byte_size(R) - 1). 313 314trim_ws_end(_, -1) -> 315 <<>>; 316trim_ws_end(Value, N) -> 317 case binary:at(Value, N) of 318 $\s -> trim_ws_end(Value, N - 1); 319 $\t -> trim_ws_end(Value, N - 1); 320 _ -> 321 S = N + 1, 322 << Value2:S/binary, _/bits >> = Value, 323 Value2 324 end. 325-endif. 326 327%% Building. 328 329-spec dictionary(#{binary() => sh_item() | sh_inner_list()} 330 | [{binary(), sh_item() | sh_inner_list()}]) 331 -> iolist(). 332%% @todo Also accept this? dictionary({Map, Order}) -> 333dictionary(Map) when is_map(Map) -> 334 dictionary(maps:to_list(Map)); 335dictionary(KVList) when is_list(KVList) -> 336 lists:join(<<", ">>, [ 337 [Key, $=, item_or_inner_list(Value)] 338 || {Key, Value} <- KVList]). 339 340-spec item(sh_item()) -> iolist(). 341item({with_params, BareItem, Params}) -> 342 [bare_item(BareItem), params(Params)]. 343 344-spec list(sh_list()) -> iolist(). 345list(List) -> 346 lists:join(<<", ">>, [item_or_inner_list(Value) || Value <- List]). 347 348item_or_inner_list(Value={with_params, List, _}) when is_list(List) -> 349 inner_list(Value); 350item_or_inner_list(Value) -> 351 item(Value). 352 353inner_list({with_params, List, Params}) -> 354 [$(, lists:join($\s, [item(Value) || Value <- List]), $), params(Params)]. 355 356bare_item({string, String}) -> 357 [$", escape_string(String, <<>>), $"]; 358bare_item({token, Token}) -> 359 Token; 360bare_item({binary, Binary}) -> 361 [$*, base64:encode(Binary), $*]; 362bare_item(Integer) when is_integer(Integer) -> 363 integer_to_binary(Integer); 364%% In order to properly reproduce the float as a string we 365%% must first determine how many decimals we want in the 366%% fractional component, otherwise rounding errors may occur. 367bare_item(Float) when is_float(Float) -> 368 Decimals = case trunc(Float) of 369 I when I >= 10000000000000 -> 1; 370 I when I >= 1000000000000 -> 2; 371 I when I >= 100000000000 -> 3; 372 I when I >= 10000000000 -> 4; 373 I when I >= 1000000000 -> 5; 374 _ -> 6 375 end, 376 float_to_binary(Float, [{decimals, Decimals}, compact]); 377bare_item(true) -> 378 <<"?1">>; 379bare_item(false) -> 380 <<"?0">>. 381 382escape_string(<<>>, Acc) -> Acc; 383escape_string(<<$\\,R/bits>>, Acc) -> escape_string(R, <<Acc/binary,$\\,$\\>>); 384escape_string(<<$",R/bits>>, Acc) -> escape_string(R, <<Acc/binary,$\\,$">>); 385escape_string(<<C,R/bits>>, Acc) -> escape_string(R, <<Acc/binary,C>>). 386 387params(Params) -> 388 maps:fold(fun 389 (Key, undefined, Acc) -> 390 [[$;, Key]|Acc]; 391 (Key, Value, Acc) -> 392 [[$;, Key, $=, bare_item(Value)]|Acc] 393 end, [], Params). 394 395-ifdef(TEST). 396struct_hd_identity_test_() -> 397 Files = filelib:wildcard("deps/structured-header-tests/*.json"), 398 lists:flatten([begin 399 {ok, JSON} = file:read_file(File), 400 Tests = jsx:decode(JSON, [return_maps]), 401 [ 402 {iolist_to_binary(io_lib:format("~s: ~s", [filename:basename(File), Name])), fun() -> 403 Expected = expected_to_term(Expected0), 404 case HeaderType of 405 <<"dictionary">> -> 406 {Expected, _Order} = parse_dictionary(iolist_to_binary(dictionary(Expected))); 407 <<"item">> -> 408 Expected = parse_item(iolist_to_binary(item(Expected))); 409 <<"list">> -> 410 Expected = parse_list(iolist_to_binary(list(Expected))) 411 end 412 end} 413 || #{ 414 <<"name">> := Name, 415 <<"header_type">> := HeaderType, 416 %% We only run tests that must not fail. 417 <<"expected">> := Expected0 418 } <- Tests] 419 end || File <- Files]). 420-endif. 421