%% @author Bob Ippolito <bob@mochimedia.com>
%% @copyright 2007 Mochi Media, Inc.

%% @doc Case preserving (but case insensitive) HTTP Header dictionary.

-module(mochiweb_headers).
-author('bob@mochimedia.com').
-export([empty/0, from_list/1, insert/3, enter/3, get_value/2, lookup/2]).
-export([delete_any/2, get_primary_value/2, get_combined_value/2]).
-export([default/3, enter_from_list/2, default_from_list/2]).
-export([to_list/1, make/1]).
-export([from_binary/1]).

%% @type headers().
%% @type key() = atom() | binary() | string().
%% @type value() = atom() | binary() | string() | integer().

%% @spec empty() -> headers()
%% @doc Return a headers structure with no entries. The structure is a
%%      gb_trees tree.
empty() ->
    gb_trees:empty().

%% @spec make(headers() | [{key(), value()}]) -> headers()
%% @doc Build a headers() from a list of {Key, Value} pairs. Any argument
%%      that is not a list is assumed to already be a headers() and is
%%      returned untouched.
make(Input) ->
    case is_list(Input) of
        true ->
            from_list(Input);
        false ->
            Input
    end.

%% @spec from_binary(iolist()) -> headers()
%% @doc Parse raw HTTP header text into a mochiweb headers structure.
%%
%% Two input shapes are accepted:
%%
%% 1) A single string or binary holding the complete header section,
%%    terminated by a double CRLF.
%%    Examples:
%%    ```
%%    "Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n"
%%    <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>'''
%%
%% 2) A list of strings and/or binaries, each one a raw header line that
%%    ends with a single CRLF.
%%    Examples:
%%    ```
%%    [<<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">>]
%%    ["Content-Length: 47\r\n", "Content-Type: text/plain\r\n"]
%%    ["Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">>]'''
%%
%% A binary is parsed as given. Anything else is flattened into a binary
%% with one extra CRLF appended, and then parsed.
from_binary(RawHttpHeader) when is_binary(RawHttpHeader) ->
    from_binary(RawHttpHeader, []);
from_binary(RawHttpHeaderList) ->
    Terminated = list_to_binary([RawHttpHeaderList, "\r\n"]),
    from_binary(Terminated).
%% Worker for from_binary/1. Decodes one header line at a time with
%% erlang:decode_packet/3 in `httph' mode and collects {Name, Value} pairs.
%% Decoding stops at the first result that is not a header line, and the
%% pairs collected so far are turned into a headers() structure.
from_binary(RawHttpHeader, Acc) ->
    case erlang:decode_packet(httph, RawHttpHeader, []) of
        {ok, {http_header, _, H, _, V}, Rest} ->
            from_binary(Rest, [{H, V} | Acc]);
        _ ->
            %% Acc was built by prepending, so it is in reverse wire order.
            %% Restore the received order before inserting: repeated fields
            %% are merged in insertion order and the first key spelling
            %% seen is the one that is kept.
            make(lists:reverse(Acc))
    end.

%% @spec from_list([{key(), value()}]) -> headers()
%% @doc Construct a headers() from the given list. Pairs are added with
%%      insert/3, so repeated keys are merged rather than replaced.
from_list(List) ->
    lists:foldl(fun ({K, V}, T) -> insert(K, V, T) end, empty(), List).

%% @spec enter_from_list([{key(), value()}], headers()) -> headers()
%% @doc Insert pairs into the headers, replace any values for existing keys.
enter_from_list(List, T) ->
    lists:foldl(fun ({K, V}, T1) -> enter(K, V, T1) end, T, List).

%% @spec default_from_list([{key(), value()}], headers()) -> headers()
%% @doc Insert pairs into the headers for keys that do not already exist.
default_from_list(List, T) ->
    lists:foldl(fun ({K, V}, T1) -> default(K, V, T1) end, T, List).

%% @spec to_list(headers()) -> [{key(), string()}]
%% @doc Return the contents of the headers. The keys will be the exact key
%%      that was first inserted (e.g. may be an atom or binary, case is
%%      preserved). A value stored as {array, Values} yields one pair per
%%      element instead of a single combined pair.
to_list(T) ->
    Expand = fun ({K, {array, L}}) ->
                     %% The array is reversed here so the pairs come out in
                     %% the order the values were inserted.
                     [{K, V} || V <- lists:reverse(L)];
                 (Pair) ->
                     [Pair]
             end,
    lists:flatmap(Expand, gb_trees:values(T)).

%% @spec get_value(key(), headers()) -> string() | undefined
%% @doc Return the value of the given header using a case insensitive search.
%%      undefined will be returned for keys that are not present.
get_value(K, T) ->
    case lookup(K, T) of
        {value, {_, V}} ->
            expand(V);
        none ->
            undefined
    end.

%% @spec get_primary_value(key(), headers()) -> string() | undefined
%% @doc Return the value of the given header up to the first semicolon using
%%      a case insensitive search. undefined will be returned for keys
%%      that are not present.
get_primary_value(K, T) ->
    case get_value(K, T) of
        undefined ->
            undefined;
        V ->
            %% Keep only the characters before the first ';', which drops
            %% any trailing parameters such as "; charset=utf8".
            lists:takewhile(fun (C) -> C =/= $; end, V)
    end.

%% @spec get_combined_value(key(), headers()) -> string() | undefined
%% @doc Return the value from the given header using a case insensitive search.
%%      If the value of the header is a comma-separated list whose values
%%      are all identical, that single value is returned.
%%      undefined will be returned for keys that are not present, for
%%      values in the list that are not all the same, and for values that
%%      cannot be tokenized (e.g. an unmatched double quote).
%%
%%      NOTE: The process isn't designed for a general purpose. If you need
%%      to access all values in the combined header, please refer to
%%      '''tokenize_header_value/1'''.
%%
%%      Section 4.2 of the RFC 2616 (HTTP 1.1) describes multiple message-header
%%      fields with the same field-name may be present in a message if and only
%%      if the entire field-value for that header field is defined as a
%%      comma-separated list [i.e., #(values)].
get_combined_value(K, T) ->
    case get_value(K, T) of
        undefined ->
            undefined;
        V ->
            sole_distinct_token(tokenize_header_value(V))
    end.

%% Collapse a token list to its single distinct element, if there is one.
%% The tokenizer returns an atom rather than a list for malformed input;
%% that case is reported as undefined instead of being handed to
%% sets:from_list/1, which only accepts lists.
sole_distinct_token(Tokens) when is_list(Tokens) ->
    case sets:to_list(sets:from_list(Tokens)) of
        [Val] ->
            Val;
        _ ->
            undefined
    end;
sole_distinct_token(_Malformed) ->
    undefined.

%% @spec lookup(key(), headers()) -> {value, {key(), string()}} | none
%% @doc Return the case preserved key and value for the given header using
%%      a case insensitive search. none will be returned for keys that are
%%      not present.
lookup(K, T) ->
    case gb_trees:lookup(normalize(K), T) of
        {value, {K0, V}} ->
            {value, {K0, expand(V)}};
        none ->
            none
    end.

%% @spec default(key(), value(), headers()) -> headers()
%% @doc Insert the pair into the headers if it does not already exist.
%%      When the normalized key is already present the headers are
%%      returned unchanged.
default(K, V, T) ->
    K1 = normalize(K),
    V1 = any_to_list(V),
    try gb_trees:insert(K1, {K, V1}, T)
    catch
        %% gb_trees:insert/3 signals an existing key with this error;
        %% that is the "already present" case, so keep T as it is.
        error:{key_exists, _} ->
            T
    end.
%% @spec enter(key(), value(), headers()) -> headers()
%% @doc Store the pair in the headers, overwriting whatever was stored
%%      under the same (case insensitive) key.
enter(K, V, T) ->
    gb_trees:enter(normalize(K), {K, any_to_list(V)}, T).

%% @spec insert(key(), value(), headers()) -> headers()
%% @doc Add the pair to the headers. If the (case insensitive) key is
%%      already present, the stored key spelling is kept and the new value
%%      is combined with the stored one by merge/3.
insert(K, V, T) ->
    Norm = normalize(K),
    Str = any_to_list(V),
    case gb_trees:lookup(Norm, T) of
        none ->
            gb_trees:insert(Norm, {K, Str}, T);
        {value, {StoredKey, StoredVal}} ->
            Merged = merge(Norm, Str, StoredVal),
            gb_trees:update(Norm, {StoredKey, Merged}, T)
    end.

%% @spec delete_any(key(), headers()) -> headers()
%% @doc Remove the header stored under the given key, if there is one.
delete_any(K, T) ->
    gb_trees:delete_any(normalize(K), T).

%% Internal API

%% Split a header value into tokens. undefined is passed through as is.
%% The value is trimmed and reversed first because reversed_tokens/3
%% consumes its input back to front.
tokenize_header_value(undefined) ->
    undefined;
tokenize_header_value(V) ->
    Reversed = trim_and_reverse(V, false),
    reversed_tokens(Reversed, [], []).

%% Strip blanks from both ends of V and return the result reversed.
%% The flag tells whether V has already been reversed: on the first pass
%% the leading blanks are dropped and the string is flipped, on the second
%% pass the (originally trailing) blanks are dropped.
trim_and_reverse(V, false) ->
    trim_and_reverse(lists:reverse(skip_blanks(V)), true);
trim_and_reverse(V, true) ->
    skip_blanks(V).

%% Drop leading space, newline and tab characters.
skip_blanks([C | Rest]) when C =:= $\s; C =:= $\n; C =:= $\t ->
    skip_blanks(Rest);
skip_blanks(Other) ->
    Other.
%% Tokenize a header value that is supplied REVERSED (last character
%% first). Tokens are separated by space, newline, tab or comma; a double
%% quoted section forms one token and may contain separators.
%%
%% Token holds the characters of the token being read and Acc the finished
%% tokens. Because the input is consumed back to front and everything is
%% built by prepending, both the characters inside each token and the
%% tokens in the result come out in their original order.
%%
%% Returns the list of tokens, or undefined when the value is malformed:
%% an unterminated quoted section, a quote in the middle of a bare token,
%% or input that is not a proper list.
reversed_tokens([], [], Acc) ->
    Acc;
reversed_tokens([], Token, Acc) ->
    [Token | Acc];
reversed_tokens("\"" ++ Rest, [], Acc) ->
    case extract_quoted_string(Rest, []) of
        {String, NewRest} ->
            reversed_tokens(NewRest, [], [String | Acc]);
        undefined ->
            undefined
    end;
reversed_tokens("\"" ++ _Rest, _Token, _Acc) ->
    %% A quote directly attached to a bare token is not accepted.
    undefined;
reversed_tokens([C | Rest], [], Acc)
  when C =:= $\s; C =:= $\n; C =:= $\t; C =:= $, ->
    reversed_tokens(Rest, [], Acc);
reversed_tokens([C | Rest], Token, Acc)
  when C =:= $\s; C =:= $\n; C =:= $\t; C =:= $, ->
    reversed_tokens(Rest, [], [Token | Acc]);
reversed_tokens([C | Rest], Token, Acc) ->
    reversed_tokens(Rest, [C | Token], Acc);
reversed_tokens(_, _, _) ->
    %% Only reachable for input that is not a proper list. Report it the
    %% same way as every other malformed value.
    undefined.

%% Read a quoted section from reversed input, after the first quote has
%% already been consumed. Returns {String, Rest} where String is the quoted
%% text in original order and Rest is the input after the matching quote,
%% or undefined if the input ends before a matching quote is found.
%%
%% In reversed input an escaped quote (backslash, quote) appears as quote
%% followed by backslash; it is kept as a literal quote and the backslash
%% is dropped.
extract_quoted_string([], _Acc) ->
    undefined;
extract_quoted_string("\"\\" ++ Rest, Acc) ->
    extract_quoted_string(Rest, "\"" ++ Acc);
extract_quoted_string("\"" ++ Rest, Acc) ->
    {Acc, Rest};
extract_quoted_string([C | Rest], Acc) ->
    extract_quoted_string(Rest, [C | Acc]).

%% Turn a stored value into a single string. An {array, L} value holds its
%% elements most recent first, so it is reversed and passed to
%% mochiweb_util:join/2 with ", " as the separator; any other value is
%% returned as is.
expand({array, L}) ->
    mochiweb_util:join(lists:reverse(L), ", ");
expand(V) ->
    V.

%% Combine a new value V1 with the value V0 already stored under the
%% normalized key. "set-cookie" values are kept apart in an {array, L}
%% (most recent first); every other header is joined as "V0, V1".
merge("set-cookie", V1, {array, L}) ->
    {array, [V1 | L]};
merge("set-cookie", V1, V0) ->
    {array, [V1, V0]};
merge(_, V1, V0) ->
    V0 ++ ", " ++ V1.

%% Canonical lookup form of a key: a lower-cased string. Atoms and
%% binaries are converted to strings first.
normalize(K) when is_list(K) ->
    string:to_lower(K);
normalize(K) when is_atom(K) ->
    normalize(atom_to_list(K));
normalize(K) when is_binary(K) ->
    normalize(binary_to_list(K)).

%% Convert a header value to a string. Lists are returned unchanged.
any_to_list(V) when is_list(V) ->
    V;
any_to_list(V) when is_atom(V) ->
    atom_to_list(V);
any_to_list(V) when is_binary(V) ->
    binary_to_list(V);
any_to_list(V) when is_integer(V) ->
    integer_to_list(V).

%%
%% Tests.
%%
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").

%% make/1 must return an existing headers structure unchanged.
make_test() ->
    Identity = make([{hdr, foo}]),
    ?assertEqual(
       Identity,
       make(Identity)).
%% enter_from_list/2 adds new keys and overwrites existing ones.
enter_from_list_test() ->
    Base = make([{hdr, foo}]),
    Added = enter_from_list([{baz, wibble}], Base),
    ?assertEqual([{baz, "wibble"}, {hdr, "foo"}], to_list(Added)),
    Replaced = enter_from_list([{hdr, bar}], Base),
    ?assertEqual([{hdr, "bar"}], to_list(Replaced)),
    ok.

%% default_from_list/2 adds new keys but leaves existing ones alone.
default_from_list_test() ->
    Base = make([{hdr, foo}]),
    Added = default_from_list([{baz, wibble}], Base),
    ?assertEqual([{baz, "wibble"}, {hdr, "foo"}], to_list(Added)),
    Kept = default_from_list([{hdr, bar}], Base),
    ?assertEqual([{hdr, "foo"}], to_list(Kept)),
    ok.

%% get_primary_value/2 cuts the value at the first semicolon.
get_primary_value_test() ->
    Headers = make([{hdr, foo}, {baz, <<"wibble;taco">>}]),
    Cases = [{"foo", hdr},
             {undefined, bar},
             {"wibble", <<"baz">>}],
    lists:foreach(
      fun ({Expected, Key}) ->
              ?assertEqual(Expected, get_primary_value(Key, Headers))
      end,
      Cases),
    ok.

%% get_combined_value/2 yields a value only when every element of the
%% comma-separated list is the same.
get_combined_value_test() ->
    Headers = make([{hdr, foo},
                    {baz, <<"wibble,taco">>},
                    {content_length, "123, 123"},
                    {test, " 123, 123, 123 , 123,123 "},
                    {test2, "456, 123, 123 , 123"},
                    {test3, "123"},
                    {test4, " 123, "}]),
    Cases = [{"foo", hdr},
             {undefined, bar},
             {undefined, <<"baz">>},
             {"123", <<"content_length">>},
             {"123", <<"test">>},
             {undefined, <<"test2">>},
             {"123", <<"test3">>},
             {"123", <<"test4">>}],
    lists:foreach(
      fun ({Expected, Key}) ->
              ?assertEqual(Expected, get_combined_value(Key, Headers))
      end,
      Cases),
    ok.

%% Repeated set-cookie headers stay separate and keep insertion order.
set_cookie_test() ->
    Cookies = [{"set-cookie", foo}, {"set-cookie", bar}, {"set-cookie", baz}],
    Listed = to_list(make(Cookies)),
    ?assertEqual(
       [{"set-cookie", "foo"}, {"set-cookie", "bar"}, {"set-cookie", "baz"}],
       Listed),
    ok.
%% End-to-end exercise of the exported API through ?MODULE.
headers_test() ->
    %% Keys differing only in case or type are merged under the first key.
    Merged = ?MODULE:make([{hdr, foo}, {"Hdr", "bar"}, {'Hdr', 2}]),
    ?assertEqual([{hdr, "foo, bar, 2"}], ?MODULE:to_list(Merged)),
    WithTaco = ?MODULE:insert(taco, grande, Merged),
    ?assertEqual([{hdr, "foo, bar, 2"}, {taco, "grande"}],
                 ?MODULE:to_list(WithTaco)),

    %% Set-Cookie values are listed separately but read back combined.
    OneCookie = ?MODULE:make([{"Set-Cookie", "foo"}]),
    ?assertEqual([{"Set-Cookie", "foo"}], ?MODULE:to_list(OneCookie)),
    TwoCookies = ?MODULE:insert("Set-Cookie", "bar", OneCookie),
    ?assertEqual([{"Set-Cookie", "foo"}, {"Set-Cookie", "bar"}],
                 ?MODULE:to_list(TwoCookies)),
    ?assertEqual("foo, bar", ?MODULE:get_value("set-cookie", TwoCookies)),
    ?assertEqual({value, {"Set-Cookie", "foo, bar"}},
                 ?MODULE:lookup("set-cookie", TwoCookies)),
    ?assertEqual(undefined, ?MODULE:get_value("shibby", TwoCookies)),
    ?assertEqual(none, ?MODULE:lookup("shibby", TwoCookies)),

    %% Primary value and deletion.
    WithType = ?MODULE:insert(
                 "content-type",
                 "application/x-www-form-urlencoded; charset=utf8",
                 TwoCookies),
    ?assertEqual("application/x-www-form-urlencoded",
                 ?MODULE:get_primary_value("content-type", WithType)),
    ?assertEqual(WithType,
                 ?MODULE:delete_any("nonexistent-header", WithType)),
    ?assertEqual(TwoCookies, ?MODULE:delete_any("content-type", WithType)),

    %% from_binary/1 on a complete header section, as binary and as string.
    RawBin = <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>,
    FromBin = ?MODULE:from_binary(RawBin),
    ?assertEqual(FromBin, ?MODULE:from_binary(binary_to_list(RawBin))),
    ?assertEqual("47", ?MODULE:get_value("Content-Length", FromBin)),
    ?assertEqual("text/plain", ?MODULE:get_value("Content-Type", FromBin)),
    FromBinList = ?MODULE:to_list(FromBin),
    ?assertEqual(2, length(FromBinList)),
    ?assertEqual(true, lists:member({'Content-Length', "47"}, FromBinList)),
    ?assertEqual(true,
                 lists:member({'Content-Type', "text/plain"}, FromBinList)),

    %% from_binary/1 on a list of header lines, in every binary/string mix.
    Lines1 = [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ],
    Lines2 = [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ],
    Lines3 = [ <<"Content-Length: 47\r\n">>, "Content-Type: text/plain\r\n" ],
    FromLines = ?MODULE:from_binary(Lines1),
    ?assertEqual(FromLines, ?MODULE:from_binary(Lines2)),
    ?assertEqual(FromLines, ?MODULE:from_binary(Lines3)),
    ?assertEqual("47", ?MODULE:get_value("Content-Length", FromLines)),
    ?assertEqual("text/plain", ?MODULE:get_value("Content-Type", FromLines)),
    FromLinesList = ?MODULE:to_list(FromLines),
    ?assertEqual(2, length(FromLinesList)),
    ?assertEqual(true, lists:member({'Content-Length', "47"}, FromLinesList)),
    ?assertEqual(true,
                 lists:member({'Content-Type', "text/plain"}, FromLinesList)),

    %% Inputs that contain no header lines give empty headers.
    EmptyInputs = [<<>>,
                   <<"">>,
                   <<"\r\n">>,
                   <<"\r\n\r\n">>,
                   "",
                   [<<>>],
                   [<<"">>],
                   [<<"\r\n">>],
                   [<<"\r\n\r\n">>]],
    lists:foreach(
      fun (Raw) ->
              ?assertEqual([], ?MODULE:to_list(?MODULE:from_binary(Raw)))
      end,
      EmptyInputs),
    ok.

%% Each entry is {ExpectedTokens, HeaderValue}.
tokenize_header_value_test() ->
    Cases =
        [{["a quote in a \"quote\"."],
          "\"a quote in a \\\"quote\\\".\""},
         {["abc"], "abc"},
         {["abc", "def"], "abc def"},
         {["abc", "def"], "abc , def"},
         {["abc", "def"], ",abc ,, def,,"},
         {["abc def"], "\"abc def\" "},
         {["abc, def"], "\"abc, def\""},
         {["\\a\\$"], "\"\\a\\$\""},
         {["abc def", "foo, bar", "12345", ""],
          "\"abc def\" \"foo, bar\" , 12345, \"\""},
         {undefined, undefined},
         {undefined, "umatched quote\""},
         {undefined, "\"unmatched quote"}],
    lists:foreach(
      fun ({Expected, Value}) ->
              ?assertEqual(Expected, tokenize_header_value(Value))
      end,
      Cases).

-endif.