1%% @author Bob Ippolito <bob@mochimedia.com>
2%% @copyright 2007 Mochi Media, Inc.
3
4%% @doc Case preserving (but case insensitive) HTTP Header dictionary.
5
6-module(mochiweb_headers).
7-author('bob@mochimedia.com').
8-export([empty/0, from_list/1, insert/3, enter/3, get_value/2, lookup/2]).
9-export([delete_any/2, get_primary_value/2, get_combined_value/2]).
10-export([default/3, enter_from_list/2, default_from_list/2]).
11-export([to_list/1, make/1]).
12-export([from_binary/1]).
13
14%% @type headers().
15%% @type key() = atom() | binary() | string().
16%% @type value() = atom() | binary() | string() | integer().
17
%% @spec empty() -> headers()
%% @doc Create an empty headers structure, i.e. one that holds no header
%%      entries at all.
empty() ->
    %% The headers structure is represented as a gb_trees tree.
    gb_trees:empty().
22
%% @spec make(headers() | [{key(), value()}]) -> headers()
%% @doc Coerce the argument into a headers(). A list of pairs is converted
%%      with from_list/1; any other term is assumed to already be a
%%      headers() and is returned unchanged.
make(HeadersOrList) ->
    case is_list(HeadersOrList) of
        true ->
            from_list(HeadersOrList);
        false ->
            %% Not a list: trust the caller that this is a headers().
            HeadersOrList
    end.
30
31%% @spec from_binary(iolist()) -> headers()
32%% @doc Transforms a raw HTTP header into a mochiweb headers structure.
33%%
34%%      The given raw HTTP header can be one of the following:
35%%
36%%      1) A string or a binary representing a full HTTP header ending with
37%%         double CRLF.
38%%         Examples:
39%%         ```
40%%         "Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n"
41%%         <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>'''
42%%
43%%      2) A list of binaries or strings where each element represents a raw
44%%         HTTP header line ending with a single CRLF.
45%%         Examples:
46%%         ```
47%%         [<<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">>]
48%%         ["Content-Length: 47\r\n", "Content-Type: text/plain\r\n"]
49%%         ["Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">>]'''
50%%
from_binary(RawHttpHeaderList) when not is_binary(RawHttpHeaderList) ->
    %% Flatten the iolist and append one more CRLF before parsing it
    %% through the binary clause below.
    Flattened = list_to_binary([RawHttpHeaderList, "\r\n"]),
    from_binary(Flattened);
from_binary(RawHttpHeader) ->
    %% Already a binary: parse it header line by header line.
    from_binary(RawHttpHeader, []).
55
%% Decode the raw header one line at a time with erlang:decode_packet/3,
%% collecting {Name, Value} pairs in Acc (newest first). Decoding stops at
%% the first result that is not a decoded header line, and the collected
%% pairs are then turned into a headers() structure.
from_binary(RawHttpHeader, Acc) ->
    case erlang:decode_packet(httph, RawHttpHeader, []) of
        {ok, {http_header, _, H, _, V}, Rest} ->
            from_binary(Rest, [{H, V} | Acc]);
        _ ->
            %% Acc was built by consing, so it is in reverse order. Restore
            %% the order in which the lines appeared before inserting, so
            %% that repeated headers are merged as "first, second" and the
            %% key kept is the one from the first occurrence.
            make(lists:reverse(Acc))
    end.
63
%% @spec from_list([{key(), value()}]) -> headers()
%% @doc Build a headers() by inserting every pair of the list, in order,
%%      into an empty structure. Pairs are added with insert/3, so values
%%      for repeated keys are merged rather than replaced.
from_list(List) ->
    InsertPair = fun ({Key, Value}, Headers) ->
                         insert(Key, Value, Headers)
                 end,
    lists:foldl(InsertPair, empty(), List).
68
%% @spec enter_from_list([{key(), value()}], headers()) -> headers()
%% @doc Enter every pair of the list into the headers, in order. A value
%%      already stored under the same key is replaced.
enter_from_list([], T) ->
    T;
enter_from_list([{K, V} | Rest], T) ->
    enter_from_list(Rest, enter(K, V, T)).
73
%% @spec default_from_list([{key(), value()}], headers()) -> headers()
%% @doc Add every pair of the list, in order, whose key is not yet present
%%      in the headers. Existing entries are left untouched.
default_from_list([], T) ->
    T;
default_from_list([{K, V} | Rest], T) ->
    default_from_list(Rest, default(K, V, T)).
78
%% @spec to_list(headers()) -> [{key(), string()}]
%% @doc Return the contents of the headers as a list of pairs. Each key is
%%      the exact key that was first inserted (e.g. may be an atom or
%%      binary, case is preserved). A header stored as an array of values
%%      yields one pair per value, oldest value first.
to_list(T) ->
    Expand = fun ({K, {array, Newest}}) ->
                     %% Array values are stored newest first.
                     [{K, V} || V <- lists:reverse(Newest)];
                 (Pair) ->
                     [Pair]
             end,
    lists:flatmap(Expand, gb_trees:values(T)).
91
%% @spec get_value(key(), headers()) -> string() | undefined
%% @doc Look the header up case insensitively and return just its value.
%%      undefined is returned when the key is not present.
get_value(Key, Headers) ->
    case lookup(Key, Headers) of
        none ->
            undefined;
        {value, {_StoredKey, Value}} ->
            expand(Value)
    end.
102
%% @spec get_primary_value(key(), headers()) -> string() | undefined
%% @doc Return the part of the header value that precedes the first
%%      semicolon, using a case insensitive search. undefined is returned
%%      when the key is not present.
get_primary_value(K, T) ->
    case get_value(K, T) of
        undefined ->
            undefined;
        Value ->
            NotSemicolon = fun (Char) -> Char =/= $; end,
            {Primary, _Parameters} = lists:splitwith(NotSemicolon, Value),
            Primary
    end.
114
%% @spec get_combined_value(key(), headers()) -> string() | undefined
%% @doc Return the value from the given header using a case insensitive search.
%%      If the value of the header is a comma-separated list whose values
%%      are all identical, that single value will be returned.
%%      undefined will be returned for keys that are not present, when the
%%      values in the list are not all the same, or when the value cannot
%%      be tokenized (e.g. it contains an unmatched double quote).
%%
%%      NOTE: This is not a general purpose accessor. If you need to
%%            access all values in the combined header, tokenize the value
%%            yourself (see the internal `tokenize_header_value/1').
%%
%%      Section 4.2 of the RFC 2616 (HTTP 1.1) describes multiple message-header
%%      fields with the same field-name may be present in a message if and only
%%      if the entire field-value for that header field is defined as a
%%      comma-separated list [i.e., #(values)].
get_combined_value(K, T) ->
    %% tokenize_header_value/1 maps undefined to undefined, so a missing
    %% header and an untokenizable value are both handled by one clause
    %% instead of crashing in sets:from_list/1.
    case tokenize_header_value(get_value(K, T)) of
        undefined ->
            undefined;
        Tokens ->
            %% Collapse duplicates; only a single distinct token counts.
            case sets:to_list(sets:from_list(Tokens)) of
                [Val] ->
                    Val;
                _ ->
                    undefined
            end
    end.
142
%% @spec lookup(key(), headers()) -> {value, {key(), string()}} | none
%% @doc Find a header using a case insensitive search and return both the
%%      key as it was originally stored (case preserved) and its value.
%%      none is returned when the key is not present.
lookup(Key, Headers) ->
    Normalized = normalize(Key),
    case gb_trees:lookup(Normalized, Headers) of
        none ->
            none;
        {value, {StoredKey, Stored}} ->
            {value, {StoredKey, expand(Stored)}}
    end.
154
%% @spec default(key(), value(), headers()) -> headers()
%% @doc Add the pair to the headers only when no entry exists yet for the
%%      key (compared case insensitively); otherwise return the headers
%%      unchanged.
default(K, V, T) ->
    Normalized = normalize(K),
    Value = any_to_list(V),
    case gb_trees:is_defined(Normalized, T) of
        true ->
            T;
        false ->
            gb_trees:insert(Normalized, {K, Value}, T)
    end.
165
%% @spec enter(key(), value(), headers()) -> headers()
%% @doc Store the pair in the headers. Any entry already present for the
%%      key (compared case insensitively) is replaced, including its
%%      stored key.
enter(K, V, T) ->
    gb_trees:enter(normalize(K), {K, any_to_list(V)}, T).
172
%% @spec insert(key(), value(), headers()) -> headers()
%% @doc Add the pair to the headers, merging with any pre-existing entry
%%      for the key. The originally stored key is kept and the values are
%%      combined by merge/3 (Value = V0 ++ ", " ++ V1, except for
%%      "set-cookie" whose values are collected in an array).
insert(K, V, T) ->
    Normalized = normalize(K),
    New = any_to_list(V),
    case gb_trees:lookup(Normalized, T) of
        none ->
            gb_trees:insert(Normalized, {K, New}, T);
        {value, {StoredKey, Old}} ->
            Merged = merge(Normalized, New, Old),
            gb_trees:update(Normalized, {StoredKey, Merged}, T)
    end.
186
%% @spec delete_any(key(), headers()) -> headers()
%% @doc Remove the header stored under the key (compared case
%%      insensitively). The headers are returned unchanged when the key is
%%      not present.
delete_any(K, T) ->
    gb_trees:delete_any(normalize(K), T).
192
193%% Internal API
194
%% Split a header value into its tokens. undefined is passed through; any
%% other value is trimmed and reversed first, then tokenized from the end.
tokenize_header_value(undefined) ->
    undefined;
tokenize_header_value(V) ->
    Reversed = trim_and_reverse(V, false),
    reversed_tokens(Reversed, [], []).
199
%% Strip spaces, newlines and tabs from both ends of V and return the
%% result reversed. The second argument records whether the reversal has
%% happened yet: leading whitespace is dropped, the list is reversed once,
%% and then the (formerly trailing) whitespace is dropped as well.
trim_and_reverse([Ch | Tail], Reversed)
  when Ch =:= $\s; Ch =:= $\n; Ch =:= $\t ->
    trim_and_reverse(Tail, Reversed);
trim_and_reverse(Trimmed, true) ->
    Trimmed;
trim_and_reverse(V, false) ->
    trim_and_reverse(lists:reverse(V), true).
206
%% Split a trimmed, *reversed* header value into tokens.
%%
%% The input is consumed from its head, i.e. from the end of the original
%% value towards its start. Consing each character onto Token and each
%% finished token onto Acc therefore yields both in their original order.
%% Spaces, newlines, tabs and commas separate tokens. A double quote seen
%% between tokens starts a quoted string, which is read by
%% extract_quoted_string/2. Returns the list of tokens, or undefined when
%% a quoted string is unterminated, a quote appears inside an unquoted
%% token, or the input is not a list.
reversed_tokens([], [], Acc) ->
    Acc;
reversed_tokens([], Token, Acc) ->
    [Token | Acc];
reversed_tokens("\"" ++ Rest, [], Acc) ->
    case extract_quoted_string(Rest, []) of
        {String, NewRest} ->
            reversed_tokens(NewRest, [], [String | Acc]);
        undefined ->
            undefined
    end;
reversed_tokens("\"" ++ _Rest, _Token, _Acc) ->
    %% A quote in the middle of an unquoted token is malformed.
    undefined;
reversed_tokens([C | Rest], [], Acc) when C=:=$ ;C=:=$\n;C=:=$\t;C=:=$, ->
    %% Separator with no pending token: skip it.
    reversed_tokens(Rest, [], Acc);
reversed_tokens([C | Rest], Token, Acc) when C=:=$ ;C=:=$\n;C=:=$\t;C=:=$, ->
    %% Separator ends the pending token.
    reversed_tokens(Rest, [], [Token | Acc]);
reversed_tokens([C | Rest], Token, Acc) ->
    reversed_tokens(Rest, [C | Token], Acc);
reversed_tokens(_, _, _) ->
    %% Previously returned the misspelled atom 'undefeined', which no
    %% caller could match against undefined.
    undefined.
228
%% Read a quoted string from a *reversed* input whose closing quote has
%% already been consumed. Characters are collected until the opening quote
%% is found; a quote directly followed by a backslash is an escaped quote
%% (it was written as \" in the original order) and is kept as a plain
%% quote. Returns {String, Remaining}, or undefined when the input ends
%% before an opening quote is seen.
extract_quoted_string([$", $\\ | Tail], Collected) ->
    extract_quoted_string(Tail, [$" | Collected]);
extract_quoted_string([$" | Tail], Collected) ->
    {Collected, Tail};
extract_quoted_string([Ch | Tail], Collected) ->
    extract_quoted_string(Tail, [Ch | Collected]);
extract_quoted_string([], _Collected) ->
    undefined.
237
%% Turn a stored value into its string form. An {array, L} value (kept
%% newest first) is put back in insertion order and joined with ", ";
%% anything else is returned as is.
expand(Stored) ->
    case Stored of
        {array, Newest} ->
            mochiweb_util:join(lists:reverse(Newest), ", ");
        _ ->
            Stored
    end.
242
%% Combine a new value (New) with the value already stored (Old) under the
%% normalized key. "set-cookie" values are never joined into one string:
%% they are collected in an {array, L} with the newest value first. Every
%% other header is merged as Old ++ ", " ++ New.
merge("set-cookie", New, Old) ->
    Earlier = case Old of
                  {array, L} -> L;
                  _ -> [Old]
              end,
    {array, [New | Earlier]};
merge(_Key, New, Old) ->
    Old ++ ", " ++ New.
249
%% Produce the lookup form of a header key: atoms and binaries are first
%% converted to strings, and the string is then lower-cased.
normalize(K) when is_atom(K) ->
    normalize(atom_to_list(K));
normalize(K) when is_binary(K) ->
    normalize(binary_to_list(K));
normalize(K) when is_list(K) ->
    string:to_lower(K).
256
%% Convert a header value to a string. Integers, binaries and atoms are
%% converted with the matching BIF; lists are returned unchanged.
any_to_list(V) when is_integer(V) ->
    integer_to_list(V);
any_to_list(V) when is_binary(V) ->
    binary_to_list(V);
any_to_list(V) when is_atom(V) ->
    atom_to_list(V);
any_to_list(V) when is_list(V) ->
    V.
265
266%%
267%% Tests.
268%%
269-ifdef(TEST).
270-include_lib("eunit/include/eunit.hrl").
271
make_test() ->
    %% Passing an existing headers() to make/1 must return it unchanged.
    Headers = make([{hdr, foo}]),
    ?assertEqual(Headers, make(Headers)).
277
enter_from_list_test() ->
    Base = make([{hdr, foo}]),
    %% A key that is not present yet is added next to the existing one.
    Added = enter_from_list([{baz, wibble}], Base),
    ?assertEqual([{baz, "wibble"}, {hdr, "foo"}], to_list(Added)),
    %% A key that is already present has its value replaced.
    Replaced = enter_from_list([{hdr, bar}], Base),
    ?assertEqual([{hdr, "bar"}], to_list(Replaced)),
    ok.
287
default_from_list_test() ->
    Base = make([{hdr, foo}]),
    %% A key that is not present yet is added next to the existing one.
    Added = default_from_list([{baz, wibble}], Base),
    ?assertEqual([{baz, "wibble"}, {hdr, "foo"}], to_list(Added)),
    %% A key that is already present keeps its original value.
    Kept = default_from_list([{hdr, bar}], Base),
    ?assertEqual([{hdr, "foo"}], to_list(Kept)),
    ok.
297
get_primary_value_test() ->
    H = make([{hdr, foo}, {baz, <<"wibble;taco">>}]),
    %% {Expected, Key}: plain value, missing key, value cut at the semicolon.
    Cases = [{"foo", hdr},
             {undefined, bar},
             {"wibble", <<"baz">>}],
    lists:foreach(
      fun ({Expected, Key}) ->
              ?assertEqual(Expected, get_primary_value(Key, H))
      end,
      Cases),
    ok.
310
get_combined_value_test() ->
    H = make([{hdr, foo}, {baz, <<"wibble,taco">>}, {content_length, "123, 123"},
              {test, " 123,  123,     123  , 123,123 "},
              {test2, "456,  123,     123  , 123"},
              {test3, "123"}, {test4, " 123, "}]),
    %% {Expected, Key}: a value is returned only when every element of the
    %% comma-separated list is the same; otherwise undefined is expected.
    Cases = [{"foo", hdr},
             {undefined, bar},
             {undefined, <<"baz">>},
             {"123", <<"content_length">>},
             {"123", <<"test">>},
             {undefined, <<"test2">>},
             {"123", <<"test3">>},
             {"123", <<"test4">>}],
    lists:foreach(
      fun ({Expected, Key}) ->
              ?assertEqual(Expected, get_combined_value(Key, H))
      end,
      Cases),
    ok.
341
set_cookie_test() ->
    %% Repeated set-cookie headers must stay separate entries, listed in
    %% the order in which they were inserted.
    Cookies = [{"set-cookie", foo}, {"set-cookie", bar}, {"set-cookie", baz}],
    Expected = [{"set-cookie", "foo"}, {"set-cookie", "bar"}, {"set-cookie", "baz"}],
    ?assertEqual(Expected, to_list(make(Cookies))),
    ok.
348
%% End-to-end test of the public API. Assertions are plain pattern matches,
%% many of them against variables bound earlier, so statement order matters.
headers_test() ->
    %% Keys that differ only in case or type are merged under the first key.
    H = ?MODULE:make([{hdr, foo}, {"Hdr", "bar"}, {'Hdr', 2}]),
    [{hdr, "foo, bar, 2"}] = ?MODULE:to_list(H),
    H1 = ?MODULE:insert(taco, grande, H),
    [{hdr, "foo, bar, 2"}, {taco, "grande"}] = ?MODULE:to_list(H1),
    %% Set-Cookie values stay separate in to_list/1 but are joined with
    %% ", " by get_value/2 and lookup/2.
    H2 = ?MODULE:make([{"Set-Cookie", "foo"}]),
    [{"Set-Cookie", "foo"}] = ?MODULE:to_list(H2),
    H3 = ?MODULE:insert("Set-Cookie", "bar", H2),
    [{"Set-Cookie", "foo"}, {"Set-Cookie", "bar"}] = ?MODULE:to_list(H3),
    "foo, bar" = ?MODULE:get_value("set-cookie", H3),
    {value, {"Set-Cookie", "foo, bar"}} = ?MODULE:lookup("set-cookie", H3),
    %% Missing keys.
    undefined = ?MODULE:get_value("shibby", H3),
    none = ?MODULE:lookup("shibby", H3),
    %% The primary value stops at the first semicolon.
    H4 = ?MODULE:insert("content-type",
                        "application/x-www-form-urlencoded; charset=utf8",
                        H3),
    "application/x-www-form-urlencoded" = ?MODULE:get_primary_value(
                                             "content-type", H4),
    %% Deleting a missing key is a no-op; deleting the added key gives H3 back.
    H4 = ?MODULE:delete_any("nonexistent-header", H4),
    H3 = ?MODULE:delete_any("content-type", H4),
    %% from_binary/1 on a full header, given as a binary and as a string.
    HB = <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>,
    H_HB = ?MODULE:from_binary(HB),
    H_HB = ?MODULE:from_binary(binary_to_list(HB)),
    "47" = ?MODULE:get_value("Content-Length", H_HB),
    "text/plain" = ?MODULE:get_value("Content-Type", H_HB),
    L_H_HB = ?MODULE:to_list(H_HB),
    2 = length(L_H_HB),
    %% The keys of the parsed headers are expected to be atoms here.
    true = lists:member({'Content-Length', "47"}, L_H_HB),
    true = lists:member({'Content-Type', "text/plain"}, L_H_HB),
    %% from_binary/1 on a list of header lines (binaries, strings or a mix).
    HL = [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ],
    HL2 = [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ],
    HL3 = [ <<"Content-Length: 47\r\n">>, "Content-Type: text/plain\r\n" ],
    H_HL = ?MODULE:from_binary(HL),
    H_HL = ?MODULE:from_binary(HL2),
    H_HL = ?MODULE:from_binary(HL3),
    "47" = ?MODULE:get_value("Content-Length", H_HL),
    "text/plain" = ?MODULE:get_value("Content-Type", H_HL),
    L_H_HL = ?MODULE:to_list(H_HL),
    2 = length(L_H_HL),
    true = lists:member({'Content-Length', "47"}, L_H_HL),
    true = lists:member({'Content-Type', "text/plain"}, L_H_HL),
    %% Empty input and input holding only CRLFs produce no headers.
    [] = ?MODULE:to_list(?MODULE:from_binary(<<>>)),
    [] = ?MODULE:to_list(?MODULE:from_binary(<<"">>)),
    [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n">>)),
    [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n\r\n">>)),
    [] = ?MODULE:to_list(?MODULE:from_binary("")),
    [] = ?MODULE:to_list(?MODULE:from_binary([<<>>])),
    [] = ?MODULE:to_list(?MODULE:from_binary([<<"">>])),
    [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n">>])),
    [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n\r\n">>])),
    ok.
400
tokenize_header_value_test() ->
    %% {Expected, Input} pairs covering quoted strings, escaped quotes,
    %% separators (spaces and commas), the undefined pass-through and
    %% unbalanced quotes.
    Cases = [{["a quote in a \"quote\"."],
              "\"a quote in a \\\"quote\\\".\""},
             {["abc"], "abc"},
             {["abc", "def"], "abc def"},
             {["abc", "def"], "abc , def"},
             {["abc", "def"], ",abc ,, def,,"},
             {["abc def"], "\"abc def\"      "},
             {["abc, def"], "\"abc, def\""},
             {["\\a\\$"], "\"\\a\\$\""},
             {["abc def", "foo, bar", "12345", ""],
              "\"abc def\" \"foo, bar\" , 12345, \"\""},
             {undefined, undefined},
             {undefined, "umatched quote\""},
             {undefined, "\"unmatched quote"}],
    lists:foreach(
      fun ({Expected, Input}) ->
              ?assertEqual(Expected, tokenize_header_value(Input))
      end,
      Cases).
419
420-endif.
421