1% Licensed under the Apache License, Version 2.0 (the "License"); you may not
2% use this file except in compliance with the License. You may obtain a copy of
3% the License at
4%
5% http://www.apache.org/licenses/LICENSE-2.0
6%
7% Unless required by applicable law or agreed to in writing, software
8% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10% License for the specific language governing permissions and limitations under
11% the License.
12%
13% bind_path is based on bind method from Webmachine
14
15
16%% @doc Module for URL rewriting by pattern matching.
17
18-module(couch_httpd_rewrite).
19
20-compile(tuple_calls).
21
22-export([handle_rewrite_req/3]).
23-include_lib("couch/include/couch_db.hrl").
24
25-define(SEPARATOR, $\/).
26-define(MATCH_ALL, {bind, <<"*">>}).
27
28
29%% doc The http rewrite handler. All rewriting is done from
30%% /dbname/_design/ddocname/_rewrite by default.
31%%
%% Each rule should be in the "rewrites" member of the design doc.
%% Example of a complete rule:
34%%
35%%  {
36%%      ....
37%%      "rewrites": [
38%%      {
39%%          "from": "",
40%%          "to": "index.html",
41%%          "method": "GET",
42%%          "query": {}
43%%      }
44%%      ]
45%%  }
46%%
%%  from: is the path rule used to bind the current uri to the rule. It
%% uses pattern matching for that.
%%
%%  to: rule to rewrite a url. It can contain variables depending on binding
%% variables discovered during pattern matching and query args (url args and from
%% the query member.)
%%
%%  method: method to bind the request method to the rule. By default "*"
%%  query: query args you want to define. They can contain dynamic variables
%% by binding the key to the bindings
57%%
58%%
%% to and from are paths with patterns. A pattern can be a string starting
%% with ":" or "*". ex:
%% /somepath/:var/*
%%
%% This path is converted to an erlang list by splitting on "/". Each var is
%% converted to a {bind, Name} tuple. "*" is converted to {bind, <<"*">>}. The
%% pattern matching is done by splitting the request url on "/" into a list of
%% tokens. A string pattern will match an equal token. The star pattern will
%% match one or more remaining tokens, but may only be present as the last
%% pathterm in a pathspec. If all tokens are matched and all pathterms are
%% used, then the pathspec matches. It works like webmachine. Each identified
%% token will be reused in the to rule and in query
70%%
%% The pattern matching is done by first matching the request method to a
%% rule. By default all methods match a rule (method is equal to "*" by
%% default). Then it will try to match the path to one rule. If no rule
%% matches, then a 404 error is displayed.
%%
%% Once a rule is found we rewrite the request url using the "to" and
%% "query" members. The identified tokens are matched to the rule and
%% will replace vars. If '*' is found in the rule it will contain the
%% remaining part if it exists.
80%%
81%% Examples:
82%%
83%% Dispatch rule            URL             TO                  Tokens
84%%
85%% {"from": "/a/b",         /a/b?k=v        /some/b?k=v         var =:= b
86%% "to": "/some/"}                                              k = v
87%%
88%% {"from": "/a/b",         /a/b            /some/b?var=b       var =:= b
89%% "to": "/some/:var"}
90%%
91%% {"from": "/a",           /a              /some
92%% "to": "/some/*"}
93%%
94%% {"from": "/a/*",         /a/b/c          /some/b/c
95%% "to": "/some/*"}
96%%
97%% {"from": "/a",           /a              /some
98%% "to": "/some/*"}
99%%
100%% {"from": "/a/:foo/*",    /a/b/c          /some/b/c?foo=b     foo =:= b
101%% "to": "/some/:foo/*"}
102%%
103%% {"from": "/a/:foo",     /a/b             /some/?k=b&foo=b    foo =:= b
104%% "to": "/some",
105%%  "query": {
106%%      "k": ":foo"
107%%  }}
108%%
109%% {"from": "/a",           /a?foo=b        /some/b             foo =:= b
110%% "to": "/some/:foo",
111%%  }}
112
113
114
%% Handler for /db/_design/ddoc/_rewrite/... requests. Reads the "rewrites"
%% member of the design doc, rewrites the request path with the first
%% matching rule and dispatches the rewritten request again.
%% Throws {bad_request, _} once the configured "rewrite_limit" is reached and
%% not_found (from try_bind_path/4) when no rule matches.
handle_rewrite_req(#httpd{
        path_parts = [DbName, <<"_design">>, DesignName, _Rewrite | PathParts]
    } = Req, _Db, DDoc) ->
    % we are in a design handler: relative targets are resolved against
    % the design document path
    DesignId = <<"_design/", DesignName/binary>>,
    EncodedDbName = ?l2b(couch_util:url_encode(DbName)),
    Prefix = <<"/", EncodedDbName/binary, "/", DesignId/binary>>,
    QueryList = [decode_query_value(Arg) || Arg <- couch_httpd:qs(Req)],

    bump_rewrite_count(),

    #doc{body = {Props}} = DDoc,

    % get rules from ddoc
    case couch_util:get_value(<<"rewrites">>, Props) of
        undefined ->
            couch_httpd:send_error(Req, 404, <<"rewrite_error">>,
                <<"Invalid path.">>);
        Bin when is_binary(Bin) ->
            couch_httpd:send_error(Req, 400, <<"rewrite_error">>,
                <<"Rewrite rules are a String. They must be a JSON Array.">>);
        Rules ->
            RawPath = rewritten_raw_path(Req, Rules, PathParts, QueryList,
                Prefix),
            redispatch(Req, RawPath)
    end.

%% Increments the per-process rewrite counter, throwing bad_request when the
%% "rewrite_limit" setting (default 100) has already been reached.
bump_rewrite_count() ->
    Count = erlang:get(?REWRITE_COUNT),
    Limit = chttpd_util:get_chttpd_config_integer("rewrite_limit", 100),
    if
        Count >= Limit ->
            throw({bad_request, <<"Exceeded rewrite recursion limit">>});
        true ->
            erlang:put(?REWRITE_COUNT, Count + 1)
    end.

%% Builds the raw path (a string, including the query string when there are
%% bindings) that the request is rewritten to.
rewritten_raw_path(#httpd{method = Method}, Rules, PathParts, QueryList,
        Prefix) ->
    % create dispatch list from rules
    DispatchList = [make_rule(Rule) || {Rule} <- Rules],

    % get raw path by matching url to a rule. Throws not_found.
    {TargetParts, RawBindings} = try_bind_path(DispatchList,
        couch_util:to_binary(Method), PathParts, QueryList),
    QuotedParts = [quote_plus(Part) || Part <- TargetParts],
    Bindings = maybe_encode_bindings(RawBindings),

    Joined = string:join(QuotedParts, [?SEPARATOR]),

    % if path is relative detect it and rewrite path
    Relative = case mochiweb_util:safe_relative_path(Joined) of
        undefined -> Joined;
        Safe -> Safe
    end,
    Normalized = normalize_path(?b2l(Prefix) ++ "/" ++ Relative),

    WithQuery = case Bindings of
        [] ->
            Normalized;
        _ ->
            [Normalized, "?", mochiweb_util:urlencode(Bindings)]
    end,
    ?b2l(iolist_to_binary(WithQuery)).

%% Creates a new mochiweb request for RawPath and hands it back to
%% couch_httpd, remembering the auth state of the original request in the
%% process dictionary.
redispatch(#httpd{
        mochi_req = MochiReq,
        db_url_handlers = DbUrlHandlers,
        design_url_handlers = DesignUrlHandlers,
        default_fun = DefaultFun,
        url_handlers = UrlHandlers,
        user_ctx = UserCtx,
        auth = Auth
    }, RawPath) ->
    % In order to do OAuth correctly, we have to save the
    % requested path. We use default so chained rewriting
    % wont replace the original header.
    Headers = mochiweb_headers:default("x-couchdb-requested-path",
        MochiReq:get(raw_path),
        MochiReq:get(headers)),

    couch_log:debug("rewrite to ~p ~n", [RawPath]),

    % build a new mochiweb request
    NewMochiReq = mochiweb_request:new(MochiReq:get(socket),
        MochiReq:get(method),
        RawPath,
        MochiReq:get(version),
        Headers),

    % cleanup, It force mochiweb to reparse raw uri.
    NewMochiReq:cleanup(),

    erlang:put(pre_rewrite_auth, Auth),
    erlang:put(pre_rewrite_user_ctx, UserCtx),
    couch_httpd:handle_request_int(NewMochiReq, DefaultFun,
        UrlHandlers, DbUrlHandlers, DesignUrlHandlers).
209
%% URL-quotes one path segment. A {bind, Name} segment is quoted by its name,
%% any other segment is quoted as is.
quote_plus(Segment) ->
    Raw = case Segment of
        {bind, Name} -> Name;
        _ -> Segment
    end,
    mochiweb_util:quote_plus(Raw).
214
%% @doc Try to find a rule matching the current url. Rules are tried in
%% order; the first one whose method and path both match wins and
%% {NewPathParts, Bindings} is returned for it. If none is found,
%% not_found is thrown.
try_bind_path([], _Method, _PathParts, _QueryList) ->
    throw(not_found);
try_bind_path([Dispatch | Rest], Method, PathParts, QueryList) ->
    [{RulePath, RuleMethod}, RedirectPath, QueryArgs, Formats] = Dispatch,
    Match = case bind_method(RuleMethod, Method) of
        true -> bind_path(RulePath, PathParts, []);
        false -> fail
    end,
    case Match of
        {ok, Remaining, Bindings} ->
            apply_rule(RedirectPath, QueryArgs, Formats,
                Bindings ++ QueryList, Remaining);
        fail ->
            try_bind_path(Rest, Method, PathParts, QueryList)
    end.

%% Builds the rewritten path and the final binding list for a matched rule.
%% Bindings holds the path bindings followed by the url query args.
apply_rule(RedirectPath, QueryArgs, Formats, Bindings, Remaining) ->
    % we parse query args from the rule and fill
    % it eventually with bindings vars
    RuleArgs = make_query_list(QueryArgs, Bindings, Formats, []),
    % drop the bindings that the rule's query member already defines;
    % filtermap keeps the original order without the quadratic cost of
    % appending to the accumulator on every step
    Unshadowed = lists:filtermap(fun({K, V}) ->
        Key = to_binding(K),
        case couch_util:get_value(Key, RuleArgs) of
            undefined -> {true, {Key, V}};
            _Defined -> false
        end
    end, Bindings),

    FinalBindings = Unshadowed ++ RuleArgs,
    NewPathParts = make_new_path(RedirectPath, FinalBindings, Remaining, []),
    {NewPathParts, FinalBindings}.
251
%% Dynamically rewrites the query list given as the "query" member of a
%% rule. Binary and list values go through replace_var/3 so that variables
%% are replaced by a binding or an argument passed in the url; every other
%% value (objects, numbers, ...) is kept unchanged. Keys are converted to
%% bindings. Entries are prepended to Acc, so the result is in reverse
%% rule order.
make_query_list([], _Bindings, _Formats, Acc) ->
    Acc;
make_query_list([{Key, Value} | Rest], Bindings, Formats, Acc) ->
    Resolved = case is_binary(Value) orelse is_list(Value) of
        true -> replace_var(Value, Bindings, Formats);
        false -> Value
    end,
    Entry = {to_binding(Key), Resolved},
    make_query_list(Rest, Bindings, Formats, [Entry | Acc]).
268
%% Replaces variables in a query value. The binary "*" and binaries starting
%% with ":" are looked up with get_var/4 (falling back to the value itself);
%% in a list each ":"-prefixed binary element is replaced the same way and
%% other elements are kept. Anything else is returned unchanged.
replace_var(<<"*">> = Star, Bindings, Formats) ->
    get_var(Star, Bindings, Star, Formats);
replace_var(<<":", Name/binary>> = Raw, Bindings, Formats) ->
    get_var(Name, Bindings, Raw, Formats);
replace_var(Literal, _Bindings, _Formats) when is_binary(Literal) ->
    Literal;
replace_var(Items, Bindings, Formats) when is_list(Items) ->
    Substitute = fun
        (<<":", Name/binary>> = Raw) ->
            get_var(Name, Bindings, Raw, Formats);
        (Other) ->
            Other
    end,
    lists:map(Substitute, Items);
replace_var(Other, _Bindings, _Formats) ->
    Other.
284
%% JSON-encodes Value when Key is one of the key-like parameters
%% (key, startkey, start_key, endkey, end_key, keys); any other value is
%% returned unchanged.
maybe_json(Key, Value) ->
    JsonKeys = [
        <<"key">>, <<"startkey">>, <<"start_key">>,
        <<"endkey">>, <<"end_key">>, <<"keys">>
    ],
    IsJsonKey = lists:member(Key, JsonKeys),
    if
        IsJsonKey -> ?JSON_ENCODE(Value);
        true -> Value
    end.
293
%% Looks up VarName, converted to a binding, in Props (Default when absent)
%% and applies the format registered for VarName in Formats, if any.
get_var(VarName, Props, Default, Formats) ->
    Found = couch_util:get_value(to_binding(VarName), Props, Default),
    maybe_format(VarName, Found, Formats).
298
%% Applies the format registered under VarName in Formats to Value; Value is
%% returned unchanged when no format is registered.
maybe_format(VarName, Value, Formats) ->
    apply_format(couch_util:get_value(VarName, Formats), Value).

apply_format(undefined, Value) ->
    Value;
apply_format(Format, Value) ->
    format(Format, Value).
306
%% Converts Value according to Format.
%%   <<"int">>  - integers are returned as is; binaries and strings are
%%                parsed as integers.
%%   <<"bool">> - binaries and strings equal to "true"/"false" (case
%%                insensitive) become the corresponding atom.
%% Whenever the conversion is not possible, or the format is unknown, the
%% original Value is returned unchanged (a binary stays a binary).
format(<<"int">>, Value) when is_integer(Value) ->
    Value;
format(<<"int">>, Value) when is_binary(Value) ->
    try
        binary_to_integer(Value)
    catch
        error:badarg -> Value
    end;
format(<<"int">>, Value) when is_list(Value) ->
    try
        list_to_integer(Value)
    catch
        error:badarg -> Value
    end;
format(<<"bool">>, Value) when is_binary(Value) ->
    format_bool(string:to_lower(binary_to_list(Value)), Value);
format(<<"bool">>, Value) when is_list(Value) ->
    format_bool(string:to_lower(Value), Value);
format(_Format, Value) ->
    Value.

%% Maps a lower-cased string to a boolean, falling back to the original value.
format_bool("true", _Original) -> true;
format_bool("false", _Original) -> false;
format_bool(_Lowered, Original) -> Original.
328
%% doc: build the new path from bindings. Bindings are query args
%% (+ dynamic query rewritten if needed) and bindings found in
%% the bind_path step. A "*" segment in the target ends the path with the
%% Remaining request tokens; a variable segment without a binding becomes
%% <<"undefined">>.
make_new_path([], _Bindings, _Remaining, Built) ->
    lists:reverse(Built);
make_new_path([?MATCH_ALL | _Ignored], _Bindings, Remaining, Built) ->
    lists:reverse(Built, Remaining);
make_new_path([{bind, _} = Var | Rest], Bindings, Remaining, Built) ->
    Segment = case couch_util:get_value(Var, Bindings) of
        undefined -> << "undefined">>;
        Bound -> iolist_to_binary(Bound)
    end,
    make_new_path(Rest, Bindings, Remaining, [Segment | Built]);
make_new_path([Literal | Rest], Bindings, Remaining, Built) ->
    make_new_path(Rest, Bindings, Remaining, [Literal | Built]).
349
350
%% @doc Tells whether the method of the request fits the rule method.
%% If the rule method is "*", which is the default, every request
%% method binds. This allows rules that depend on the HTTP method.
bind_method(RuleMethod, Method) ->
    RuleMethod =:= ?MATCH_ALL orelse RuleMethod =:= {bind, Method}.
361
362
%% @doc Bind path. Walks the rule's "from" pattern and the request tokens
%% in parallel. Returns {ok, RemainingTokens, Bindings} on success, fail
%% otherwise. A trailing "*" pattern needs at least one token left and
%% records the first of them under the "*" binding.
bind_path([], [], Bound) ->
    {ok, [], Bound};
bind_path([?MATCH_ALL], [First | _] = Tokens, Bound) ->
    {ok, Tokens, [{?MATCH_ALL, First} | Bound]};
bind_path(_Pattern, [], _Bound) ->
    fail;
bind_path([{bind, _} = Var | Pattern], [Token | Tokens], Bound) ->
    bind_path(Pattern, Tokens, [{Var, Token} | Bound]);
bind_path([Same | Pattern], [Same | Tokens], Bound) ->
    bind_path(Pattern, Tokens, Bound);
bind_path(_Pattern, _Tokens, _Bound) ->
    fail.
377
378
%% Normalizes a path string: splits it on "/", resolves "." and ".."
%% segments with normalize_path1/2 and joins the result again, always with a
%% leading "/".
normalize_path(Path) ->
    Segments = string:tokens(Path, "/"),
    Resolved = normalize_path1(Segments, []),
    "/" ++ string:join(Resolved, [?SEPARATOR]).
383
384
%% Resolves "." and ".." in a list of path segments. Acc holds the segments
%% kept so far, most recent first. "." is dropped; ".." removes the previous
%% segment unless there is none or it is itself "..", in which case the
%% ".." is kept.
normalize_path1([], Acc) ->
    lists:reverse(Acc);
normalize_path1(["." | Rest], Acc) ->
    normalize_path1(Rest, Acc);
normalize_path1([".." | Rest], [Previous | Kept]) when Previous =/= ".." ->
    normalize_path1(Rest, Kept);
normalize_path1([Segment | Rest], Acc) ->
    normalize_path1(Rest, [Segment | Acc]).
398
399
%% @doc Transform a json rule into the erlang term used for pattern
%% matching: [{FromParts, Method}, ToParts, QueryArgs, Formats].
%% A missing "method" or "from" matches everything, missing "query" and
%% "formats" default to []. A rule without "to" throws
%% {error, invalid_rewrite_target}.
make_rule(Rule) ->
    % to_binding/1 leaves the ?MATCH_ALL default untouched
    Method = to_binding(couch_util:get_value(<<"method">>, Rule, ?MATCH_ALL)),
    QueryArgs = rule_object(<<"query">>, Rule),
    FromParts = case couch_util:get_value(<<"from">>, Rule) of
        undefined -> [?MATCH_ALL];
        From -> parse_path(From)
    end,
    ToParts = case couch_util:get_value(<<"to">>, Rule) of
        undefined -> throw({error, invalid_rewrite_target});
        To -> parse_path(To)
    end,
    Formats = rule_object(<<"formats">>, Rule),
    [{FromParts, Method}, ToParts, QueryArgs, Formats].

%% Returns the members of the json object stored under Key in Rule, or []
%% when the rule has no such member.
rule_object(Key, Rule) ->
    case couch_util:get_value(Key, Rule) of
        undefined -> [];
        {Members} -> Members
    end.
426
%% Splits a "from"/"to" path on "/" and converts the segments with
%% path_to_list/3.
parse_path(Path) ->
    Segments = re:split(Path, <<"\\/">>),
    path_to_list(Segments, [], 0).
430
%% @doc Convert a path rule (from or to) to an erlang list.
%% Empty segments are dropped, "*" becomes ?MATCH_ALL and path variables
%% starting with ":" become {bind, Name} tuples. DotDotCount is the number
%% of ".." segments seen so far: with "secure_rewrites" enabled (the
%% default) a third ".." is rejected by throwing insecure_rewrite_rule.
path_to_list([], Acc, _DotDotCount) ->
    lists:reverse(Acc);
path_to_list([<<>> | Rest], Acc, DotDotCount) ->
    path_to_list(Rest, Acc, DotDotCount);
path_to_list([<<"*">> | Rest], Acc, DotDotCount) ->
    path_to_list(Rest, [?MATCH_ALL | Acc], DotDotCount);
path_to_list([<<"..">> = Up | Rest], Acc, DotDotCount) ->
    % the setting is only consulted once two ".." have already been seen
    Blocked = DotDotCount == 2 andalso
        chttpd_util:get_chttpd_config_boolean("secure_rewrites", true),
    case Blocked of
        false ->
            path_to_list(Rest, [Up | Acc], DotDotCount + 1);
        true ->
            couch_log:info("insecure_rewrite_rule ~p blocked",
                [lists:reverse(Acc, [Up | Rest])]),
            throw({insecure_rewrite_rule, "too many ../.. segments"})
    end;
path_to_list([<<":", Var/binary>> | Rest], Acc, DotDotCount) ->
    path_to_list(Rest, [to_binding(Var) | Acc], DotDotCount);
path_to_list([Segment | Rest], Acc, DotDotCount) ->
    path_to_list(Rest, [Segment | Acc], DotDotCount).
458
%% Turns the bindings into a {Key, BinaryValue} list for the query string.
%% The "*" binding is dropped, key-like parameters are JSON-encoded (see
%% maybe_json/2) and every value is flattened to a binary. The result is in
%% reverse order of the input.
maybe_encode_bindings(Props) ->
    encode_bindings(Props, []).

encode_bindings([], Acc) ->
    Acc;
encode_bindings([{{bind, <<"*">>}, _Value} | Rest], Acc) ->
    encode_bindings(Rest, Acc);
encode_bindings([{{bind, Key}, Value} | Rest], Acc) ->
    Encoded = iolist_to_binary(maybe_json(Key, Value)),
    encode_bindings(Rest, [{Key, Encoded} | Acc]).
469
%% Converts one url query argument to a {Binding, Value} pair. Values of the
%% key-like parameters (key, startkey, start_key, endkey, end_key, keys) are
%% JSON-decoded, all other values are converted to binaries.
decode_query_value({K, V}) ->
    JsonParams = ["key", "startkey", "start_key",
        "endkey", "end_key", "keys"],
    IsJson = lists:member(K, JsonParams),
    Binding = to_binding(K),
    Decoded = case IsJson of
        true -> ?JSON_DECODE(V);
        false -> ?l2b(V)
    end,
    {Binding, Decoded}.
478
%% Wraps a name into a {bind, Name} tuple. Existing bindings are returned
%% as is and lists are converted to binaries first.
to_binding({bind, _} = Binding) ->
    Binding;
to_binding(Name) when is_list(Name) ->
    {bind, ?l2b(Name)};
to_binding(Name) ->
    {bind, Name}.
485