% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
%   http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.
%
% bind_path is based on bind method from Webmachine


%% @doc Module for URL rewriting by pattern matching.

-module(couch_httpd_rewrite).

-compile(tuple_calls).

-export([handle_rewrite_req/3]).
-include_lib("couch/include/couch_db.hrl").

-define(SEPARATOR, $\/).
-define(MATCH_ALL, {bind, <<"*">>}).


%% doc The http rewrite handler. All rewriting is done from
%% /dbname/_design/ddocname/_rewrite by default.
%%
%% Each rule should be in the "rewrites" member of the design doc.
%% Example of a complete rule:
%%
%%  {
%%      ....
%%      "rewrites": [
%%      {
%%          "from": "",
%%          "to": "index.html",
%%          "method": "GET",
%%          "query": {}
%%      }
%%      ]
%%  }
%%
%%  from: the path rule used to bind the current uri to the rule. It
%% uses pattern matching for that. Defaults to "*" when absent.
%%
%%  to: rule used to rewrite the url. It can contain variables depending on
%% binding variables discovered during pattern matching and query args (url
%% args and args from the query member). This member is mandatory.
%%
%%  method: method to bind the request method to the rule. By default "*".
%%  query: query args you want to define. They can contain dynamic variables
%% by binding the key to the bindings.
%%  formats: optional object mapping a variable name to "int" or "bool";
%% the value substituted for that variable is converted accordingly when
%% possible.
%%
%%
%% "to" and "from" are paths with patterns. A pattern can be a string starting
%% with ":" or "*". ex:
%% /somepath/:var/*
%%
%% The path is converted to an erlang list by splitting on "/". Each variable
%% ":var" is converted to {bind, <<"var">>} and "*" is converted to
%% {bind, <<"*">>}. The pattern matching is done by splitting the request url
%% on "/" into a list of tokens. A string pattern will match an equal token.
%% The star pattern matches the remaining tokens (at least one), and is meant
%% to be the last pathterm in a pathspec. If all tokens are matched and all
%% pathterms are used, then the pathspec matches. It works like webmachine.
%% Each identified token will be reused in the "to" rule and in "query".
%%
%% The pattern matching is done by first matching the request method to a
%% rule. By default all methods match a rule (method is equal to "*" by
%% default). Then it will try to match the path to one rule. If no rule
%% matches, then a 404 error is displayed.
%%
%% Once a rule is found we rewrite the request url using the "to" and
%% "query" members. The identified tokens are matched to the rule and
%% will replace the variables. If '*' is found in the rule it will contain
%% the remaining part if it exists.
%%
%% Examples:
%%
%% Dispatch rule            URL             TO                  Tokens
%%
%% {"from": "/a/b",         /a/b?k=v        /some/b?k=v         var =:= b
%% "to": "/some/"}                                              k = v
%%
%% {"from": "/a/b",         /a/b            /some/b?var=b       var =:= b
%% "to": "/some/:var"}
%%
%% {"from": "/a",           /a              /some
%% "to": "/some/*"}
%%
%% {"from": "/a/*",         /a/b/c          /some/b/c
%% "to": "/some/*"}
%%
%% {"from": "/a",           /a              /some
%% "to": "/some/*"}
%%
%% {"from": "/a/:foo/*",    /a/b/c          /some/b/c?foo=b     foo =:= b
%% "to": "/some/:foo/*"}
%%
%% {"from": "/a/:foo",      /a/b            /some/?k=b&foo=b    foo =:= b
%% "to": "/some",
%%  "query": {
%%      "k": ":foo"
%%  }}
%%
%% {"from": "/a",           /a?foo=b        /some/b             foo =:= b
%% "to": "/some/:foo",
%%  }}
%%
%% Arguments:
%%   Req  - #httpd{} whose path_parts are
%%          [DbName, <<"_design">>, DesignName, _Rewrite | PathParts]
%%   _Db  - unused
%%   DDoc - #doc{} of the design document; its body holds the "rewrites" member
%%
%% Result: whatever couch_httpd:send_error/4 returns when the rules are
%% missing (404) or are not a JSON array (400), otherwise whatever
%% couch_httpd:handle_request_int/5 returns for the rewritten request.
%% Throws {bad_request, _} when the configured "rewrite_limit" is reached,
%% and lets the throws of make_rule/1 and try_bind_path/4 propagate.
handle_rewrite_req(#httpd{
        path_parts=[DbName, <<"_design">>, DesignName, _Rewrite|PathParts],
        method=Method,
        mochi_req=MochiReq}=Req, _Db, DDoc) ->

    % we are in a design handler
    DesignId = <<"_design/", DesignName/binary>>,
    Prefix = <<"/", (?l2b(couch_util:url_encode(DbName)))/binary, "/",
        DesignId/binary>>,
    QueryList = lists:map(fun decode_query_value/1, couch_httpd:qs(Req)),

    % An unset counter must count as zero: the atom 'undefined' compares
    % greater than any integer, which would trip the limit immediately.
    RewritesSoFar = case erlang:get(?REWRITE_COUNT) of
        undefined -> 0;
        Count -> Count
    end,
    MaxRewrites = chttpd_util:get_chttpd_config_integer("rewrite_limit", 100),
    case RewritesSoFar >= MaxRewrites of
        true ->
            throw({bad_request, <<"Exceeded rewrite recursion limit">>});
        false ->
            erlang:put(?REWRITE_COUNT, RewritesSoFar + 1)
    end,

    #doc{body={Props}} = DDoc,

    % get rules from ddoc
    case couch_util:get_value(<<"rewrites">>, Props) of
        undefined ->
            couch_httpd:send_error(Req, 404, <<"rewrite_error">>,
                <<"Invalid path.">>);
        Bin when is_binary(Bin) ->
            couch_httpd:send_error(Req, 400, <<"rewrite_error">>,
                <<"Rewrite rules are a String. They must be a JSON Array.">>);
        Rules when is_list(Rules) ->
            % create dispatch list from rules; entries that are not JSON
            % objects are skipped by the generator pattern
            DispatchList = [make_rule(Rule) || {Rule} <- Rules],
            Method1 = couch_util:to_binary(Method),

            % get raw path by matching url to a rule. Throws not_found.
            {NewPathParts0, Bindings0} =
                try_bind_path(DispatchList, Method1, PathParts, QueryList),
            NewPathParts = [quote_plus(X) || X <- NewPathParts0],
            Bindings = maybe_encode_bindings(Bindings0),

            Path0 = string:join(NewPathParts, [?SEPARATOR]),

            % if path is relative detect it and rewrite path; when
            % safe_relative_path/1 rejects the path we keep it as is and
            % rely on normalize_path/1 below
            RelPath = case mochiweb_util:safe_relative_path(Path0) of
                undefined -> Path0;
                SafePath -> SafePath
            end,
            Path1 = ?b2l(Prefix) ++ "/" ++ RelPath,

            Path2 = normalize_path(Path1),

            Path3 = case Bindings of
                [] ->
                    Path2;
                _ ->
                    [Path2, "?", mochiweb_util:urlencode(Bindings)]
            end,

            RawPath1 = ?b2l(iolist_to_binary(Path3)),

            % In order to do OAuth correctly, we have to save the
            % requested path. We use default so chained rewriting
            % wont replace the original header.
            Headers = mochiweb_headers:default("x-couchdb-requested-path",
                                             MochiReq:get(raw_path),
                                             MochiReq:get(headers)),

            couch_log:debug("rewrite to ~p ~n", [RawPath1]),

            % build a new mochiweb request
            MochiReq1 = mochiweb_request:new(MochiReq:get(socket),
                                             MochiReq:get(method),
                                             RawPath1,
                                             MochiReq:get(version),
                                             Headers),

            % cleanup, It force mochiweb to reparse raw uri.
            MochiReq1:cleanup(),

            #httpd{
                db_url_handlers = DbUrlHandlers,
                design_url_handlers = DesignUrlHandlers,
                default_fun = DefaultFun,
                url_handlers = UrlHandlers,
                user_ctx = UserCtx,
                auth = Auth
            } = Req,

            % stash the credentials of the original request in the process
            % dictionary before re-dispatching
            erlang:put(pre_rewrite_auth, Auth),
            erlang:put(pre_rewrite_user_ctx, UserCtx),
            couch_httpd:handle_request_int(MochiReq1, DefaultFun,
                    UrlHandlers, DbUrlHandlers, DesignUrlHandlers);
        _NotAnArray ->
            % e.g. a JSON object or a number: reject it instead of crashing
            % in the list comprehension above
            couch_httpd:send_error(Req, 400, <<"rewrite_error">>,
                <<"Rewrite rules must be a JSON Array.">>)
    end.

%% Quote one path segment of the rewritten path with
%% mochiweb_util:quote_plus/1. A segment still wrapped as {bind, X}
%% is unwrapped first.
quote_plus({bind, X}) ->
    mochiweb_util:quote_plus(X);
quote_plus(X) ->
    mochiweb_util:quote_plus(X).

%% @doc Try to find a rule matching current url.
%% If none is found, a 404 error (not_found) is raised.
%%
%% Each dispatch entry has the shape built by make_rule/1:
%%   [{FromParts, RuleMethod}, ToParts, QueryArgs, Formats]
%% Rules are tried in order; the first one whose method and path both bind
%% wins. Returns {NewPathParts, FinalBindings} where FinalBindings are the
%% path/url bindings not overridden by the rule's "query" member, followed
%% by the rewritten "query" member entries.
try_bind_path([], _Method, _PathParts, _QueryList) ->
    throw(not_found);
try_bind_path([Dispatch|Rest], Method, PathParts, QueryList) ->
    [{PathParts1, Method1}, RedirectPath, QueryArgs, Formats] = Dispatch,
    case bind_method(Method1, Method) of
        true ->
            case bind_path(PathParts1, PathParts, []) of
                {ok, Remaining, Bindings} ->
                    Bindings1 = Bindings ++ QueryList,
                    % we parse query args from the rule and fill
                    % it eventually with bindings vars
                    QueryArgs1 = make_query_list(QueryArgs, Bindings1,
                        Formats, []),
                    % remove params in Bindings1 that are already in
                    % QueryArgs1; a comprehension keeps the original order
                    % without re-copying the accumulator on every step
                    Bindings2 = [{to_binding(K), V} || {K, V} <- Bindings1,
                        couch_util:get_value(to_binding(K), QueryArgs1)
                            =:= undefined],

                    FinalBindings = Bindings2 ++ QueryArgs1,
                    NewPathParts = make_new_path(RedirectPath, FinalBindings,
                                    Remaining, []),
                    {NewPathParts, FinalBindings};
                fail ->
                    try_bind_path(Rest, Method, PathParts, QueryList)
            end;
        false ->
            try_bind_path(Rest, Method, PathParts, QueryList)
    end.

%% rewriting dynamically the query list given as query member in
%% rewrites. Each value is replaced by one binding or an argument
%% passed in url.
make_query_list([], _Bindings, _Formats, Acc) ->
    % the accumulator is handed back as built, i.e. last rule entry first
    Acc;
make_query_list([{Key, Value}|Rest], Bindings, Formats, Acc) ->
    % replace_var/3 substitutes binaries and lists and hands back any other
    % term (JSON objects, numbers, booleans, ...) untouched
    Rewritten = replace_var(Value, Bindings, Formats),
    make_query_list(Rest, Bindings, Formats,
        [{to_binding(Key), Rewritten}|Acc]).

%% Substitute a query value with the matching binding.
%%   <<"*">>      -> value bound to the star pattern, or <<"*">> itself
%%   <<":name">>  -> value bound to name, or the literal when unbound
%%   list         -> same ":name" substitution applied to each element
%%   anything else is returned unchanged
replace_var(<<"*">> = Star, Bindings, Formats) ->
    get_var(Star, Bindings, Star, Formats);
replace_var(<<":", Name/binary>> = Literal, Bindings, Formats) ->
    get_var(Name, Bindings, Literal, Formats);
replace_var(Items, Bindings, Formats) when is_list(Items) ->
    Substitute = fun
        (<<":", Name/binary>> = Literal) ->
            get_var(Name, Bindings, Literal, Formats);
        (Item) ->
            Item
    end,
    lists:map(Substitute, Items);
replace_var(Other, _Bindings, _Formats) ->
    Other.

%% JSON-encode the value when the key is one of the view key parameters,
%% otherwise return the value as is.
maybe_json(Key, Value) when
        Key =:= <<"key">>;
        Key =:= <<"startkey">>;
        Key =:= <<"start_key">>;
        Key =:= <<"endkey">>;
        Key =:= <<"end_key">>;
        Key =:= <<"keys">> ->
    ?JSON_ENCODE(Value);
maybe_json(_Key, Value) ->
    Value.

%% Look up the binding {bind, VarName} in Props (Default when absent) and
%% run the result through the format registered for VarName, if any.
get_var(VarName, Props, Default, Formats) ->
    Bound = couch_util:get_value(to_binding(VarName), Props, Default),
    maybe_format(VarName, Bound, Formats).

%% Apply the format declared for VarName in Formats; without a declared
%% format the value is returned unchanged.
maybe_format(VarName, Value, Formats) ->
    Declared = couch_util:get_value(VarName, Formats),
    case Declared of
        undefined ->
            Value;
        _ ->
            format(Declared, Value)
    end.

%% Convert a bound value according to a "formats" entry.
%%   <<"int">>  - integers pass through; binaries and strings are parsed as
%%                integers
%%   <<"bool">> - "true"/"false" (case-insensitive) become booleans
%% A value that cannot be converted is returned exactly as it was given, so
%% a binary stays a binary and is not turned into a char list. Unknown
%% formats leave the value untouched.
format(<<"int">>, Value) when is_integer(Value) ->
    Value;
format(<<"int">>, Value) when is_binary(Value) ->
    try
        binary_to_integer(Value)
    catch
        error:badarg -> Value
    end;
format(<<"int">>, Value) when is_list(Value) ->
    try
        list_to_integer(Value)
    catch
        error:badarg -> Value
    end;
format(<<"bool">>, Value) when is_binary(Value) ->
    case format(<<"bool">>, ?b2l(Value)) of
        Bool when is_boolean(Bool) -> Bool;
        _NotBool -> Value
    end;
format(<<"bool">>, Value) when is_list(Value) ->
    case string:to_lower(Value) of
        "true" -> true;
        "false" -> false;
        _ -> Value
    end;
format(_Format, Value) ->
    Value.

%% doc: build new path from bindings. bindings are query args
%% (+ dynamic query rewritten if needed) and bindings found in
%% bind_path step.
%%
%% Walks the "to" pattern: literal segments are copied, {bind, Name}
%% segments are replaced by their bound value (<<"undefined">> when
%% unbound), and the star pattern appends the Remaining request segments
%% and ends the path (anything after it in the pattern is ignored).
make_new_path([], _Bindings, _Remaining, Acc) ->
    lists:reverse(Acc);
make_new_path([?MATCH_ALL|_Rest], _Bindings, Remaining, Acc) ->
    % same as lists:reverse(Acc) ++ Remaining
    lists:reverse(Acc, Remaining);
make_new_path([{bind, P}|Rest], Bindings, Remaining, Acc) ->
    P2 = case couch_util:get_value({bind, P}, Bindings) of
        undefined -> << "undefined">>;
        P1 ->
            iolist_to_binary(P1)
    end,
    make_new_path(Rest, Bindings, Remaining, [P2|Acc]);
make_new_path([P|Rest], Bindings, Remaining, Acc) ->
    make_new_path(Rest, Bindings, Remaining, [P|Acc]).


%% @doc Check whether the method of the request fits the rule method. If
%% the rule method is '*', which is the default, all
%% request methods will bind. It allows us to make rules
%% depending on HTTP method.
bind_method(?MATCH_ALL, _Method ) ->
    true;
bind_method({bind, Method}, Method) ->
    true;
bind_method(_, _) ->
    false.


%% @doc bind path.
%% Using the "from" pattern of a rule, we try to bind variables
%% to the current url by pattern matching.
%%
%% Returns {ok, Remaining, Bindings} or fail. Bindings are prepended as
%% they are found; Remaining holds the request segments captured by a
%% trailing star pattern (empty otherwise).
bind_path([], [], Bound) ->
    {ok, [], Bound};
bind_path([?MATCH_ALL], [First|_] = Segments, Bound) ->
    % a trailing star needs at least one segment; it binds the first one
    % and hands all of them back as the remainder
    {ok, Segments, [{?MATCH_ALL, First}|Bound]};
bind_path([{bind, Name}|Patterns], [Segment|Segments], Bound) ->
    bind_path(Patterns, Segments, [{{bind, Name}, Segment}|Bound]);
bind_path([Literal|Patterns], [Literal|Segments], Bound) ->
    bind_path(Patterns, Segments, Bound);
bind_path(_Patterns, _Segments, _Bound) ->
    % covers exhausted request segments as well as literal mismatches
    fail.


%% normalize path: resolve "." and ".." segments and return the result
%% with a single leading "/".
normalize_path(Path) ->
    Segments = string:tokens(Path, "/"),
    Resolved = normalize_path1(Segments, []),
    [?SEPARATOR | string:join(Resolved, [?SEPARATOR])].


%% Acc holds the segments kept so far, most recent first.
normalize_path1([], Acc) ->
    lists:reverse(Acc);
normalize_path1(["."|Rest], Acc) ->
    normalize_path1(Rest, Acc);
normalize_path1([".."|Rest], []) ->
    % nothing to pop: keep the ".."
    normalize_path1(Rest, [".."]);
normalize_path1([".."|Rest], [".."|_] = Acc) ->
    % previous segment is an unresolved "..": stack another one
    normalize_path1(Rest, [".."|Acc]);
normalize_path1([".."|Rest], [_Popped|Acc]) ->
    normalize_path1(Rest, Acc);
normalize_path1([Segment|Rest], Acc) ->
    normalize_path1(Rest, [Segment|Acc]).


%% @doc transform json rule in erlang for pattern matching
%% The result is [{FromParts, Method}, ToParts, QueryArgs, Formats].
%% Throws {error, invalid_rewrite_target} when the rule has no "to" member.
make_rule(Rule) ->
    Method = rule_method(couch_util:get_value(<<"method">>, Rule)),
    QueryArgs = rule_object(couch_util:get_value(<<"query">>, Rule)),
    FromParts = rule_from(couch_util:get_value(<<"from">>, Rule)),
    ToParts = rule_to(couch_util:get_value(<<"to">>, Rule)),
    Formats = rule_object(couch_util:get_value(<<"formats">>, Rule)),
    [{FromParts, Method}, ToParts, QueryArgs, Formats].

%% A missing method matches every request method.
rule_method(undefined) -> ?MATCH_ALL;
rule_method(Method) -> to_binding(Method).

%% Members of an optional JSON object ("query", "formats").
rule_object(undefined) -> [];
rule_object({Members}) -> Members.

%% A missing "from" is the star pattern.
rule_from(undefined) -> [?MATCH_ALL];
rule_from(From) -> parse_path(From).

%% "to" is mandatory.
rule_to(undefined) -> throw({error, invalid_rewrite_target});
rule_to(To) -> parse_path(To).

%% Split a "from"/"to" path on "/" and turn it into a pattern list.
parse_path(Path) ->
    Segments = re:split(Path, <<"\\/">>),
    path_to_list(Segments, [], 0).

%% @doc convert a path rule (from or to) to an erlang list
%% "*" becomes {bind, <<"*">>} and a path variable starting with ":"
%% becomes {bind, Name}. Empty segments are dropped.
%% The last argument counts the ".." segments seen so far: a third one
%% is refused with a throw of {insecure_rewrite_rule, _} unless the
%% "secure_rewrites" setting is false.
path_to_list([], Acc, _Seen) ->
    lists:reverse(Acc);
path_to_list([<<>>|Tail], Acc, Seen) ->
    path_to_list(Tail, Acc, Seen);
path_to_list([<<"..">>|Tail], Acc, 2) ->
    case chttpd_util:get_chttpd_config_boolean("secure_rewrites", true) of
        true ->
            Offending = lists:reverse(Acc) ++ [<<"..">>] ++ Tail,
            couch_log:info("insecure_rewrite_rule ~p blocked",
                [Offending]),
            throw({insecure_rewrite_rule, "too many ../.. segments"});
        false ->
            path_to_list(Tail, [<<"..">>|Acc], 3)
    end;
path_to_list([<<"..">>|Tail], Acc, Seen) ->
    path_to_list(Tail, [<<"..">>|Acc], Seen + 1);
path_to_list([Segment|Tail], Acc, Seen) ->
    path_to_list(Tail, [path_pattern(Segment)|Acc], Seen).

%% Pattern term for a single non-empty, non-".." segment.
path_pattern(<<"*">>) -> ?MATCH_ALL;
path_pattern(<<":", Var/binary>>) -> to_binding(Var);
path_pattern(Literal) -> Literal.

%% Turn bindings into {Key, BinaryValue} pairs for the query string.
%% The star binding is dropped, view key parameters are JSON encoded by
%% maybe_json/2, and the result is in reverse order of the input.
maybe_encode_bindings(Props) ->
    encode_bindings(Props, []).

encode_bindings([], Acc) ->
    Acc;
encode_bindings([{{bind, <<"*">>}, _V}|Rest], Acc) ->
    encode_bindings(Rest, Acc);
encode_bindings([{{bind, K}, V}|Rest], Acc) ->
    Encoded = iolist_to_binary(maybe_json(K, V)),
    encode_bindings(Rest, [{K, Encoded}|Acc]).

%% Turn one {Key, Value} query string pair into a binding. The values of
%% view key parameters are JSON decoded, every other value becomes a binary.
decode_query_value({K,V}) ->
    JsonKeys = ["key", "startkey", "start_key",
                "endkey", "end_key", "keys"],
    Binding = to_binding(K),
    Decoded = case lists:member(K, JsonKeys) of
        true -> ?JSON_DECODE(V);
        false -> ?l2b(V)
    end,
    {Binding, Decoded}.

%% Wrap a name as {bind, Name}; strings are converted to binaries first
%% and an existing binding is returned unchanged.
to_binding({bind, _Name} = Binding) ->
    Binding;
to_binding(Name) when is_list(Name) ->
    {bind, ?l2b(Name)};
to_binding(Name) ->
    {bind, Name}.