% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
%   http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(mango_idx_text).


-export([
    validate_new/2,
    validate_fields/1,
    validate_index_def/1,
    add/2,
    remove/2,
    from_ddoc/1,
    to_json/1,
    columns/1,
    is_usable/3,
    get_default_field_options/1
]).


-include_lib("couch/include/couch_db.hrl").
-include("mango.hrl").
-include("mango_idx.hrl").


%% Validate the definition of a new text index.
%%
%% The raw definition is run through do_validate/1 and the result is then
%% checked against the "index all fields" policy for Db. Returns {ok, Idx}
%% where the record carries the validated definition.
validate_new(#idx{def = RawDef} = Idx, Db) ->
    {ok, Def} = do_validate(RawDef),
    maybe_reject_index_all_req(Def, Db),
    {ok, Idx#idx{def = Def}}.


%% Validate a bare index definition; unlike validate_new/2 no database
%% policy check is performed.
validate_index_def(IndexInfo) ->
    do_validate(IndexInfo).


%% Store the text index described by Idx in the "indexes" member of a design
%% document body, replacing any existing entry with the same name.
%% Returns {ok, UpdatedDDoc}.
add(#doc{body = {Props}} = DDoc, Idx) ->
    % A missing or malformed "indexes" member is treated as "no indexes yet".
    Current = case proplists:get_value(<<"indexes">>, Props) of
        {Indexes} -> Indexes;
        _ -> []
    end,
    {TextName, _} = Entry = make_text(Idx),
    Updated = lists:keystore(TextName, 1, Current, Entry),
    NewProps = lists:keystore(<<"indexes">>, 1, Props,
        {<<"indexes">>, {Updated}}),
    {ok, DDoc#doc{body = {NewProps}}}.
%% Remove the text index named by Idx from a design document body.
%%
%% Raises {index_not_found, Name} through ?MANGO_ERROR when the body has no
%% "indexes" object or when that object has no entry with the given name.
%% When the last entry is removed, the "indexes" member itself is dropped.
%% Returns {ok, UpdatedDDoc}.
remove(#doc{body = {Props0}} = DDoc, Idx) ->
    Name = Idx#idx.name,
    Texts1 = case proplists:get_value(<<"indexes">>, Props0) of
        {Texts0} ->
            Texts0;
        _ ->
            ?MANGO_ERROR({index_not_found, Name})
    end,
    Props1 = case lists:keydelete(Name, 1, Texts1) of
        Texts1 ->
            % keydelete/3 returned the list untouched: no such index.
            ?MANGO_ERROR({index_not_found, Name});
        [] ->
            lists:keydelete(<<"indexes">>, 1, Props0);
        Texts2 ->
            lists:keystore(<<"indexes">>, 1, Props0,
                {<<"indexes">>, {Texts2}})
    end,
    {ok, DDoc#doc{body = {Props1}}}.


%% Build the list of #idx{} records for the text indexes found in a design
%% document body. Entries rejected by validate_ddoc/1 are skipped; a body
%% without an "indexes" object yields [].
from_ddoc({Props}) ->
    case lists:keyfind(<<"indexes">>, 1, Props) of
        {<<"indexes">>, {Texts}} when is_list(Texts) ->
            ToIdx = fun({Name, {VProps}}) ->
                case validate_ddoc(VProps) of
                    invalid_ddoc ->
                        [];
                    Def ->
                        [#idx{type = <<"text">>, name = Name, def = Def}]
                end
            end,
            lists:flatmap(ToIdx, Texts);
        _ ->
            []
    end.


%% Render an index record as an EJSON object with the members ddoc, name,
%% type, partitioned and def (the latter converted by def_to_json/1).
to_json(Idx) ->
    {[
        {ddoc, Idx#idx.ddoc},
        {name, Idx#idx.name},
        {type, Idx#idx.type},
        {partitioned, Idx#idx.partitioned},
        {def, {def_to_json(Idx#idx.def)}}
    ]}.


%% Return the columns covered by the index: the atom all_fields when the
%% definition indexes every field, otherwise a list of <<"name:type">>
%% binaries, preceded by <<"$default">> when the default field is enabled.
%%
%% "default_field" may be either an object (optionally carrying "enabled")
%% or a plain boolean. The boolean form is accepted by
%% get_default_field_options/1 when the index is created, so it has to be
%% accepted here as well instead of failing with badmatch.
columns(Idx) ->
    {Props} = Idx#idx.def,
    {<<"fields">>, Fields} = lists:keyfind(<<"fields">>, 1, Props),
    case Fields of
        <<"all_fields">> ->
            all_fields;
        _ ->
            DefaultField = couch_util:get_value(<<"default_field">>, Props,
                {[]}),
            Enabled = case DefaultField of
                Bool when is_boolean(Bool) ->
                    Bool;
                {DFProps} ->
                    couch_util:get_value(<<"enabled">>, DFProps, true)
            end,
            Default = case Enabled of
                true -> [<<"$default">>];
                false -> []
            end,
            Default ++ lists:map(fun({FProps}) ->
                {_, Name} = lists:keyfind(<<"name">>, 1, FProps),
                {_, Type} = lists:keyfind(<<"type">>, 1, FProps),
                iolist_to_binary([Name, ":", Type])
            end, Fields)
    end.
%% Decide whether the index can serve Selector.
%%
%% An empty selector never uses a text index. An index over all fields is
%% always usable; otherwise every field the selector needs (as computed by
%% indexable_fields/1) must be among the index columns.
is_usable(_, {[]}, _) ->
    false;
is_usable(Idx, Selector, _) ->
    case columns(Idx) of
        all_fields ->
            true;
        Cols ->
            Needed = sets:from_list(indexable_fields(Selector)),
            sets:is_subset(Needed, sets:from_list(Cols))
    end.


%% Validate an index definition object against opts/0. Anything that is not
%% an EJSON object is rejected with an invalid_index_text mango error.
do_validate({Props}) ->
    {ok, Opts} = mango_opts:validate(Props, opts()),
    {ok, {Opts}};
do_validate(Else) ->
    ?MANGO_ERROR({invalid_index_text, Else}).


%% Convert an index definition into the property list used by to_json/1.
%% Both spellings of the fields key are handled: the binary key found in a
%% stored design document and the atom key found in a validated definition.
def_to_json({Props}) ->
    def_to_json(Props);
def_to_json([]) ->
    [];
def_to_json([{<<"fields">>, <<"all_fields">>} | Rest]) ->
    [{<<"fields">>, []} | def_to_json(Rest)];
% A validated definition represents "index every field" as the atom
% all_fields (see validate_fields/1). Render it like the binary form above
% rather than passing the atom to fields_to_json/1, which would raise
% function_clause.
def_to_json([{fields, all_fields} | Rest]) ->
    [{<<"fields">>, []} | def_to_json(Rest)];
def_to_json([{fields, Fields} | Rest]) ->
    [{<<"fields">>, fields_to_json(Fields)} | def_to_json(Rest)];
def_to_json([{<<"fields">>, Fields} | Rest]) ->
    [{<<"fields">>, fields_to_json(Fields)} | def_to_json(Rest)];
% Don't include partial_filter_selector in the JSON conversion
% if it's the default value.
def_to_json([{<<"partial_filter_selector">>, {[]}} | Rest]) ->
    def_to_json(Rest);
def_to_json([{Key, Value} | Rest]) ->
    [{Key, Value} | def_to_json(Rest)].


%% Convert a list of {name, type} field objects into a list of single-member
%% objects {[{Name, Type}]}. The two members may appear in either order; any
%% other shape raises function_clause, and an invalid name throws
%% invalid_field_name (validate_fields/1 relies on both).
fields_to_json([]) ->
    [];
fields_to_json([{[{<<"name">>, Name}, {<<"type">>, Type}]} | Rest]) ->
    Field = field_to_json(Name, Type),
    [Field | fields_to_json(Rest)];
fields_to_json([{[{<<"type">>, Type}, {<<"name">>, Name}]} | Rest]) ->
    Field = field_to_json(Name, Type),
    [Field | fields_to_json(Rest)].


%% Validate one field's name and type and build its JSON form.
field_to_json(Name, Type0) ->
    ok = validate_field_name(Name),
    Type = validate_field_type(Type0),
    {[{Name, Type}]}.


%% In the future, we can possibly add more restrictive validation.
%% For now, let's make sure the field name is a non-empty binary.
validate_field_name(Name) when is_binary(Name), Name =/= <<>> ->
    ok;
validate_field_name(_) ->
    throw(invalid_field_name).
%% Accept only the supported field types and return the type unchanged.
%% Any other value raises function_clause.
validate_field_type(Type) when Type =:= <<"string">>;
        Type =:= <<"number">>;
        Type =:= <<"boolean">> ->
    Type.


%% Validator for the "fields" option.
%%
%% <<"all_fields">> maps to {ok, all_fields}. Any other value must convert
%% cleanly with fields_to_json/1, after which it is handed to
%% mango_fields:new/1. Malformed field lists are reported as an
%% invalid_index_fields_definition mango error.
validate_fields(<<"all_fields">>) ->
    {ok, all_fields};
validate_fields(Fields) ->
    % The conversion result is discarded; it is only run for its checks.
    try fields_to_json(Fields) of
        _Converted ->
            mango_fields:new(Fields)
    catch
        error:function_clause ->
            ?MANGO_ERROR({invalid_index_fields_definition, Fields});
        throw:invalid_field_name ->
            ?MANGO_ERROR({invalid_index_fields_definition, Fields})
    end.


%% Extract and validate the "index" member of one design-document index
%% entry. Returns the definition as stored, or invalid_ddoc (after logging)
%% when extraction or validation raises anything.
validate_ddoc(VProps) ->
    try
        Def = proplists:get_value(<<"index">>, VProps),
        validate_index_def(Def),
        Def
    catch Class:Reason ->
        couch_log:error("Invalid Index Def ~p: Error. ~p, Reason: ~p",
            [VProps, Class, Reason]),
        invalid_ddoc
    end.


%% Option specification passed to mango_opts:validate/2 by do_validate/1.
%% Every option is optional and carries a default.
opts() ->
    [
        {<<"default_analyzer">>, [
            {tag, default_analyzer},
            {optional, true},
            {default, <<"keyword">>}
        ]},
        {<<"default_field">>, [
            {tag, default_field},
            {optional, true},
            {default, {[]}}
        ]},
        {<<"partial_filter_selector">>, [
            {tag, partial_filter_selector},
            {optional, true},
            {default, {[]}},
            {validator, fun mango_opts:validate_selector/1}
        ]},
        {<<"selector">>, [
            {tag, selector},
            {optional, true},
            {default, {[]}},
            {validator, fun mango_opts:validate_selector/1}
        ]},
        {<<"fields">>, [
            {tag, fields},
            {optional, true},
            {default, []},
            {validator, fun ?MODULE:validate_fields/1}
        ]},
        {<<"index_array_lengths">>, [
            {tag, index_array_lengths},
            {optional, true},
            {default, true},
            {validator, fun mango_opts:is_boolean/1}
        ]}
    ].


%% Build the {Name, Body} pair stored under "indexes" in a design document:
%% the body holds the index definition and the analyzer derived from it.
make_text(Idx) ->
    Def = Idx#idx.def,
    Body = {[
        {<<"index">>, Def},
        {<<"analyzer">>, construct_analyzer(Def)}
    ]},
    {Idx#idx.name, Body}.
%% Return {Enabled, Analyzer} for the default field of a validated index
%% definition. The default_field option may be a boolean or an object with
%% optional "enabled" and "analyzer" members; the defaults are true and
%% <<"standard">>.
get_default_field_options(Props) ->
    case couch_util:get_value(default_field, Props, {[]}) of
        Flag when is_boolean(Flag) ->
            {Flag, <<"standard">>};
        {[]} ->
            {true, <<"standard">>};
        {FieldOpts} ->
            Enabled = couch_util:get_value(<<"enabled">>, FieldOpts, true),
            Analyzer = couch_util:get_value(<<"analyzer">>, FieldOpts,
                <<"standard">>),
            {Enabled, Analyzer}
    end.


%% Build the analyzer stored with the index. With the default field enabled
%% the result is a "perfield" analyzer object that maps $default to the
%% default field's analyzer; otherwise it is the plain <<"keyword">> analyzer.
construct_analyzer({Props}) ->
    Fallback = couch_util:get_value(default_analyzer, Props, <<"keyword">>),
    case get_default_field_options(Props) of
        {true, FieldAnalyzer} ->
            {[
                {<<"name">>, <<"perfield">>},
                {<<"default">>, Fallback},
                {<<"fields">>, {[{<<"$default">>, FieldAnalyzer}]}}
            ]};
        {_, _} ->
            <<"keyword">>
    end.


%% List the index fields a selector needs, based on the tuple tree produced
%% by mango_selector_text:convert/2.
indexable_fields(Selector) ->
    indexable_fields([], mango_selector_text:convert([], Selector)).


%% Walk one node of the tuple tree, prepending every field it requires to Acc.
indexable_fields(Acc, {op_and, Args}) when is_list(Args) ->
    lists:foldl(fun(Arg, Acc0) -> indexable_fields(Acc0, Arg) end, Acc, Args);

%% For queries that use array element access or $in operations, two
%% fields get generated by mango_selector_text:convert. At index
%% definition time, only one field gets defined. In this situation, we
%% remove the extra generated field so that the index can be used. For
%% all other situations, we include the fields as normal.
indexable_fields(Acc, {op_or, [{op_field, _} = First,
        {op_field, {[Name | _], _}} = Second]}) ->
    case lists:member(<<"[]">>, Name) of
        true ->
            indexable_fields(Acc, First);
        false ->
            indexable_fields(indexable_fields(Acc, First), Second)
    end;
indexable_fields(Acc, {op_or, Args}) when is_list(Args) ->
    lists:foldl(fun(Arg, Acc0) -> indexable_fields(Acc0, Arg) end, Acc, Args);

indexable_fields(Acc, {op_not, {ExistsQuery, Arg}}) when is_tuple(Arg) ->
    indexable_fields(indexable_fields(Acc, ExistsQuery), Arg);
% forces "$exists" : false to use _all_docs
indexable_fields(_, {op_not, {_, false}}) ->
    [];

indexable_fields(Acc, {op_insert, Arg}) when is_binary(Arg) ->
    Acc;

%% fieldname.[]:length is not a user defined field.
indexable_fields(Acc, {op_field, {[_, <<":length">>], _}}) ->
    Acc;
indexable_fields(Acc, {op_field, {Name, _}}) ->
    [iolist_to_binary(Name) | Acc];

%% In this particular case, the lucene index is doing a field_exists query
%% so it is looking at all sorts of combinations of field:* and field.*
%% We don't add the field because we cannot pre-determine what field will
%% exist. Hence we just return the accumulator and make it less restrictive.
indexable_fields(Acc, {op_fieldname, {_, _}}) ->
    Acc;

%% Similar idea to op_fieldname but with fieldname:null
indexable_fields(Acc, {op_null, {_, _}}) ->
    Acc;

indexable_fields(Acc, {op_default, _}) ->
    [<<"$default">> | Acc].
%% Apply the "index all fields" policy to a validated definition.
%%
%% When the definition indexes all fields, the configured policy decides the
%% outcome: "true" raises an index_all_disabled mango error, "warn" logs a
%% warning naming the user and database. Every other combination returns ok.
maybe_reject_index_all_req({Def}, Db) ->
    DbName = couch_db:name(Db),
    #user_ctx{name = User} = couch_db:get_user_ctx(Db),
    Requested = couch_util:get_value(fields, Def),
    Policy = forbid_index_all(),
    case {Requested, Policy} of
        {all_fields, "true"} ->
            ?MANGO_ERROR(index_all_disabled);
        {all_fields, "warn"} ->
            couch_log:warning("User ~p is indexing all fields in db ~p",
                [User, DbName]);
        _ ->
            ok
    end.


%% Read the [mango] index_all_disabled setting; "false" when it is unset.
forbid_index_all() ->
    config:get("mango", "index_all_disabled", "false").


-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").


%% Suite fixture: start couch and make couch_log:warning/2 throw, so that a
%% logged warning can be observed by the tests.
setup_all() ->
    Ctx = test_util:start_couch(),
    LogSpy = fun(_, _) -> throw({test_error, logged_warning}) end,
    meck:expect(couch_log, warning, 2, LogSpy),
    Ctx.


teardown_all(Ctx) ->
    meck:unload(),
    test_util:stop_couch(Ctx).


%% Per-test fixture: an index with an empty definition and a clustered db
%% handle owned by user u1.
setup() ->
    % default index all def that generates {fields, all_fields}
    UserCtx = #user_ctx{name = <<"u1">>},
    {ok, Db} = couch_db:clustered_db(<<"testdb">>, UserCtx),
    {#idx{def = {[]}}, Db}.


teardown(_) ->
    ok.


index_all_test_() ->
    PerTest = {
        foreach,
        fun setup/0,
        fun teardown/1,
        [
            fun forbid_index_all/1,
            fun default_and_false_index_all/1,
            fun warn_index_all/1
        ]
    },
    {setup, fun setup_all/0, fun teardown_all/1, PerTest}.


%% With the policy set to "true", indexing all fields must be rejected.
forbid_index_all({Idx, Db}) ->
    ?_test(begin
        ok = config:set("mango", "index_all_disabled", "true", false),
        ?assertThrow(
            {mango_error, ?MODULE, index_all_disabled},
            validate_new(Idx, Db)
        )
    end).
%% With the policy unset, and again with it set to "false", indexing all
%% fields is accepted and the validated definition reports all_fields.
default_and_false_index_all({Idx, Db}) ->
    ?_test(begin
        ValidatedFields = fun() ->
            {ok, #idx{def = {Def}}} = validate_new(Idx, Db),
            couch_util:get_value(fields, Def)
        end,
        config:delete("mango", "index_all_disabled", false),
        ?assertEqual(all_fields, ValidatedFields()),
        ok = config:set("mango", "index_all_disabled", "false", false),
        ?assertEqual(all_fields, ValidatedFields())
    end).


%% With the policy set to "warn", validation logs a warning; the mocked
%% couch_log:warning/2 turns that into a throw the test can observe.
warn_index_all({Idx, Db}) ->
    ?_test(begin
        ok = config:set("mango", "index_all_disabled", "warn", false),
        ?assertThrow(
            {test_error, logged_warning},
            validate_new(Idx, Db)
        )
    end).


-endif.