1% Licensed under the Apache License, Version 2.0 (the "License"); you may not
2% use this file except in compliance with the License. You may obtain a copy of
3% the License at
4%
5% http://www.apache.org/licenses/LICENSE-2.0
6%
7% Unless required by applicable law or agreed to in writing, software
8% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10% License for the specific language governing permissions and limitations under
11% the License.
12
13-module(mango_idx_text).
14
15
16-export([
17    validate_new/2,
18    validate_fields/1,
19    validate_index_def/1,
20    add/2,
21    remove/2,
22    from_ddoc/1,
23    to_json/1,
24    columns/1,
25    is_usable/3,
26    get_default_field_options/1
27]).
28
29
30-include_lib("couch/include/couch_db.hrl").
31-include("mango.hrl").
32-include("mango_idx.hrl").
33
34
%% Validate the definition of a new text index.
%%
%% The raw definition is run through do_validate/1; the validated definition
%% is then checked against the "index all fields" policy before being stored
%% back on the index record. Returns {ok, Idx} with the validated definition.
validate_new(#idx{def = RawDef} = Idx, Db) ->
    {ok, ValidDef} = do_validate(RawDef),
    maybe_reject_index_all_req(ValidDef, Db),
    {ok, Idx#idx{def = ValidDef}}.
39
40
%% Validate a raw text index definition. Thin exported entry point that
%% delegates to do_validate/1 and returns its result unchanged.
validate_index_def(Def) ->
    do_validate(Def).
43
44
%% Store the text index Idx in the design document.
%%
%% The entry produced by make_text/1 is written under the <<"indexes">>
%% property, replacing an existing entry with the same name. A missing or
%% non-object <<"indexes">> property is treated as empty.
add(#doc{body = {Props0}} = DDoc, Idx) ->
    Existing = case proplists:get_value(<<"indexes">>, Props0) of
        {Entries} -> Entries;
        _ -> []
    end,
    {Name, _} = Entry = make_text(Idx),
    Updated = lists:keystore(Name, 1, Existing, Entry),
    IndexesProp = {<<"indexes">>, {Updated}},
    Props1 = lists:keystore(<<"indexes">>, 1, Props0, IndexesProp),
    {ok, DDoc#doc{body = {Props1}}}.
55
56
%% Remove the text index named Idx#idx.name from the design document.
%%
%% Raises an index_not_found mango error when the document has no
%% <<"indexes">> object or when no entry with that name exists. When the last
%% entry is removed the <<"indexes">> property itself is dropped.
remove(#doc{body = {Props0}} = DDoc, Idx) ->
    Existing = case proplists:get_value(<<"indexes">>, Props0) of
        {Entries} ->
            Entries;
        _ ->
            ?MANGO_ERROR({index_not_found, Idx#idx.name})
    end,
    Remaining = case lists:keydelete(Idx#idx.name, 1, Existing) of
        Existing ->
            % Nothing was deleted, so the index was never there.
            ?MANGO_ERROR({index_not_found, Idx#idx.name});
        Pruned ->
            Pruned
    end,
    Props1 = case Remaining of
        [] ->
            lists:keydelete(<<"indexes">>, 1, Props0);
        _ ->
            IndexesProp = {<<"indexes">>, {Remaining}},
            lists:keystore(<<"indexes">>, 1, Props0, IndexesProp)
    end,
    {ok, DDoc#doc{body = {Props1}}}.
75
76
%% Build the list of text #idx{} records described by a design document body.
%%
%% Every entry of the <<"indexes">> object whose definition passes
%% validate_ddoc/1 becomes one index of type <<"text">>. Entries with an
%% invalid definition are skipped. An entry whose value is not a JSON object
%% is skipped (and logged) as well, so that a single malformed entry does not
%% prevent the remaining indexes of the document from being loaded.
%% Returns [] when the document has no usable <<"indexes">> object.
from_ddoc({Props}) ->
    case lists:keyfind(<<"indexes">>, 1, Props) of
        {<<"indexes">>, {Texts}} when is_list(Texts) ->
            lists:flatmap(fun
                ({Name, {VProps}}) ->
                    case validate_ddoc(VProps) of
                        invalid_ddoc ->
                            [];
                        Def ->
                            [#idx{
                                type = <<"text">>,
                                name = Name,
                                def = Def
                            }]
                    end;
                (Malformed) ->
                    % Design documents are user supplied; an entry that is
                    % not an object cannot hold an index definition.
                    couch_log:error("Invalid Index Def ~p: not a JSON object",
                        [Malformed]),
                    []
            end, Texts);
        _ ->
            []
    end.
96
97
%% Render an index record as an EJSON object with the properties ddoc, name,
%% type, partitioned and def (the definition converted by def_to_json/1).
to_json(Idx) ->
    DDoc = Idx#idx.ddoc,
    Name = Idx#idx.name,
    Type = Idx#idx.type,
    Partitioned = Idx#idx.partitioned,
    Def = {def_to_json(Idx#idx.def)},
    {[
        {ddoc, DDoc},
        {name, Name},
        {type, Type},
        {partitioned, Partitioned},
        {def, Def}
    ]}.
106
107
%% Return the columns covered by a text index.
%%
%% Returns the atom all_fields when the definition's <<"fields">> value is
%% <<"all_fields">>. Otherwise returns a list of binaries: <<"$default">>
%% (only when the default field is enabled) followed by one <<"name:type">>
%% entry per declared field.
%%
%% The <<"default_field">> option may be an object with an <<"enabled">>
%% member or a plain boolean; both forms are accepted here, matching
%% get_default_field_options/1.
columns(Idx) ->
    {Props} = Idx#idx.def,
    {<<"fields">>, Fields} = lists:keyfind(<<"fields">>, 1, Props),
    case Fields of
        <<"all_fields">> ->
            all_fields;
        _ ->
            DefaultField = couch_util:get_value(<<"default_field">>, Props,
                {[]}),
            Default = case default_field_enabled(DefaultField) of
                true -> [<<"$default">>];
                false -> []
            end,
            Default ++ lists:map(fun({FProps}) ->
                {_, Name} = lists:keyfind(<<"name">>, 1, FProps),
                {_, Type} = lists:keyfind(<<"type">>, 1, FProps),
                iolist_to_binary([Name, ":", Type])
            end, Fields)
    end.


%% Tell whether the default field is enabled for a <<"default_field">> value
%% given either as a boolean or as an options object (enabled by default).
default_field_enabled(Enabled) when is_boolean(Enabled) ->
    Enabled;
default_field_enabled({DFProps}) ->
    couch_util:get_value(<<"enabled">>, DFProps, true).
127
128
%% Decide whether this text index can serve Selector.
%%
%% An empty selector is never served by a text index. An index over all
%% fields serves every other selector; otherwise every field the selector
%% needs (see indexable_fields/1) must be one of the index columns.
is_usable(_Idx, {[]}, _Opts) ->
    false;
is_usable(Idx, Selector, _Opts) ->
    case columns(Idx) of
        all_fields ->
            true;
        Columns ->
            Needed = indexable_fields(Selector),
            NeededSet = sets:from_list(Needed),
            ColumnSet = sets:from_list(Columns),
            sets:is_subset(NeededSet, ColumnSet)
    end.
139
140
%% Validate an index definition object against opts/0.
%%
%% Returns {ok, {Opts}} with the validated options; anything that is not an
%% EJSON object raises an invalid_index_text mango error.
do_validate(Def) ->
    case Def of
        {Props} ->
            {ok, Validated} = mango_opts:validate(Props, opts()),
            {ok, {Validated}};
        Else ->
            ?MANGO_ERROR({invalid_index_text, Else})
    end.
146
147
%% Convert an index definition (an EJSON object or its property list) into
%% the property list used in the JSON representation of the index.
def_to_json({Props}) ->
    def_to_json(Props);
def_to_json([]) ->
    [];
def_to_json([Entry | Rest]) ->
    def_entry_to_json(Entry) ++ def_to_json(Rest).


%% Convert one definition property; returns a list holding zero or one
%% converted property.
def_entry_to_json({<<"fields">>, <<"all_fields">>}) ->
    [{<<"fields">>, []}];
def_entry_to_json({Key, Fields}) when Key =:= fields; Key =:= <<"fields">> ->
    [{<<"fields">>, fields_to_json(Fields)}];
% Don't include partial_filter_selector in the json conversion
% if it's the default value
def_entry_to_json({<<"partial_filter_selector">>, {[]}}) ->
    [];
def_entry_to_json({_Key, _Value} = Pair) ->
    [Pair].
164
165
%% Convert a list of field declarations, each an object holding exactly a
%% <<"name">> and a <<"type">> member (in either order), into a list of
%% single-member {[{Name, Type}]} objects.
%%
%% Throws invalid_field_name for a bad name and fails with function_clause
%% for any other malformed declaration; validate_fields/1 relies on both.
fields_to_json([]) ->
    [];
fields_to_json([Field | Rest]) ->
    [field_to_json(Field) | fields_to_json(Rest)].


%% Convert a single field declaration, accepting both member orders.
field_to_json({[{<<"name">>, Name}, {<<"type">>, Type}]}) ->
    checked_field(Name, Type);
field_to_json({[{<<"type">>, Type}, {<<"name">>, Name}]}) ->
    checked_field(Name, Type).


%% Validate the name first, then the type, and build the resulting object.
checked_field(Name, Type0) ->
    ok = validate_field_name(Name),
    Type = validate_field_type(Type0),
    {[{Name, Type}]}.
176
177
%% Accept any non-empty binary as a field name and return ok; everything
%% else (including the empty binary) throws invalid_field_name.
%% More restrictive validation may be added in the future.
validate_field_name(Name) when is_binary(Name), byte_size(Name) > 0 ->
    ok;
validate_field_name(_Invalid) ->
    throw(invalid_field_name).
186
187
%% Return Type unchanged when it is one of the supported field types
%% (<<"string">>, <<"number">>, <<"boolean">>); any other value fails with
%% function_clause.
validate_field_type(Type) when
        Type =:= <<"string">>;
        Type =:= <<"number">>;
        Type =:= <<"boolean">> ->
    Type.
194
195
%% Validator for the <<"fields">> option.
%%
%% <<"all_fields">> yields {ok, all_fields}. Any other value must be a
%% well-formed list of field declarations (checked with fields_to_json/1),
%% in which case the result of mango_fields:new/1 is returned; otherwise an
%% invalid_index_fields_definition mango error is raised.
validate_fields(<<"all_fields">>) ->
    {ok, all_fields};
validate_fields(Fields) ->
    % Only the shape check is protected; mango_fields:new/1 and the error
    % macro run outside of the try.
    Shape = try fields_to_json(Fields) of
        _ -> well_formed
    catch
        error:function_clause -> malformed;
        throw:invalid_field_name -> malformed
    end,
    case Shape of
        well_formed ->
            mango_fields:new(Fields);
        malformed ->
            ?MANGO_ERROR({invalid_index_fields_definition, Fields})
    end.
207
208
%% Extract the <<"index">> member of a design-document index entry and
%% validate it.
%%
%% Returns the raw (unvalidated) definition when validation succeeds. Any
%% exception raised while extracting or validating is logged and turned into
%% the atom invalid_ddoc.
validate_ddoc(VProps) ->
    try
        IndexDef = proplists:get_value(<<"index">>, VProps),
        _ = validate_index_def(IndexDef),
        IndexDef
    catch Class:Why ->
        LogArgs = [VProps, Class, Why],
        couch_log:error("Invalid Index Def ~p: Error. ~p, Reason: ~p",
            LogArgs),
        invalid_ddoc
    end.
219
220
%% Option specification handed to mango_opts:validate/2 for text index
%% definitions. Every option is optional and carries a default; some also
%% carry a validator.
opts() ->
    SelectorValidator = fun mango_opts:validate_selector/1,
    [
        optional_opt(<<"default_analyzer">>, default_analyzer, <<"keyword">>),
        optional_opt(<<"default_field">>, default_field, {[]}),
        optional_opt(<<"partial_filter_selector">>, partial_filter_selector,
            {[]}, SelectorValidator),
        optional_opt(<<"selector">>, selector, {[]}, SelectorValidator),
        optional_opt(<<"fields">>, fields, [],
            fun ?MODULE:validate_fields/1),
        optional_opt(<<"index_array_lengths">>, index_array_lengths, true,
            fun mango_opts:is_boolean/1)
    ].


%% Spec entry for an optional option without a validator.
optional_opt(Key, Tag, Default) ->
    {Key, [
        {tag, Tag},
        {optional, true},
        {default, Default}
    ]}.


%% Spec entry for an optional option with a validator.
optional_opt(Key, Tag, Default, Validator) ->
    {Key, [
        {tag, Tag},
        {optional, true},
        {default, Default},
        {validator, Validator}
    ]}.
258
259
%% Build the {Name, Entry} pair stored under <<"indexes">> in a design
%% document: the entry holds the index definition and the analyzer derived
%% from it by construct_analyzer/1.
make_text(Idx) ->
    Def = Idx#idx.def,
    Analyzer = construct_analyzer(Idx#idx.def),
    Entry = {[
        {<<"index">>, Def},
        {<<"analyzer">>, Analyzer}
    ]},
    {Idx#idx.name, Entry}.
266
267
%% Read the default_field option from validated index options and return
%% {Enabled, Analyzer}.
%%
%% The option may be a boolean (analyzer <<"standard">>), an empty object
%% (enabled, analyzer <<"standard">>) or an object whose <<"enabled">> and
%% <<"analyzer">> members override those defaults. A missing option is
%% treated as an empty object.
get_default_field_options(Props) ->
    case couch_util:get_value(default_field, Props, {[]}) of
        {[]} ->
            {true, <<"standard">>};
        Flag when is_boolean(Flag) ->
            {Flag, <<"standard">>};
        {FieldOpts} ->
            IsEnabled = couch_util:get_value(<<"enabled">>, FieldOpts, true),
            FieldAnalyzer = couch_util:get_value(<<"analyzer">>, FieldOpts,
                <<"standard">>),
            {IsEnabled, FieldAnalyzer}
    end.
281
282
%% Derive the analyzer definition stored next to the index definition.
%%
%% With the default field enabled the result is a "perfield" analyzer whose
%% default is the configured default_analyzer (<<"keyword">> when unset) and
%% whose <<"$default">> field uses the default field's analyzer. Otherwise
%% the plain <<"keyword">> analyzer is returned.
%% NOTE(review): in the disabled case the configured default_analyzer is not
%% used at all - confirm that this is intended.
construct_analyzer({Props}) ->
    DefaultAnalyzer = couch_util:get_value(default_analyzer, Props,
        <<"keyword">>),
    case get_default_field_options(Props) of
        {true, FieldAnalyzer} ->
            {[
                {<<"name">>, <<"perfield">>},
                {<<"default">>, DefaultAnalyzer},
                {<<"fields">>, {[{<<"$default">>, FieldAnalyzer}]}}
            ]};
        {_Disabled, _FieldAnalyzer} ->
            <<"keyword">>
    end.
303
304
%% List the index fields a selector needs: the selector is converted with
%% mango_selector_text:convert/2 and the resulting tree is walked by
%% indexable_fields/2, starting from an empty accumulator.
indexable_fields(Selector) ->
    indexable_fields([], mango_selector_text:convert([], Selector)).
308
309
%% Walk a converted selector tree and accumulate, in front of Acc, the names
%% of the index fields the selector refers to.

%% For queries that use array element access or $in operations, two
%% fields get generated by mango_selector_text:convert. At index
%% definition time, only one field gets defined. In this situation, we
%% remove the extra generated field so that the index can be used. For
%% all other situations, we include the fields as normal.
indexable_fields(Acc, {op_or, [{op_field, First},
        {op_field, {[Path | _], _}} = Second]}) ->
    IsArrayAccess = lists:member(<<"[]">>, Path),
    Acc1 = indexable_fields(Acc, {op_field, First}),
    case IsArrayAccess of
        true -> Acc1;
        false -> indexable_fields(Acc1, Second)
    end;

%% Conjunctions and all remaining disjunctions contribute the fields of
%% every argument, visited left to right.
indexable_fields(Acc, {Op, Args}) when
        (Op =:= op_and orelse Op =:= op_or), is_list(Args) ->
    lists:foldl(fun(Arg, Acc0) -> indexable_fields(Acc0, Arg) end,
        Acc, Args);

indexable_fields(Acc, {op_not, {ExistsQuery, Negated}})
        when is_tuple(Negated) ->
    indexable_fields(indexable_fields(Acc, ExistsQuery), Negated);
% forces "$exists" : false to use _all_docs
indexable_fields(_Acc, {op_not, {_, false}}) ->
    [];

indexable_fields(Acc, {op_insert, Arg}) when is_binary(Arg) ->
    Acc;

%% fieldname.[]:length is not a user defined field.
indexable_fields(Acc, {op_field, {[_, <<":length">>], _}}) ->
    Acc;
indexable_fields(Acc, {op_field, {Name, _}}) ->
    [iolist_to_binary(Name) | Acc];

%% In this particular case, the lucene index is doing a field_exists query
%% so it is looking at all sorts of combinations of field:* and field.*
%% We don't add the field because we cannot pre-determine what field will
%% exist. Hence we just return the accumulator and make it less restrictive.
indexable_fields(Acc, {op_fieldname, {_, _}}) ->
    Acc;

%% Similar idea to op_fieldname but with fieldname:null
indexable_fields(Acc, {op_null, {_, _}}) ->
    Acc;

indexable_fields(Acc, {op_default, _}) ->
    [<<"$default">> | Acc].
361
362
%% Apply the "index all fields" policy to a validated index definition.
%%
%% When the definition indexes all fields, the configured policy (see
%% forbid_index_all/0) decides the outcome: "true" raises an
%% index_all_disabled mango error, "warn" logs a warning naming the user
%% and the database. Every other combination returns ok.
maybe_reject_index_all_req({Def}, Db) ->
    DbName = couch_db:name(Db),
    #user_ctx{name = User} = couch_db:get_user_ctx(Db),
    Fields = couch_util:get_value(fields, Def),
    Policy = forbid_index_all(),
    case Fields of
        all_fields when Policy =:= "true" ->
            ?MANGO_ERROR(index_all_disabled);
        all_fields when Policy =:= "warn" ->
            couch_log:warning("User ~p is indexing all fields in db ~p",
                [User, DbName]);
        _ ->
            ok
    end.
376
377
%% Read the "index_all_disabled" setting of the "mango" config section;
%% the string "false" is used when the setting is absent.
forbid_index_all() ->
    Section = "mango",
    Key = "index_all_disabled",
    config:get(Section, Key, "false").
380
381
382-ifdef(TEST).
383-include_lib("eunit/include/eunit.hrl").
384
385
%% Suite fixture: start couch and make couch_log:warning/2 throw
%% {test_error, logged_warning}, so that tests can detect that a warning
%% was logged. Returns the context needed by teardown_all/1.
setup_all() ->
    Ctx = test_util:start_couch(),
    ThrowOnWarning = fun(_, _) ->
        throw({test_error, logged_warning})
    end,
    meck:expect(couch_log, warning, 2, ThrowOnWarning),
    Ctx.
393
394
%% Suite cleanup: unload all meck mocks, then stop couch using the context
%% returned by setup_all/0.
teardown_all(CouchCtx) ->
    meck:unload(),
    test_util:stop_couch(CouchCtx).
398
399
%% Per-test fixture: an index with an empty definition (which validates to
%% {fields, all_fields}) and a clustered db handle for user <<"u1">>.
setup() ->
    UserCtx = #user_ctx{name = <<"u1">>},
    {ok, Db} = couch_db:clustered_db(<<"testdb">>, UserCtx),
    {#idx{def = {[]}}, Db}.
407
408
%% Per-test cleanup: nothing to release.
teardown(_Fixture) ->
    ok.
411
412
%% EUnit generator: runs the three "index all fields" policy tests, each with
%% a fresh per-test fixture, inside one suite-level couch setup.
index_all_test_() ->
    Cases = [
        fun forbid_index_all/1,
        fun default_and_false_index_all/1,
        fun warn_index_all/1
    ],
    PerTest = {foreach, fun setup/0, fun teardown/1, Cases},
    {setup, fun setup_all/0, fun teardown_all/1, PerTest}.
429
430
%% With index_all_disabled set to "true", validating an index over all
%% fields must throw the index_all_disabled mango error.
forbid_index_all({Index, Db}) ->
    ?_test(begin
        ok = config:set("mango", "index_all_disabled", "true", false),
        Expected = {mango_error, ?MODULE, index_all_disabled},
        ?assertThrow(Expected, validate_new(Index, Db))
    end).
438
439
%% With the setting absent, and with it explicitly "false", an index over
%% all fields validates and keeps fields = all_fields.
default_and_false_index_all({Index, Db}) ->
    ?_test(begin
        config:delete("mango", "index_all_disabled", false),
        {ok, #idx{def = {UnsetDef}}} = validate_new(Index, Db),
        ?assertEqual(all_fields, couch_util:get_value(fields, UnsetDef)),
        ok = config:set("mango", "index_all_disabled", "false", false),
        {ok, #idx{def = {FalseDef}}} = validate_new(Index, Db),
        ?assertEqual(all_fields, couch_util:get_value(fields, FalseDef))
    end).
451
452
%% With index_all_disabled set to "warn", validation logs a warning; the
%% mocked couch_log:warning/2 from setup_all/0 turns that into a throw.
warn_index_all({Index, Db}) ->
    ?_test(begin
        ok = config:set("mango", "index_all_disabled", "warn", false),
        Expected = {test_error, logged_warning},
        ?assertThrow(Expected, validate_new(Index, Db))
    end).
458
459
460-endif.
461