1import operator
2import re
3
4from functools import partial
5
6from .query import MATCH_NONE, Phrase, PlainText
7
8
9NOT_SET = object()
10
11
12def balanced_reduce(operator, seq, initializer=NOT_SET):
13    """
14    Has the same result as Python's reduce function, but performs the calculations in a different order.
15
16    This is important when the operator is constructing data structures such as search query clases.
17    This method will make the resulting data structures flatter, so operations that need to traverse
18    them don't end up crashing with recursion errors.
19
20    For example:
21
22    Python's builtin reduce() function will do the following calculation:
23
24    reduce(add, [1, 2, 3, 4, 5, 6, 7, 8])
25    (1 + (2 + (3 + (4 + (5 + (6 + (7 + 8)))))))
26
27    When using this with query classes, it would create a large data structure with a depth of 7
28    Whereas balanced_reduce will execute this like so:
29
30    balanced_reduce(add, [1, 2, 3, 4, 5, 6, 7, 8])
31    ((1 + 2) + (3 + 4)) + ((5 + 6) + (7 + 8))
32
33    Which only has a depth of 2
34    """
35    # Casting all iterables to list makes the implementation simpler
36    if not isinstance(seq, list):
37        seq = list(seq)
38
39    # Note, it needs to be possible to use None as an initial value
40    if initializer is not NOT_SET:
41        if len(seq) == 0:
42            return initializer
43        else:
44            return operator(initializer, balanced_reduce(operator, seq))
45
46    if len(seq) == 0:
47        raise TypeError("reduce() of empty sequence with no initial value")
48    elif len(seq) == 1:
49        return seq[0]
50    else:
51        break_point = len(seq) // 2
52        first_set = balanced_reduce(operator, seq[:break_point])
53        second_set = balanced_reduce(operator, seq[break_point:])
54        return operator(first_set, second_set)
55
56
57# Reduce any iterable to a single value using a logical OR e.g. (a | b | ...)
58OR = partial(balanced_reduce, operator.or_)
59# Reduce any iterable to a single value using a logical AND e.g. (a & b & ...)
60AND = partial(balanced_reduce, operator.and_)
61# Reduce any iterable to a single value using an addition
62ADD = partial(balanced_reduce, operator.add)
63# Reduce any iterable to a single value using a multiplication
64MUL = partial(balanced_reduce, operator.mul)
65
66MAX_QUERY_STRING_LENGTH = 255
67
68
69def normalise_query_string(query_string):
70    # Truncate query string
71    if len(query_string) > MAX_QUERY_STRING_LENGTH:
72        query_string = query_string[:MAX_QUERY_STRING_LENGTH]
73    # Convert query_string to lowercase
74    query_string = query_string.lower()
75
76    # Remove leading, trailing and multiple spaces
77    query_string = re.sub(' +', ' ', query_string).strip()
78
79    return query_string
80
81
82def separate_filters_from_query(query_string):
83    filters_regexp = r'(\w+):(\w+|".+")'
84
85    filters = {}
86    for match_object in re.finditer(filters_regexp, query_string):
87        key, value = match_object.groups()
88        filters[key] = value.strip("\"")
89
90    query_string = re.sub(filters_regexp, '', query_string).strip()
91
92    return filters, query_string
93
94
95def parse_query_string(query_string, operator=None, zero_terms=MATCH_NONE):
96    """
97    This takes a query string typed in by a user and extracts the following:
98
99     - Quoted terms (for phrase search)
100     - Filters
101
102    For example, the following query:
103
104      `hello "this is a phrase" live:true` would be parsed into:
105
106    filters: {'live': 'true'}
107    tokens: And([PlainText('hello'), Phrase('this is a phrase')])
108    """
109    filters, query_string = separate_filters_from_query(query_string)
110
111    is_phrase = False
112    tokens = []
113    for part in query_string.split('"'):
114        part = part.strip()
115
116        if part:
117            if is_phrase:
118                tokens.append(Phrase(part))
119            else:
120                tokens.append(PlainText(part, operator=operator or PlainText.DEFAULT_OPERATOR))
121
122        is_phrase = not is_phrase
123
124    if tokens:
125        if operator == 'or':
126            search_query = OR(tokens)
127        else:
128            search_query = AND(tokens)
129    else:
130        search_query = zero_terms
131
132    return filters, search_query
133