1import operator 2import re 3 4from functools import partial 5 6from .query import MATCH_NONE, Phrase, PlainText 7 8 9NOT_SET = object() 10 11 12def balanced_reduce(operator, seq, initializer=NOT_SET): 13 """ 14 Has the same result as Python's reduce function, but performs the calculations in a different order. 15 16 This is important when the operator is constructing data structures such as search query clases. 17 This method will make the resulting data structures flatter, so operations that need to traverse 18 them don't end up crashing with recursion errors. 19 20 For example: 21 22 Python's builtin reduce() function will do the following calculation: 23 24 reduce(add, [1, 2, 3, 4, 5, 6, 7, 8]) 25 (1 + (2 + (3 + (4 + (5 + (6 + (7 + 8))))))) 26 27 When using this with query classes, it would create a large data structure with a depth of 7 28 Whereas balanced_reduce will execute this like so: 29 30 balanced_reduce(add, [1, 2, 3, 4, 5, 6, 7, 8]) 31 ((1 + 2) + (3 + 4)) + ((5 + 6) + (7 + 8)) 32 33 Which only has a depth of 2 34 """ 35 # Casting all iterables to list makes the implementation simpler 36 if not isinstance(seq, list): 37 seq = list(seq) 38 39 # Note, it needs to be possible to use None as an initial value 40 if initializer is not NOT_SET: 41 if len(seq) == 0: 42 return initializer 43 else: 44 return operator(initializer, balanced_reduce(operator, seq)) 45 46 if len(seq) == 0: 47 raise TypeError("reduce() of empty sequence with no initial value") 48 elif len(seq) == 1: 49 return seq[0] 50 else: 51 break_point = len(seq) // 2 52 first_set = balanced_reduce(operator, seq[:break_point]) 53 second_set = balanced_reduce(operator, seq[break_point:]) 54 return operator(first_set, second_set) 55 56 57# Reduce any iterable to a single value using a logical OR e.g. (a | b | ...) 58OR = partial(balanced_reduce, operator.or_) 59# Reduce any iterable to a single value using a logical AND e.g. (a & b & ...) 60AND = partial(balanced_reduce, operator.and_) 61# Reduce any iterable to a single value using an addition 62ADD = partial(balanced_reduce, operator.add) 63# Reduce any iterable to a single value using a multiplication 64MUL = partial(balanced_reduce, operator.mul) 65 66MAX_QUERY_STRING_LENGTH = 255 67 68 69def normalise_query_string(query_string): 70 # Truncate query string 71 if len(query_string) > MAX_QUERY_STRING_LENGTH: 72 query_string = query_string[:MAX_QUERY_STRING_LENGTH] 73 # Convert query_string to lowercase 74 query_string = query_string.lower() 75 76 # Remove leading, trailing and multiple spaces 77 query_string = re.sub(' +', ' ', query_string).strip() 78 79 return query_string 80 81 82def separate_filters_from_query(query_string): 83 filters_regexp = r'(\w+):(\w+|".+")' 84 85 filters = {} 86 for match_object in re.finditer(filters_regexp, query_string): 87 key, value = match_object.groups() 88 filters[key] = value.strip("\"") 89 90 query_string = re.sub(filters_regexp, '', query_string).strip() 91 92 return filters, query_string 93 94 95def parse_query_string(query_string, operator=None, zero_terms=MATCH_NONE): 96 """ 97 This takes a query string typed in by a user and extracts the following: 98 99 - Quoted terms (for phrase search) 100 - Filters 101 102 For example, the following query: 103 104 `hello "this is a phrase" live:true` would be parsed into: 105 106 filters: {'live': 'true'} 107 tokens: And([PlainText('hello'), Phrase('this is a phrase')]) 108 """ 109 filters, query_string = separate_filters_from_query(query_string) 110 111 is_phrase = False 112 tokens = [] 113 for part in query_string.split('"'): 114 part = part.strip() 115 116 if part: 117 if is_phrase: 118 tokens.append(Phrase(part)) 119 else: 120 tokens.append(PlainText(part, operator=operator or PlainText.DEFAULT_OPERATOR)) 121 122 is_phrase = not is_phrase 123 124 if tokens: 125 if operator == 'or': 126 search_query = OR(tokens) 127 else: 128 search_query = AND(tokens) 129 else: 130 search_query = zero_terms 131 132 return filters, search_query 133