1# -*- coding: utf-8 -*- 2# This file is part of beets. 3# Copyright 2016, Adrian Sampson. 4# 5# Permission is hereby granted, free of charge, to any person obtaining 6# a copy of this software and associated documentation files (the 7# "Software"), to deal in the Software without restriction, including 8# without limitation the rights to use, copy, modify, merge, publish, 9# distribute, sublicense, and/or sell copies of the Software, and to 10# permit persons to whom the Software is furnished to do so, subject to 11# the following conditions: 12# 13# The above copyright notice and this permission notice shall be 14# included in all copies or substantial portions of the Software. 15 16"""Parsing of strings into DBCore queries. 17""" 18from __future__ import division, absolute_import, print_function 19 20import re 21import itertools 22from . import query 23 24PARSE_QUERY_PART_REGEX = re.compile( 25 # Non-capturing optional segment for the keyword. 26 r'(-|\^)?' # Negation prefixes. 27 28 r'(?:' 29 r'(\S+?)' # The field key. 30 r'(?<!\\):' # Unescaped : 31 r')?' 32 33 r'(.*)', # The term itself. 34 35 re.I # Case-insensitive. 36) 37 38 39def parse_query_part(part, query_classes={}, prefixes={}, 40 default_class=query.SubstringQuery): 41 """Parse a single *query part*, which is a chunk of a complete query 42 string representing a single criterion. 43 44 A query part is a string consisting of: 45 - A *pattern*: the value to look for. 46 - Optionally, a *field name* preceding the pattern, separated by a 47 colon. So in `foo:bar`, `foo` is the field name and `bar` is the 48 pattern. 49 - Optionally, a *query prefix* just before the pattern (and after the 50 optional colon) indicating the type of query that should be used. For 51 example, in `~foo`, `~` might be a prefix. (The set of prefixes to 52 look for is given in the `prefixes` parameter.) 53 - Optionally, a negation indicator, `-` or `^`, at the very beginning. 54 55 Both prefixes and the separating `:` character may be escaped with a 56 backslash to avoid their normal meaning. 57 58 The function returns a tuple consisting of: 59 - The field name: a string or None if it's not present. 60 - The pattern, a string. 61 - The query class to use, which inherits from the base 62 :class:`Query` type. 63 - A negation flag, a bool. 64 65 The three optional parameters determine which query class is used (i.e., 66 the third return value). They are: 67 - `query_classes`, which maps field names to query classes. These 68 are used when no explicit prefix is present. 69 - `prefixes`, which maps prefix strings to query classes. 70 - `default_class`, the fallback when neither the field nor a prefix 71 indicates a query class. 72 73 So the precedence for determining which query class to return is: 74 prefix, followed by field, and finally the default. 75 76 For example, assuming the `:` prefix is used for `RegexpQuery`: 77 - `'stapler'` -> `(None, 'stapler', SubstringQuery, False)` 78 - `'color:red'` -> `('color', 'red', SubstringQuery, False)` 79 - `':^Quiet'` -> `(None, '^Quiet', RegexpQuery, False)`, because 80 the `^` follows the `:` 81 - `'color::b..e'` -> `('color', 'b..e', RegexpQuery, False)` 82 - `'-color:red'` -> `('color', 'red', SubstringQuery, True)` 83 """ 84 # Apply the regular expression and extract the components. 85 part = part.strip() 86 match = PARSE_QUERY_PART_REGEX.match(part) 87 88 assert match # Regex should always match 89 negate = bool(match.group(1)) 90 key = match.group(2) 91 term = match.group(3).replace('\\:', ':') 92 93 # Check whether there's a prefix in the query and use the 94 # corresponding query type. 95 for pre, query_class in prefixes.items(): 96 if term.startswith(pre): 97 return key, term[len(pre):], query_class, negate 98 99 # No matching prefix, so use either the query class determined by 100 # the field or the default as a fallback. 101 query_class = query_classes.get(key, default_class) 102 return key, term, query_class, negate 103 104 105def construct_query_part(model_cls, prefixes, query_part): 106 """Parse a *query part* string and return a :class:`Query` object. 107 108 :param model_cls: The :class:`Model` class that this is a query for. 109 This is used to determine the appropriate query types for the 110 model's fields. 111 :param prefixes: A map from prefix strings to :class:`Query` types. 112 :param query_part: The string to parse. 113 114 See the documentation for `parse_query_part` for more information on 115 query part syntax. 116 """ 117 # A shortcut for empty query parts. 118 if not query_part: 119 return query.TrueQuery() 120 121 # Use `model_cls` to build up a map from field (or query) names to 122 # `Query` classes. 123 query_classes = {} 124 for k, t in itertools.chain(model_cls._fields.items(), 125 model_cls._types.items()): 126 query_classes[k] = t.query 127 query_classes.update(model_cls._queries) # Non-field queries. 128 129 # Parse the string. 130 key, pattern, query_class, negate = \ 131 parse_query_part(query_part, query_classes, prefixes) 132 133 # If there's no key (field name) specified, this is a "match 134 # anything" query. 135 if key is None: 136 if issubclass(query_class, query.FieldQuery): 137 # The query type matches a specific field, but none was 138 # specified. So we use a version of the query that matches 139 # any field. 140 out_query = query.AnyFieldQuery(pattern, model_cls._search_fields, 141 query_class) 142 else: 143 # Non-field query type. 144 out_query = query_class(pattern) 145 146 # Field queries get constructed according to the name of the field 147 # they are querying. 148 elif issubclass(query_class, query.FieldQuery): 149 key = key.lower() 150 out_query = query_class(key.lower(), pattern, key in model_cls._fields) 151 152 # Non-field (named) query. 153 else: 154 out_query = query_class(pattern) 155 156 # Apply negation. 157 if negate: 158 return query.NotQuery(out_query) 159 else: 160 return out_query 161 162 163def query_from_strings(query_cls, model_cls, prefixes, query_parts): 164 """Creates a collection query of type `query_cls` from a list of 165 strings in the format used by parse_query_part. `model_cls` 166 determines how queries are constructed from strings. 167 """ 168 subqueries = [] 169 for part in query_parts: 170 subqueries.append(construct_query_part(model_cls, prefixes, part)) 171 if not subqueries: # No terms in query. 172 subqueries = [query.TrueQuery()] 173 return query_cls(subqueries) 174 175 176def construct_sort_part(model_cls, part, case_insensitive=True): 177 """Create a `Sort` from a single string criterion. 178 179 `model_cls` is the `Model` being queried. `part` is a single string 180 ending in ``+`` or ``-`` indicating the sort. `case_insensitive` 181 indicates whether or not the sort should be performed in a case 182 sensitive manner. 183 """ 184 assert part, "part must be a field name and + or -" 185 field = part[:-1] 186 assert field, "field is missing" 187 direction = part[-1] 188 assert direction in ('+', '-'), "part must end with + or -" 189 is_ascending = direction == '+' 190 191 if field in model_cls._sorts: 192 sort = model_cls._sorts[field](model_cls, is_ascending, 193 case_insensitive) 194 elif field in model_cls._fields: 195 sort = query.FixedFieldSort(field, is_ascending, case_insensitive) 196 else: 197 # Flexible or computed. 198 sort = query.SlowFieldSort(field, is_ascending, case_insensitive) 199 return sort 200 201 202def sort_from_strings(model_cls, sort_parts, case_insensitive=True): 203 """Create a `Sort` from a list of sort criteria (strings). 204 """ 205 if not sort_parts: 206 sort = query.NullSort() 207 elif len(sort_parts) == 1: 208 sort = construct_sort_part(model_cls, sort_parts[0], case_insensitive) 209 else: 210 sort = query.MultipleSort() 211 for part in sort_parts: 212 sort.add_sort(construct_sort_part(model_cls, part, 213 case_insensitive)) 214 return sort 215 216 217def parse_sorted_query(model_cls, parts, prefixes={}, 218 case_insensitive=True): 219 """Given a list of strings, create the `Query` and `Sort` that they 220 represent. 221 """ 222 # Separate query token and sort token. 223 query_parts = [] 224 sort_parts = [] 225 226 # Split up query in to comma-separated subqueries, each representing 227 # an AndQuery, which need to be joined together in one OrQuery 228 subquery_parts = [] 229 for part in parts + [u',']: 230 if part.endswith(u','): 231 # Ensure we can catch "foo, bar" as well as "foo , bar" 232 last_subquery_part = part[:-1] 233 if last_subquery_part: 234 subquery_parts.append(last_subquery_part) 235 # Parse the subquery in to a single AndQuery 236 # TODO: Avoid needlessly wrapping AndQueries containing 1 subquery? 237 query_parts.append(query_from_strings( 238 query.AndQuery, model_cls, prefixes, subquery_parts 239 )) 240 del subquery_parts[:] 241 else: 242 # Sort parts (1) end in + or -, (2) don't have a field, and 243 # (3) consist of more than just the + or -. 244 if part.endswith((u'+', u'-')) \ 245 and u':' not in part \ 246 and len(part) > 1: 247 sort_parts.append(part) 248 else: 249 subquery_parts.append(part) 250 251 # Avoid needlessly wrapping single statements in an OR 252 q = query.OrQuery(query_parts) if len(query_parts) > 1 else query_parts[0] 253 s = sort_from_strings(model_cls, sort_parts, case_insensitive) 254 return q, s 255