1# -*- coding: utf-8 -*-
2# This file is part of beets.
3# Copyright 2016, Adrian Sampson.
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15
16"""Parsing of strings into DBCore queries.
17"""
18from __future__ import division, absolute_import, print_function
19
20import re
21import itertools
22from . import query
23
24PARSE_QUERY_PART_REGEX = re.compile(
25    # Non-capturing optional segment for the keyword.
26    r'(-|\^)?'   # Negation prefixes.
27
28    r'(?:'
29    r'(\S+?)'    # The field key.
30    r'(?<!\\):'  # Unescaped :
31    r')?'
32
33    r'(.*)',         # The term itself.
34
35    re.I  # Case-insensitive.
36)
37
38
39def parse_query_part(part, query_classes={}, prefixes={},
40                     default_class=query.SubstringQuery):
41    """Parse a single *query part*, which is a chunk of a complete query
42    string representing a single criterion.
43
44    A query part is a string consisting of:
45    - A *pattern*: the value to look for.
46    - Optionally, a *field name* preceding the pattern, separated by a
47      colon. So in `foo:bar`, `foo` is the field name and `bar` is the
48      pattern.
49    - Optionally, a *query prefix* just before the pattern (and after the
50      optional colon) indicating the type of query that should be used. For
51      example, in `~foo`, `~` might be a prefix. (The set of prefixes to
52      look for is given in the `prefixes` parameter.)
53    - Optionally, a negation indicator, `-` or `^`, at the very beginning.
54
55    Both prefixes and the separating `:` character may be escaped with a
56    backslash to avoid their normal meaning.
57
58    The function returns a tuple consisting of:
59    - The field name: a string or None if it's not present.
60    - The pattern, a string.
61    - The query class to use, which inherits from the base
62      :class:`Query` type.
63    - A negation flag, a bool.
64
65    The three optional parameters determine which query class is used (i.e.,
66    the third return value). They are:
67    - `query_classes`, which maps field names to query classes. These
68      are used when no explicit prefix is present.
69    - `prefixes`, which maps prefix strings to query classes.
70    - `default_class`, the fallback when neither the field nor a prefix
71      indicates a query class.
72
73    So the precedence for determining which query class to return is:
74    prefix, followed by field, and finally the default.
75
76    For example, assuming the `:` prefix is used for `RegexpQuery`:
77    - `'stapler'` -> `(None, 'stapler', SubstringQuery, False)`
78    - `'color:red'` -> `('color', 'red', SubstringQuery, False)`
79    - `':^Quiet'` -> `(None, '^Quiet', RegexpQuery, False)`, because
80      the `^` follows the `:`
81    - `'color::b..e'` -> `('color', 'b..e', RegexpQuery, False)`
82    - `'-color:red'` -> `('color', 'red', SubstringQuery, True)`
83    """
84    # Apply the regular expression and extract the components.
85    part = part.strip()
86    match = PARSE_QUERY_PART_REGEX.match(part)
87
88    assert match  # Regex should always match
89    negate = bool(match.group(1))
90    key = match.group(2)
91    term = match.group(3).replace('\\:', ':')
92
93    # Check whether there's a prefix in the query and use the
94    # corresponding query type.
95    for pre, query_class in prefixes.items():
96        if term.startswith(pre):
97            return key, term[len(pre):], query_class, negate
98
99    # No matching prefix, so use either the query class determined by
100    # the field or the default as a fallback.
101    query_class = query_classes.get(key, default_class)
102    return key, term, query_class, negate
103
104
105def construct_query_part(model_cls, prefixes, query_part):
106    """Parse a *query part* string and return a :class:`Query` object.
107
108    :param model_cls: The :class:`Model` class that this is a query for.
109      This is used to determine the appropriate query types for the
110      model's fields.
111    :param prefixes: A map from prefix strings to :class:`Query` types.
112    :param query_part: The string to parse.
113
114    See the documentation for `parse_query_part` for more information on
115    query part syntax.
116    """
117    # A shortcut for empty query parts.
118    if not query_part:
119        return query.TrueQuery()
120
121    # Use `model_cls` to build up a map from field (or query) names to
122    # `Query` classes.
123    query_classes = {}
124    for k, t in itertools.chain(model_cls._fields.items(),
125                                model_cls._types.items()):
126        query_classes[k] = t.query
127    query_classes.update(model_cls._queries)  # Non-field queries.
128
129    # Parse the string.
130    key, pattern, query_class, negate = \
131        parse_query_part(query_part, query_classes, prefixes)
132
133    # If there's no key (field name) specified, this is a "match
134    # anything" query.
135    if key is None:
136        if issubclass(query_class, query.FieldQuery):
137            # The query type matches a specific field, but none was
138            # specified. So we use a version of the query that matches
139            # any field.
140            out_query = query.AnyFieldQuery(pattern, model_cls._search_fields,
141                                            query_class)
142        else:
143            # Non-field query type.
144            out_query = query_class(pattern)
145
146    # Field queries get constructed according to the name of the field
147    # they are querying.
148    elif issubclass(query_class, query.FieldQuery):
149        key = key.lower()
150        out_query = query_class(key.lower(), pattern, key in model_cls._fields)
151
152    # Non-field (named) query.
153    else:
154        out_query = query_class(pattern)
155
156    # Apply negation.
157    if negate:
158        return query.NotQuery(out_query)
159    else:
160        return out_query
161
162
163def query_from_strings(query_cls, model_cls, prefixes, query_parts):
164    """Creates a collection query of type `query_cls` from a list of
165    strings in the format used by parse_query_part. `model_cls`
166    determines how queries are constructed from strings.
167    """
168    subqueries = []
169    for part in query_parts:
170        subqueries.append(construct_query_part(model_cls, prefixes, part))
171    if not subqueries:  # No terms in query.
172        subqueries = [query.TrueQuery()]
173    return query_cls(subqueries)
174
175
176def construct_sort_part(model_cls, part, case_insensitive=True):
177    """Create a `Sort` from a single string criterion.
178
179    `model_cls` is the `Model` being queried. `part` is a single string
180    ending in ``+`` or ``-`` indicating the sort. `case_insensitive`
181    indicates whether or not the sort should be performed in a case
182    sensitive manner.
183    """
184    assert part, "part must be a field name and + or -"
185    field = part[:-1]
186    assert field, "field is missing"
187    direction = part[-1]
188    assert direction in ('+', '-'), "part must end with + or -"
189    is_ascending = direction == '+'
190
191    if field in model_cls._sorts:
192        sort = model_cls._sorts[field](model_cls, is_ascending,
193                                       case_insensitive)
194    elif field in model_cls._fields:
195        sort = query.FixedFieldSort(field, is_ascending, case_insensitive)
196    else:
197        # Flexible or computed.
198        sort = query.SlowFieldSort(field, is_ascending, case_insensitive)
199    return sort
200
201
202def sort_from_strings(model_cls, sort_parts, case_insensitive=True):
203    """Create a `Sort` from a list of sort criteria (strings).
204    """
205    if not sort_parts:
206        sort = query.NullSort()
207    elif len(sort_parts) == 1:
208        sort = construct_sort_part(model_cls, sort_parts[0], case_insensitive)
209    else:
210        sort = query.MultipleSort()
211        for part in sort_parts:
212            sort.add_sort(construct_sort_part(model_cls, part,
213                                              case_insensitive))
214    return sort
215
216
217def parse_sorted_query(model_cls, parts, prefixes={},
218                       case_insensitive=True):
219    """Given a list of strings, create the `Query` and `Sort` that they
220    represent.
221    """
222    # Separate query token and sort token.
223    query_parts = []
224    sort_parts = []
225
226    # Split up query in to comma-separated subqueries, each representing
227    # an AndQuery, which need to be joined together in one OrQuery
228    subquery_parts = []
229    for part in parts + [u',']:
230        if part.endswith(u','):
231            # Ensure we can catch "foo, bar" as well as "foo , bar"
232            last_subquery_part = part[:-1]
233            if last_subquery_part:
234                subquery_parts.append(last_subquery_part)
235            # Parse the subquery in to a single AndQuery
236            # TODO: Avoid needlessly wrapping AndQueries containing 1 subquery?
237            query_parts.append(query_from_strings(
238                query.AndQuery, model_cls, prefixes, subquery_parts
239            ))
240            del subquery_parts[:]
241        else:
242            # Sort parts (1) end in + or -, (2) don't have a field, and
243            # (3) consist of more than just the + or -.
244            if part.endswith((u'+', u'-')) \
245                    and u':' not in part \
246                    and len(part) > 1:
247                sort_parts.append(part)
248            else:
249                subquery_parts.append(part)
250
251    # Avoid needlessly wrapping single statements in an OR
252    q = query.OrQuery(query_parts) if len(query_parts) > 1 else query_parts[0]
253    s = sort_from_strings(model_cls, sort_parts, case_insensitive)
254    return q, s
255