1#!/usr/local/bin/python3.8
2# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
3
4
5__license__   = 'GPL v3'
6__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
7__docformat__ = 'restructuredtext en'
8
9import regex, weakref, operator
10from functools import partial
11from datetime import timedelta
12from collections import deque, OrderedDict
13
14from calibre.constants import preferred_encoding, DEBUG
15from calibre.db.utils import force_to_bool
16from calibre.utils.config_base import prefs
17from calibre.utils.date import parse_date, UNDEFINED_DATE, now, dt_as_local
18from calibre.utils.icu import primary_contains, sort_key
19from calibre.utils.localization import lang_map, canonicalize_lang
20from calibre.utils.search_query_parser import SearchQueryParser, ParseException
21from polyglot.builtins import iteritems, string_or_bytes
22
# Kinds of text match. _matchkind() derives one of these from the prefix of a
# query and _match() dispatches on it.
CONTAINS_MATCH = 0  # default: the query is looked for inside the value
EQUALS_MATCH   = 1  # query prefixed with '=': the value must equal the query
REGEXP_MATCH   = 2  # query prefixed with '~': the query is a regular expression
26
27# Utils {{{
28
29
30def _matchkind(query, case_sensitive=False):
31    matchkind = CONTAINS_MATCH
32    if (len(query) > 1):
33        if query.startswith('\\'):
34            query = query[1:]
35        elif query.startswith('='):
36            matchkind = EQUALS_MATCH
37            query = query[1:]
38        elif query.startswith('~'):
39            matchkind = REGEXP_MATCH
40            query = query[1:]
41
42    if not case_sensitive and matchkind != REGEXP_MATCH:
43        # leave case in regexps because it can be significant e.g. \S \W \D
44        query = icu_lower(query)
45    return matchkind, query
46
47
48def _match(query, value, matchkind, use_primary_find_in_search=True, case_sensitive=False):
49    if query.startswith('..'):
50        query = query[1:]
51        sq = query[1:]
52        internal_match_ok = True
53    else:
54        internal_match_ok = False
55    for t in value:
56        try:  # ignore regexp exceptions, required because search-ahead tries before typing is finished
57            if not case_sensitive:
58                t = icu_lower(t)
59            if (matchkind == EQUALS_MATCH):
60                if internal_match_ok:
61                    if query == t:
62                        return True
63                    comps = [c.strip() for c in t.split('.') if c.strip()]
64                    for comp in comps:
65                        if sq == comp:
66                            return True
67                elif query[0] == '.':
68                    if t.startswith(query[1:]):
69                        ql = len(query) - 1
70                        if (len(t) == ql) or (t[ql:ql+1] == '.'):
71                            return True
72                elif query == t:
73                    return True
74            elif matchkind == REGEXP_MATCH:
75                flags = regex.UNICODE | regex.VERSION1 | regex.FULLCASE | (0 if case_sensitive else regex.IGNORECASE)
76                if regex.search(query, t, flags) is not None:
77                    return True
78            elif matchkind == CONTAINS_MATCH:
79                if not case_sensitive and use_primary_find_in_search:
80                    if primary_contains(query, t):
81                        return True
82                elif query in t:
83                    return True
84        except regex.error:
85            pass
86    return False
87# }}}
88
89
class DateSearch:  # {{{

    '''
    Search over date valued fields.

    An instance is called with a query and a callable that returns an
    iterator of ``(value, book_ids)`` pairs, and returns the set of book ids
    whose value satisfies the query.

    The comparison methods take a ``field_count`` that says how much of the
    query date is significant: 1 compares only the year, 2 the year and the
    month and 3 the year, month and day.
    '''

    def __init__(self):
        # Two character operators are listed first because __call__ uses the
        # first entry that is a prefix of the query
        self.operators = OrderedDict((
            ('!=', self.ne),
            ('>=', self.ge),
            ('<=', self.le),
            ('=', self.eq),
            ('>', self.gt),
            ('<', self.lt),
        ))
        self.local_today         = {'_today', 'today', icu_lower(_('today'))}
        self.local_yesterday     = {'_yesterday', 'yesterday', icu_lower(_('yesterday'))}
        self.local_thismonth     = {'_thismonth', 'thismonth', icu_lower(_('thismonth'))}
        # The translated word is arbitrary text, escape it so that it cannot
        # change the meaning of (or break) the pattern
        self.daysago_pat = regex.compile(
            r'(%s|daysago|_daysago)$' % regex.escape(_('daysago')),
            flags=regex.UNICODE | regex.VERSION1)

    def eq(self, dbdate, query, field_count):
        'True if dbdate equals query in the first field_count fields'
        if dbdate.year == query.year:
            if field_count == 1:
                return True
            if dbdate.month == query.month:
                if field_count == 2:
                    return True
                return dbdate.day == query.day
        return False

    def ne(self, *args):
        'Negation of eq()'
        return not self.eq(*args)

    def gt(self, dbdate, query, field_count):
        'True if dbdate is later than query, looking only at the first field_count fields'
        if dbdate.year > query.year:
            return True
        if field_count > 1 and dbdate.year == query.year:
            if dbdate.month > query.month:
                return True
            return (field_count == 3 and dbdate.month == query.month and
                    dbdate.day > query.day)
        return False

    def le(self, *args):
        'Negation of gt()'
        return not self.gt(*args)

    def lt(self, dbdate, query, field_count):
        'True if dbdate is earlier than query, looking only at the first field_count fields'
        if dbdate.year < query.year:
            return True
        if field_count > 1 and dbdate.year == query.year:
            if dbdate.month < query.month:
                return True
            return (field_count == 3 and dbdate.month == query.month and
                    dbdate.day < query.day)
        return False

    def ge(self, *args):
        'Negation of lt()'
        return not self.lt(*args)

    def _as_date(self, v):
        'Convert a str or bytes field value with parse_date(), any other value is returned unchanged'
        if isinstance(v, bytes):
            v = v.decode(preferred_encoding, 'replace')
        if isinstance(v, str):
            v = parse_date(v)
        return v

    def __call__(self, query, field_iter):
        '''
        Return the ids of the books whose date satisfies ``query``.

        :param query: ``true``/``false`` (date is set/unset), or an optional
            relational operator (``=`` when absent) followed by one of: a word
            for today, yesterday or this month, a number followed by the
            daysago word, or a date understood by ``parse_date()``.
        :param field_iter: Callable returning an iterator of
            ``(value, book_ids)`` pairs. String and bytes values are parsed
            into dates first.
        :raises ParseException: If the number or the date in the query cannot
            be converted.
        '''
        matches = set()
        if len(query) < 2:
            return matches

        if query in ('true', 'false'):
            # A date counts as set when it is later than UNDEFINED_DATE
            want_set = query == 'true'
            for v, book_ids in field_iter():
                v = self._as_date(v)
                is_set = v is not None and v > UNDEFINED_DATE
                if is_set == want_set:
                    matches |= book_ids
            return matches

        for prefix, relop in self.operators.items():
            if query.startswith(prefix):
                query = query[len(prefix):]
                break
        else:
            relop = self.operators['=']

        if query in self.local_today:
            qd = now()
            field_count = 3
        elif query in self.local_yesterday:
            qd = now() - timedelta(1)
            field_count = 3
        elif query in self.local_thismonth:
            qd = now()
            field_count = 2
        else:
            m = self.daysago_pat.search(query)
            if m is not None:
                # Everything before the daysago word is the number of days
                num = query[:-len(m.group(1))]
                try:
                    qd = now() - timedelta(int(num))
                except Exception:
                    raise ParseException(_('Number conversion error: {0}').format(num))
                field_count = 3
            else:
                try:
                    qd = parse_date(query, as_utc=False)
                except Exception:
                    raise ParseException(_('Date conversion error: {0}').format(query))
                # The number of separators tells how many of year, month and
                # day were given in the query
                if '-' in query:
                    field_count = query.count('-') + 1
                else:
                    field_count = query.count('/') + 1

        for v, book_ids in field_iter():
            v = self._as_date(v)
            if v is not None and relop(dt_as_local(v), qd, field_count):
                matches |= book_ids

        return matches
212# }}}
213
214
class NumericSearch:  # {{{

    '''
    Search over numeric fields (and over item counts of multi-valued fields).

    An instance is called with a query and a callable that returns an
    iterator of ``(value, book_ids)`` pairs, and returns the set of matching
    book ids.
    '''

    def __init__(self):
        # Two character operators are listed first because __call__ uses the
        # first entry that is a prefix of the query
        self.operators = OrderedDict((
            ('!=', operator.ne),
            ('>=', operator.ge),
            ('<=', operator.le),
            ('=', operator.eq),
            ('>', operator.gt),
            ('<', operator.lt),
        ))

    def __call__(self, query, field_iter, location, datatype, candidates, is_many=False):
        '''
        Return the ids of the books whose value satisfies ``query``.

        :param query: ``true``/``false``, or an optional relational operator
            (``=`` when absent) followed by a number. The number may end in
            ``k``, ``m`` or ``g`` to multiply it by 1024, 1024**2 or 1024**3.
        :param field_iter: Callable returning an iterator of
            ``(value, book_ids)`` pairs.
        :param location: Name of the field being searched. Only ``cover`` is
            special: true/false test its truthiness instead of ``is None``.
        :param datatype: ``rating`` values are halved before comparison, for
            ``half-rating`` the query is doubled instead; ``float``,
            ``composite``, ``half-rating`` and the empty string are parsed as
            floats, everything else as ints.
        :param candidates: The set of book ids being searched, used to compute
            the ``false`` result when ``is_many`` is True.
        :param is_many: True if a book can have several values in the field.
        :raises ParseException: If the query is not a number.
        '''
        matches = set()
        if not query:
            return matches

        q = ''
        cast = adjust = lambda x: x
        dt = datatype

        if is_many and query in {'true', 'false'}:
            valcheck = lambda x: True
            if datatype == 'rating':
                valcheck = lambda x: x is not None and x > 0
            found = set()
            for val, book_ids in field_iter():
                if valcheck(val):
                    found |= book_ids
            return found if query == 'true' else candidates - found

        if query == 'false':
            if location == 'cover':
                relop = lambda x,y: not bool(x)
            else:
                relop = lambda x,y: x is None
        elif query == 'true':
            if location == 'cover':
                relop = lambda x,y: bool(x)
            else:
                relop = lambda x,y: x is not None
        else:
            for prefix, relop in self.operators.items():
                if query.startswith(prefix):
                    query = query[len(prefix):]
                    break
            else:
                relop = self.operators['=']

            if dt == 'rating':
                cast = lambda x: 0 if x is None else int(x)
                adjust = lambda x: x // 2
            else:
                # Datatype is empty if the source is a template. Assume float
                cast = float if dt in ('float', 'composite', 'half-rating', '') else int

            mult = 1.0
            if len(query) > 1:
                mult = {'k': 1024.,'m': 1024.**2, 'g': 1024.**3}.get(query[-1].lower(), 1.0)
                if mult != 1.0:
                    query = query[:-1]

            try:
                q = cast(query) * mult
            except Exception:
                raise ParseException(
                        _('Non-numeric value in query: {0}').format(query))
            if dt == 'half-rating':
                q = int(round(q * 2))
                cast = int

        qfalse = query == 'false'
        for val, book_ids in field_iter():
            if val is None:
                if qfalse:
                    matches |= book_ids
                continue
            try:
                v = cast(val)
            except Exception:
                v = None
            if v:
                v = adjust(v)
            try:
                matched = relop(v, q)
            except TypeError:
                # v is None because the value is not a number. None cannot be
                # ordered against a number, so such a value never satisfies
                # <, >, <= or >= (it used to abort the whole search instead)
                matched = False
            if matched:
                matches |= book_ids
        return matches
304
305# }}}
306
307
308class BooleanSearch:  # {{{
309
310    def __init__(self):
311        self.local_no        = icu_lower(_('no'))
312        self.local_yes       = icu_lower(_('yes'))
313        self.local_unchecked = icu_lower(_('unchecked'))
314        self.local_checked   = icu_lower(_('checked'))
315        self.local_empty     = icu_lower(_('empty'))
316        self.local_blank     = icu_lower(_('blank'))
317        self.local_bool_values = {
318            self.local_no, self.local_unchecked, '_no', 'false', 'no', 'unchecked', '_unchecked',
319            self.local_yes, self.local_checked, 'checked', '_checked', '_yes', 'true', 'yes',
320            self.local_empty, self.local_blank, 'blank', '_blank', '_empty', 'empty'}
321
322    def __call__(self, query, field_iter, bools_are_tristate):
323        matches = set()
324        if query not in self.local_bool_values:
325            raise ParseException(_('Invalid boolean query "{0}"').format(query))
326        for val, book_ids in field_iter():
327            val = force_to_bool(val)
328            if not bools_are_tristate:
329                if val is None or not val:  # item is None or set to false
330                    if query in {self.local_no, self.local_unchecked, 'unchecked', '_unchecked', 'no', '_no', 'false'}:
331                        matches |= book_ids
332                else:  # item is explicitly set to true
333                    if query in {self.local_yes, self.local_checked, 'checked', '_checked', 'yes', '_yes', 'true'}:
334                        matches |= book_ids
335            else:
336                if val is None:
337                    if query in {self.local_empty, self.local_blank, 'blank', '_blank', 'empty', '_empty', 'false'}:
338                        matches |= book_ids
339                elif not val:  # is not None and false
340                    if query in {self.local_no, self.local_unchecked, 'unchecked', '_unchecked', 'no', '_no', 'true'}:
341                        matches |= book_ids
342                else:  # item is not None and true
343                    if query in {self.local_yes, self.local_checked, 'checked', '_checked', 'yes', '_yes', 'true'}:
344                        matches |= book_ids
345        return matches
346
347# }}}
348
349
class KeyPairSearch:  # {{{

    'Search over fields whose values are key to value mappings'

    def __call__(self, query, field_iter, candidates, use_primary_find):
        '''
        Return the ids of the books that have a matching key/value pair.

        :param query: Either ``key:value`` or just ``value``. Both parts may
            use the prefixes understood by ``_matchkind()``. A value of
            ``true`` or ``false`` tests for the presence or absence of the
            key (or of any pair at all when there is no key).
        :param field_iter: Callable returning an iterator of
            ``(mapping, book_ids)`` pairs.
        :param candidates: The set of book ids being searched, used to
            compute the ``false`` result.
        :param use_primary_find: Passed to ``_match()`` as
            ``use_primary_find_in_search``.
        '''
        if ':' in query:
            raw_key, _sep, raw_val = query.partition(':')
            keyq_mkind, keyq = _matchkind(raw_key.strip())
            valq_mkind, valq = _matchkind(raw_val.strip())
        else:
            keyq = keyq_mkind = ''
            valq_mkind, valq = _matchkind(query)

        if valq in {'true', 'false'}:
            if keyq:
                def is_present(mapping):
                    return mapping and mapping.get(keyq, False)
            else:
                def is_present(mapping):
                    return mapping
            found = set()
            for mapping, book_ids in field_iter():
                if is_present(mapping):
                    found |= book_ids
            if valq == 'true':
                return found
            return candidates - found

        def hit(q, text, mkind):
            return _match(q, (text,), mkind, use_primary_find_in_search=use_primary_find)

        matches = set()
        for mapping, book_ids in field_iter():
            # One pair satisfying both the key and the value query is enough.
            # An empty key or value query places no restriction
            if any(
                (not keyq or hit(keyq, key, keyq_mkind)) and
                (not valq or hit(valq, val, valq_mkind))
                for key, val in iteritems(mapping)
            ):
                matches |= book_ids

        return matches
387
388# }}}
389
390
class SavedSearchQueries:  # {{{

    '''
    The named (saved) searches of a database.

    The searches are kept in ``self.queries``, a mapping of name to query
    text that is read from, and written back to, the database preference
    named ``opt_name``. Only a weak reference to the database is held. When
    there is no database the modifying methods do nothing.
    '''

    queries = {}
    opt_name = ''

    def __init__(self, db, _opt_name):
        self.opt_name = _opt_name
        try:
            self._db = weakref.ref(db)
        except TypeError:
            # db could be None
            self._db = lambda : None
        self.load_from_db()

    def load_from_db(self):
        'Replace self.queries with the value stored in the database preference'
        db = self.db
        if db is not None:
            self.queries = db._pref(self.opt_name, default={})
        else:
            self.queries = {}

    @property
    def db(self):
        'The database object, or None if there is none (any more)'
        return self._db()

    def force_unicode(self, x):
        'Return x as a str, decoding it with preferred_encoding if needed'
        if not isinstance(x, str):
            x = x.decode(preferred_encoding, 'replace')
        return x

    def add(self, name, value):
        'Store the search value (stripped of surrounding whitespace) under name'
        db = self.db
        if db is not None:
            self.queries[self.force_unicode(name)] = self.force_unicode(value).strip()
            db._set_pref(self.opt_name, self.queries)

    def lookup(self, name):
        'Return the search stored under name, ignoring case, or None'
        sn = self.force_unicode(name).lower()
        for n, q in self.queries.items():
            if sn == n.lower():
                return q
        return None

    def delete(self, name):
        'Remove the search stored under name, if any'
        db = self.db
        if db is not None:
            self.queries.pop(self.force_unicode(name), False)
            db._set_pref(self.opt_name, self.queries)

    def rename(self, old_name, new_name):
        '''
        Move the search stored under old_name to new_name.

        Does nothing if there is no search named old_name. Renaming a search
        to its current name leaves it untouched.
        '''
        db = self.db
        if db is not None:
            old_name = self.force_unicode(old_name)
            if old_name not in self.queries:
                # Previously this created an entry new_name -> None
                return
            # Remove before inserting, so that old_name == new_name does not
            # delete the search
            self.queries[self.force_unicode(new_name)] = self.queries.pop(old_name)
            db._set_pref(self.opt_name, self.queries)

    def set_all(self, smap):
        'Replace all saved searches with the mapping smap'
        db = self.db
        if db is not None:
            self.queries = smap
            db._set_pref(self.opt_name, smap)

    def names(self):
        'Return the names of all saved searches, sorted with sort_key'
        return sorted(self.queries, key=sort_key)
454# }}}
455
456
class Parser(SearchQueryParser):  # {{{

    '''
    Evaluates the individual ``location:query`` terms of a search against the
    books of a database cache.

    :meth:`get_matches` dispatches on the datatype of the searched field to
    the date, numeric, boolean and key/value search objects passed to the
    constructor, and falls back to text matching for everything else.
    '''

    def __init__(self, dbcache, all_book_ids, gst, date_search, num_search,
                 bool_search, keypair_search, limit_search_columns, limit_search_columns_to,
                 locations, virtual_fields, lookup_saved_search, parse_cache):
        self.dbcache, self.all_book_ids = dbcache, all_book_ids
        self.all_search_locations = frozenset(locations)
        self.grouped_search_terms = gst
        self.date_search, self.num_search = date_search, num_search
        self.bool_search, self.keypair_search = bool_search, keypair_search
        self.limit_search_columns, self.limit_search_columns_to = (
            limit_search_columns, limit_search_columns_to)
        self.virtual_fields = virtual_fields or {}
        if 'marked' not in self.virtual_fields:
            # This object doubles as a 'marked' field without any values, see
            # iter_searchable_values() below
            self.virtual_fields['marked'] = self
        SearchQueryParser.__init__(self, locations, optimize=True, lookup_saved_search=lookup_saved_search, parse_cache=parse_cache)

    @property
    def field_metadata(self):
        'The field metadata of the database cache'
        return self.dbcache.field_metadata

    def universal_set(self):
        'The set of all book ids that can be searched'
        return self.all_book_ids

    def field_iter(self, name, candidates):
        '''
        Iterate over the searchable values of the field ``name`` for the
        books in ``candidates``.

        Fields that are not in the database cache are looked up in
        ``self.virtual_fields``; doing so sets ``self.virtual_field_used``.
        '''
        get_metadata = self.dbcache._get_proxy_metadata
        try:
            field = self.dbcache.fields[name]
        except KeyError:
            field = self.virtual_fields[name]
            self.virtual_field_used = True
        return field.iter_searchable_values(get_metadata, candidates)

    def iter_searchable_values(self, *args, **kwargs):
        'Generator that yields nothing, used when this object stands in for the marked field'
        for x in ():
            yield x, set()

    def parse(self, *args, **kwargs):
        'Reset ``self.virtual_field_used`` and delegate to SearchQueryParser.parse()'
        self.virtual_field_used = False
        return SearchQueryParser.parse(self, *args, **kwargs)

    def get_matches(self, location, query, candidates=None,
                    allow_recursion=True):
        '''
        Return the set of book ids matching ``query`` in ``location``.

        :param location: The search location (field name, search term alias,
            grouped search term, ``@user category``, ``vl``, ``template`` or
            ``all``).
        :param query: The query text.
        :param candidates: The book ids to search, all books when None. The
            passed in set is never modified.
        :param allow_recursion: False while the members of a grouped search
            term are being searched, so that a group containing a group is
            reported instead of being followed.
        :raises ParseException: For unknown or recursive Virtual libraries,
            recursive grouped search terms, invalid template searches and the
            errors raised by the datatype specific searches.
        '''
        # If candidates is not None, it must not be modified. Changing its
        # value will break query optimization in the search parser
        matches = set()

        if candidates is None:
            candidates = self.all_book_ids
        if not candidates or not query or not query.strip():
            return matches
        if location not in self.all_search_locations:
            return matches

        if location == 'vl':
            vl = self.dbcache._pref('virtual_libraries', {}).get(query) if query else None
            if not vl:
                raise ParseException(_('No such Virtual library: {}').format(query))
            try:
                return candidates & self.dbcache.books_in_virtual_library(
                            query, virtual_fields=self.virtual_fields)
            except RuntimeError:
                raise ParseException(_('Virtual library search is recursive: {}').format(query))

        if (len(location) > 2 and location.startswith('@') and
                    location[1:] in self.grouped_search_terms):
            location = location[1:]

        # get metadata key associated with the search term. Eliminates
        # dealing with plurals and other aliases
        original_location = location
        location = self.field_metadata.search_term_to_field_key(
            icu_lower(location.strip()))
        # grouped search terms
        if isinstance(location, list):
            if allow_recursion:
                # "false" for a group means: in none of the member fields.
                # Search for "true" in every member and invert the union
                if query.lower() == 'false':
                    invert = True
                    query = 'true'
                else:
                    invert = False
                # Copy once, so that books matched by one member field are
                # not searched again in the following ones
                c = candidates.copy()
                for loc in location:
                    m = self.get_matches(loc, query,
                            candidates=c, allow_recursion=False)
                    matches |= m
                    c -= m
                    if not c:
                        break
                if invert:
                    matches = self.all_book_ids - matches
                return matches
            raise ParseException(
                       _('Recursive query group detected: {0}').format(query))

        # If the user has asked to restrict searching over all field, apply
        # that restriction
        if (location == 'all' and self.limit_search_columns and
            self.limit_search_columns_to):
            terms = set()
            for l in self.limit_search_columns_to:
                l = icu_lower(l.strip())
                if l and l != 'all' and l in self.all_search_locations:
                    terms.add(l)
            if terms:
                c = candidates.copy()
                for l in terms:
                    try:
                        m = self.get_matches(l, query,
                            candidates=c, allow_recursion=allow_recursion)
                        matches |= m
                        c -= m
                        if not c:
                            break
                    except Exception:
                        # Best effort: a column that cannot evaluate this
                        # query is skipped
                        pass
                return matches

        upf = prefs['use_primary_find_in_search']

        if location in self.field_metadata:
            fm = self.field_metadata[location]
            dt = fm['datatype']

            # take care of dates special case
            if (dt == 'datetime' or (
                dt == 'composite' and
                fm['display'].get('composite_sort', '') == 'date')):
                if location == 'date':
                    location = 'timestamp'
                return self.date_search(
                    icu_lower(query), partial(self.field_iter, location, candidates))

            # take care of numbers special case
            if (dt in ('rating', 'int', 'float') or
                    (dt == 'composite' and
                     fm['display'].get('composite_sort', '') == 'number')):
                if location == 'id':
                    is_many = False

                    def fi(default_value=None):
                        for qid in candidates:
                            yield qid, {qid}
                else:
                    field = self.dbcache.fields[location]
                    fi, is_many = partial(self.field_iter, location, candidates), field.is_many
                if dt == 'rating' and fm['display'].get('allow_half_stars'):
                    dt = 'half-rating'
                return self.num_search(
                    icu_lower(query), fi, location, dt, candidates, is_many=is_many)

            # take care of the 'count' operator for is_multiples
            if (fm['is_multiple'] and
                len(query) > 1 and query[0] == '#' and query[1] in '=<>!'):
                return self.num_search(icu_lower(query[1:]), partial(
                        self.dbcache.fields[location].iter_counts, candidates,
                        get_metadata=self.dbcache._get_proxy_metadata),
                    location, dt, candidates)

            # take care of boolean special case
            if dt == 'bool':
                return self.bool_search(icu_lower(query),
                                partial(self.field_iter, location, candidates),
                                self.dbcache._pref('bools_are_tristate'))

            # special case: colon-separated fields such as identifiers. isbn
            # is a special case within the case
            if fm.get('is_csp', False):
                field_iter = partial(self.field_iter, location, candidates)
                if location == 'identifiers' and original_location == 'isbn':
                    return self.keypair_search('=isbn:'+query, field_iter,
                                        candidates, upf)
                return self.keypair_search(query, field_iter, candidates, upf)

        # check for user categories
        if len(location) >= 2 and location.startswith('@'):
            return self.get_user_category_matches(location[1:], icu_lower(query), candidates)

        # Everything else (and 'all' matches)
        case_sensitive = prefs['case_sensitive']

        if location == 'template':
            try:
                # Fails to unpack, and so raises, when the separator is missing
                template, sep, query = regex.split('#@#:([tdnb]):', query, flags=regex.IGNORECASE)
                if sep:
                    sep = sep.lower()
                else:
                    sep = 't'
            except Exception:
                if DEBUG:
                    import traceback
                    traceback.print_exc()
                raise ParseException(_('search template: missing or invalid separator. Valid separators are: {}').format('#@#:[tdnb]:'))
            matchkind, query = _matchkind(query, case_sensitive=case_sensitive)
            matches = set()
            error_string = '*@*TEMPLATE_ERROR*@*'
            template_cache = {}
            for book_id in candidates:
                mi = self.dbcache.get_proxy_metadata(book_id)
                val = mi.formatter.safe_format(template, {}, error_string, mi,
                                            column_name='search template',
                                            template_cache=template_cache)
                if val.startswith(error_string):
                    raise ParseException(val[len(error_string):])
                # The separator selects how the template value is compared:
                # t(ext), n(umber), d(ate) or b(oolean)
                if sep == 't':
                    if _match(query, [val,], matchkind, use_primary_find_in_search=upf,
                              case_sensitive=case_sensitive):
                        matches.add(book_id)
                elif sep == 'n' and val:
                    matches.update(self.num_search(
                        icu_lower(query), {val:{book_id,}}.items, '', '',
                        {book_id,}, is_many=False))
                elif sep == 'd' and val:
                    matches.update(self.date_search(
                            icu_lower(query), {val:{book_id,}}.items))
                elif sep == 'b':
                    matches.update(self.bool_search(icu_lower(query),
                            {'True' if val else 'False':{book_id,}}.items, False))

            return matches

        matchkind, query = _matchkind(query, case_sensitive=case_sensitive)
        all_locs = set()
        text_fields = set()
        field_metadata = {}

        for x, fm in self.field_metadata.iter_items():
            if x.startswith('@'):
                continue
            if fm['search_terms'] and x not in {'series_sort', 'id'}:
                if x not in self.virtual_fields and x != 'uuid':
                    # We don't search virtual fields because if we do, search
                    # caching will not be used
                    all_locs.add(x)
                field_metadata[x] = fm
                if fm['datatype'] in {'composite', 'text', 'comments', 'series', 'enumeration'}:
                    text_fields.add(x)

        locations = all_locs if location == 'all' else {location}

        current_candidates = set(candidates)

        # Numeric readings of the query, used for the rating, float and int
        # fields. A reading that is not possible stays None. Ratings are
        # compared against twice the integer reading of the query
        float_query = int_query = rating_query = None
        try:
            float_query = float(query)
            int_query = int(float_query)
            rating_query = int_query * 2
        except (ValueError, OverflowError):
            pass

        for location in locations:
            current_candidates -= matches
            q = query
            if location == 'languages':
                q = canonicalize_lang(query)
                if q is None:
                    lm = lang_map()
                    rm = {v.lower():k for k,v in iteritems(lm)}
                    q = rm.get(query, query)

            if matchkind == CONTAINS_MATCH and q.lower() in {'true', 'false'}:
                found = set()
                for val, book_ids in self.field_iter(location, current_candidates):
                    if val and (not hasattr(val, 'strip') or val.strip()):
                        found |= book_ids
                matches |= (found if q.lower() == 'true' else (current_candidates-found))
                continue

            dt = field_metadata.get(location, {}).get('datatype', None)
            if dt == 'rating':
                if rating_query is not None:
                    for val, book_ids in self.field_iter(location, current_candidates):
                        if val == rating_query:
                            matches |= book_ids
                continue

            if dt == 'float':
                if float_query is not None:
                    for val, book_ids in self.field_iter(location, current_candidates):
                        if val == float_query:
                            matches |= book_ids
                continue

            if dt == 'int':
                if int_query is not None:
                    for val, book_ids in self.field_iter(location, current_candidates):
                        if val == int_query:
                            matches |= book_ids
                continue

            if location in text_fields:
                for val, book_ids in self.field_iter(location, current_candidates):
                    if val is not None:
                        if isinstance(val, string_or_bytes):
                            val = (val,)
                        if _match(q, val, matchkind, use_primary_find_in_search=upf, case_sensitive=case_sensitive):
                            matches |= book_ids

            if location == 'series_sort':
                book_lang_map = self.dbcache.fields['languages'].book_value_map
                for val, book_ids in self.dbcache.fields['series'].iter_searchable_values_for_sort(current_candidates, book_lang_map):
                    if val is not None:
                        if _match(q, (val,), matchkind, use_primary_find_in_search=upf, case_sensitive=case_sensitive):
                            matches |= book_ids

        return matches

    def get_user_category_matches(self, location, query, candidates):
        '''
        Return the books that have an item belonging to a user category.

        :param location: Name of the user category, without the leading @.
        :param query: ``false`` returns the candidates that are *not* in the
            category, any other query of at least two characters returns
            those that are. A leading period also includes the categories
            whose names start with ``location`` followed by a period.
        :param candidates: The book ids to search, not modified.
        '''
        matches = set()
        if len(query) < 2:
            return matches

        user_cats = self.dbcache._pref('user_categories')
        c = set(candidates)

        if query.startswith('.'):
            check_subcats = True
            query = query[1:]
        else:
            check_subcats = False

        for key in user_cats:
            if key == location or (check_subcats and key.startswith(location + '.')):
                for (item, category, ign) in user_cats[key]:
                    s = self.get_matches(category, '=' + item, candidates=c)
                    c -= s
                    matches |= s
        if query == 'false':
            return candidates - matches
        return matches
794# }}}
795
796
class LRUCache:  # {{{

    '''
    A simple Least-Recently-Used cache.

    ``item_map`` maps keys to values and ``age_map`` is a deque of the same
    keys ordered from least recently used (left) to most recently used
    (right). At most ``limit`` entries are kept; adding a new key to a full
    cache evicts the least recently used one. A ``limit`` smaller than one
    disables storage altogether.
    '''

    def __init__(self, limit=50):
        self.item_map = {}
        self.age_map = deque()
        self.limit = limit

    def _move_up(self, key):
        ''' Mark ``key`` (which must already be in the cache) as the most
        recently used entry. '''
        if key != self.age_map[-1]:
            self.age_map.remove(key)
            self.age_map.append(key)

    def add(self, key, val):
        '''
        Store ``val`` under ``key`` and mark it as most recently used.

        An existing key has its value replaced. A new key evicts the least
        recently used entry when the cache is full.
        '''
        if key in self.item_map:
            # Replace the stored value as well, otherwise ``cache[key] = val``
            # would silently keep serving the stale value
            self.item_map[key] = val
            self._move_up(key)
            return

        if self.limit < 1:
            # Nothing can be stored; also avoids popleft() on an empty deque
            return

        if len(self.age_map) >= self.limit:
            self.item_map.pop(self.age_map.popleft(), None)

        self.item_map[key] = val
        self.age_map.append(key)
    __setitem__  = add

    def get(self, key, default=None):
        ''' Return the value stored for ``key`` and mark it as most recently
        used, or return ``default`` if the key is not in the cache. '''
        try:
            ans = self.item_map[key]
        except KeyError:
            return default
        self._move_up(key)
        return ans

    def clear(self):
        ''' Remove all entries. '''
        self.item_map.clear()
        self.age_map.clear()

    def pop(self, key, default=None):
        ''' Remove ``key`` from the cache and return its value, or
        ``default`` if the key is not present. Never raises for a missing
        key. '''
        ans = self.item_map.pop(key, default)
        try:
            self.age_map.remove(key)
        except ValueError:
            pass
        return ans

    def __contains__(self, key):
        return key in self.item_map

    def __len__(self):
        return len(self.age_map)

    def __getitem__(self, key):
        # Missing keys yield None instead of raising KeyError
        return self.get(key)

    def __iter__(self):
        # Iterates over (key, value) pairs, not over keys alone
        return iteritems(self.item_map)
# }}}
852
853
854class Search:
855
856    MAX_CACHE_UPDATE = 50
857
    def __init__(self, db, opt_name, all_search_locations=()):
        '''
        Create the helper objects shared by all parsers made with
        :meth:`create_parser`, the saved searches store (built from ``db``
        and ``opt_name``) and the two LRU caches.
        '''
        self.all_search_locations = all_search_locations
        # Helper objects handed to every Parser created by create_parser()
        self.date_search = DateSearch()
        self.num_search = NumericSearch()
        self.bool_search = BooleanSearch()
        self.keypair_search = KeyPairSearch()
        self.saved_searches = SavedSearchQueries(db, opt_name)
        # Search results, keyed by the query string
        self.cache = LRUCache()
        # Passed as the last argument to every Parser; cleared together with
        # the result cache when the set of search locations changes
        self.parse_cache = LRUCache(limit=100)
867
    def get_saved_searches(self):
        ''' Return the saved searches object created in ``__init__``. '''
        return self.saved_searches
870
871    def change_locations(self, newlocs):
872        if frozenset(newlocs) != frozenset(self.all_search_locations):
873            self.clear_caches()
874            self.parse_cache.clear()
875        self.all_search_locations = newlocs
876
877    def update_or_clear(self, dbcache, book_ids=None):
878        if book_ids and (len(book_ids) * len(self.cache)) <= self.MAX_CACHE_UPDATE:
879            self.update_caches(dbcache, book_ids)
880        else:
881            self.clear_caches()
882
    def clear_caches(self):
        ''' Drop every cached search result. The parse cache is not touched
        by this method. '''
        self.cache.clear()
885
886    def update_caches(self, dbcache, book_ids):
887        sqp = self.create_parser(dbcache)
888        try:
889            return self._update_caches(sqp, book_ids)
890        finally:
891            sqp.dbcache = sqp.lookup_saved_search = None
892
893    def discard_books(self, book_ids):
894        book_ids = set(book_ids)
895        for query, result in self.cache:
896            result.difference_update(book_ids)
897
898    def _update_caches(self, sqp, book_ids):
899        book_ids = sqp.all_book_ids = set(book_ids)
900        remove = set()
901        for query, result in tuple(self.cache):
902            try:
903                matches = sqp.parse(query)
904            except ParseException:
905                remove.add(query)
906            else:
907                # remove books that no longer match
908                result.difference_update(book_ids - matches)
909                # add books that now match but did not before
910                result.update(matches)
911        for query in remove:
912            self.cache.pop(query)
913
914    def create_parser(self, dbcache, virtual_fields=None):
915        return Parser(
916            dbcache, set(), dbcache._pref('grouped_search_terms'),
917            self.date_search, self.num_search, self.bool_search,
918            self.keypair_search,
919            prefs['limit_search_columns'],
920            prefs['limit_search_columns_to'], self.all_search_locations,
921            virtual_fields, self.saved_searches.lookup, self.parse_cache)
922
923    def __call__(self, dbcache, query, search_restriction, virtual_fields=None, book_ids=None):
924        '''
925        Return the set of ids of all records that match the specified
926        query and restriction
927        '''
928        # We construct a new parser instance per search as the parse is not
929        # thread safe.
930        sqp = self.create_parser(dbcache, virtual_fields)
931        try:
932            return self._do_search(sqp, query, search_restriction, dbcache, book_ids=book_ids)
933        finally:
934            sqp.dbcache = sqp.lookup_saved_search = None
935
936    def query_is_cacheable(self, sqp, dbcache, query):
937        if query:
938            for name, value in sqp.get_queried_fields(query):
939                if name == 'template' and '#@#:d:' in value:
940                    return False
941                elif name in dbcache.field_metadata.all_field_keys():
942                    fm = dbcache.field_metadata[name]
943                    if fm['datatype'] == 'datetime':
944                        return False
945                    if fm['datatype'] == 'composite':
946                        if fm.get('display', {}).get('composite_sort', '') == 'date':
947                            return False
948        return True
949
950    def _do_search(self, sqp, query, search_restriction, dbcache, book_ids=None):
951        ''' Do the search, caching the results. Results are cached only if the
952        search is on the full library and no virtual field is searched on '''
953        if isinstance(search_restriction, bytes):
954            search_restriction = search_restriction.decode('utf-8')
955        if isinstance(query, bytes):
956            query = query.decode('utf-8')
957
958        query = query.strip()
959        use_cache = self.query_is_cacheable(sqp, dbcache, query)
960
961        if use_cache and book_ids is None and query and not search_restriction:
962            cached = self.cache.get(query)
963            if cached is not None:
964                return cached
965
966        restricted_ids = all_book_ids = dbcache._all_book_ids(type=set)
967        if search_restriction and search_restriction.strip():
968            sr = search_restriction.strip()
969            sqp.all_book_ids = all_book_ids if book_ids is None else book_ids
970            if self.query_is_cacheable(sqp, dbcache, sr):
971                cached = self.cache.get(sr)
972                if cached is None:
973                    restricted_ids = sqp.parse(sr)
974                    if not sqp.virtual_field_used and sqp.all_book_ids is all_book_ids:
975                        self.cache.add(sr, restricted_ids)
976                else:
977                    restricted_ids = cached
978                    if book_ids is not None:
979                        restricted_ids = book_ids.intersection(restricted_ids)
980            else:
981                restricted_ids = sqp.parse(sr)
982        elif book_ids is not None:
983            restricted_ids = book_ids
984
985        if not query:
986            return restricted_ids
987
988        if use_cache and restricted_ids is all_book_ids:
989            cached = self.cache.get(query)
990            if cached is not None:
991                return cached
992
993        sqp.all_book_ids = restricted_ids
994        result = sqp.parse(query)
995
996        if not sqp.virtual_field_used and sqp.all_book_ids is all_book_ids:
997            self.cache.add(query, result)
998
999        return result
1000