1#!/usr/local/bin/python3.8
2# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
3
4
5__license__   = 'GPL v3'
6__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
7__docformat__ = 'restructuredtext en'
8
9import time, traceback, locale
10from itertools import repeat
11from datetime import timedelta
12from threading import Thread
13
14from calibre.utils.config import tweaks, prefs
15from calibre.utils.date import parse_date, now, UNDEFINED_DATE, clean_date_for_sort
16from calibre.utils.search_query_parser import SearchQueryParser
17from calibre.utils.search_query_parser import ParseException
18from calibre.utils.localization import (canonicalize_lang, lang_map, get_udc)
19from calibre.db.search import CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH, _match
20from calibre.ebooks.metadata import title_sort, author_to_author_sort
21from calibre.ebooks.metadata.opf2 import metadata_to_opf
22from calibre import prints, force_unicode
23from polyglot.builtins import iteritems, itervalues, string_or_bytes, cmp
24
25
class MetadataBackup(Thread):  # {{{
    '''
    Continuously backup changed metadata into OPF files
    in the book directory. This class runs in its own
    thread and makes sure that the actual file write happens in the
    GUI thread to prevent Windows' file locking from causing problems.
    '''

    def __init__(self, db):
        # db: the library database. All db access from this thread goes
        # through FunctionDispatcher wrappers so the calls execute in the
        # GUI thread (see the class docstring).
        Thread.__init__(self)
        self.daemon = True  # don't block interpreter exit
        self.db = db
        self.keep_running = True  # cleared by stop() to end the run() loop
        from calibre.gui2 import FunctionDispatcher
        self.do_write = FunctionDispatcher(self.write)
        self.get_metadata_for_dump = FunctionDispatcher(db.get_metadata_for_dump)
        self.clear_dirtied = FunctionDispatcher(db.clear_dirtied)
        self.set_dirtied = FunctionDispatcher(db.dirtied)

    def stop(self):
        # Request the backup loop to exit; takes effect at the next check
        # of keep_running inside run()
        self.keep_running = False

    def break_cycles(self):
        # Break cycles so that this object doesn't hold references to db
        self.do_write = self.get_metadata_for_dump = self.clear_dirtied = \
            self.set_dirtied = self.db = None

    def run(self):
        # Main backup loop: pick one dirtied book at a time, fetch its
        # metadata, serialize to OPF, write the backup file, then mark the
        # book clean. keep_running is re-checked between steps so stop()
        # terminates the loop promptly. Transient failures are retried once
        # after a 2 second pause before giving up on that book.
        while self.keep_running:
            try:
                time.sleep(2)  # Limit to one book per two seconds
                (id_, sequence) = self.db.get_a_dirtied_book()
                if id_ is None:
                    continue
                # print 'writer thread', id_, sequence
            except:
                # Happens during interpreter shutdown
                break
            if not self.keep_running:
                break

            try:
                path, mi, sequence = self.get_metadata_for_dump(id_)
            except:
                prints('Failed to get backup metadata for id:', id_, 'once')
                traceback.print_exc()
                time.sleep(2)  # wait before the single retry
                try:
                    path, mi, sequence = self.get_metadata_for_dump(id_)
                except:
                    prints('Failed to get backup metadata for id:', id_, 'again, giving up')
                    traceback.print_exc()
                    continue

            if mi is None:
                # Nothing to dump for this book; just mark it clean
                self.clear_dirtied(id_, sequence)
                continue
            if not self.keep_running:
                break

            # Give the GUI thread a chance to do something. Python threads don't
            # have priorities, so this thread would naturally keep the processor
            # until some scheduling event happens. The sleep makes such an event
            # (and hence a GUI thread time slice) more likely.
            time.sleep(0.1)
            try:
                raw = metadata_to_opf(mi)
            except:
                prints('Failed to convert to opf for id:', id_)
                traceback.print_exc()
                continue

            if not self.keep_running:
                break

            time.sleep(0.1)  # Give the GUI thread a chance to do something
            try:
                self.do_write(path, raw)
            except:
                prints('Failed to write backup metadata for id:', id_, 'once')
                time.sleep(2)  # wait before the single retry
                try:
                    self.do_write(path, raw)
                except:
                    prints('Failed to write backup metadata for id:', id_,
                            'again, giving up')
                    continue

            self.clear_dirtied(id_, sequence)
        self.break_cycles()

    def write(self, path, raw):
        # Executes in the GUI thread via the do_write dispatcher.
        # NOTE(review): lopen appears to be calibre's global open() wrapper
        with lopen(path, 'wb') as f:
            f.write(raw)
119
120
121# }}}
122
# Global utility function for get_match here and in gui2/library.py
# This is a global (rather than a per-call preference lookup) for performance;
# it is refreshed from prefs whenever a ResultCache is constructed.
pref_use_primary_find_in_search = False
126
127
def set_use_primary_find_in_search(toWhat):
    '''Update the module-level cached 'use primary find in search' flag.'''
    global pref_use_primary_find_in_search
    pref_use_primary_find_in_search = toWhat
131
132
# Lower-cased vocabulary recognized by force_to_bool() as true/false values
_truthy_words = (_('yes'), _('checked'))
_falsy_words = (_('no'), _('unchecked'))
yes_vals = {icu_lower(w) for w in _truthy_words} | {'true'}
no_vals = {icu_lower(w) for w in _falsy_words} | {'false'}
del _truthy_words, _falsy_words
137
138
def force_to_bool(val):
    '''
    Best-effort conversion of val to a bool.

    Strings (bytes are decoded first) are lower-cased and matched against
    the localized yes/no vocabulary plus 'true'/'false'; failing that, the
    string is parsed as an integer and its truthiness returned. Empty or
    unparseable strings become None. Non-string values are returned
    unchanged.
    '''
    if isinstance(val, (bytes, str)):
        if isinstance(val, bytes):
            val = force_unicode(val)
        try:
            val = icu_lower(val)
            if not val:
                val = None
            elif val in yes_vals:
                val = True
            elif val in no_vals:
                val = False
            else:
                val = bool(int(val))
        except (ValueError, TypeError):
            # Not in the vocabulary and not an integer: undefined
            val = None
    return val
156
157
class CacheRow(list):  # {{{

    '''
    One row of the metadata cache. Behaves like a plain list of field
    values, but composite columns and the series sort value are computed
    lazily, on first access.
    '''

    def __init__(self, db, composites, datetimes, val, series_col, series_sort_col):
        '''
        :param db: database used to compute composite column values
        :param composites: map of rec_index -> composite field key
        :param datetimes: rec_indices of datetime fields; parsed in place
        :param val: raw list of field values for this row
        :param series_col: rec_index of the series column
        :param series_sort_col: rec_index of the computed series sort column
        '''
        from calibre.db.tables import c_parse
        self.db = db
        self._composites = composites
        for num in datetimes:
            val[num] = c_parse(val[num])
            if val[num] is UNDEFINED_DATE:
                val[num] = None
        list.__init__(self, val)
        # Composite values are filled in lazily on first access of any
        # composite column
        self._must_do = len(composites) > 0
        self._series_col = series_col
        self._series_sort_col = series_sort_col
        self._series_sort = None

    def __getitem__(self, col):
        if self._must_do:
            is_comp = False
            if isinstance(col, slice):
                # Determine whether the slice covers any composite column.
                # Fix: the step must come from col.step (previously col.stop
                # was used as the step, so only the first index was checked
                # and a None stop crashed range()).
                start = 0 if col.start is None else col.start
                stop = len(self) if col.stop is None else col.stop
                step = 1 if col.step is None else col.step
                for c in range(start, stop, step):
                    if c in self._composites:
                        is_comp = True
                        break
            elif col in self._composites:
                is_comp = True
            if is_comp:
                id_ = list.__getitem__(self, 0)
                self._must_do = False
                mi = self.db.get_metadata(id_, index_is_id=True,
                                          get_user_categories=False)
                for c in self._composites:
                    self[c] = mi.get(self._composites[c])
        if col == self._series_sort_col and self._series_sort is None:
            # Lazily compute the series sort value from the series column
            if self[self._series_col]:
                self._series_sort = title_sort(self[self._series_col])
                self[self._series_sort_col] = self._series_sort
            else:
                self._series_sort = ''
                self[self._series_sort_col] = ''
        return list.__getitem__(self, col)

    def __getslice__(self, i, j):
        # Python 2 legacy slice protocol; delegate to __getitem__
        return self.__getitem__(slice(i, j))

    def refresh_composites(self):
        # Invalidate cached composite values so they are recomputed lazily
        for c in self._composites:
            self[c] = None
        self._must_do = True
209
210# }}}
211
212
213class ResultCache(SearchQueryParser):  # {{{
214
215    '''
216    Stores sorted and filtered metadata in memory.
217    '''
218
    def __init__(self, FIELD_MAP, field_metadata, db_prefs=None):
        '''
        :param FIELD_MAP: map of field name -> index into each data row
        :param field_metadata: metadata describing all fields (datatype,
            rec_index, search terms, ...)
        :param db_prefs: per-library preferences; used for tristate bools,
            user categories and grouped search terms
        '''
        self.FIELD_MAP = FIELD_MAP
        self.db_prefs = db_prefs
        self.composites = {}  # rec_index -> composite field key
        self.datetimes = set()  # rec_indices of datetime fields
        self.udc = get_udc()
        for key in field_metadata:
            dt = field_metadata[key]['datatype']
            if dt == 'composite':
                self.composites[field_metadata[key]['rec_index']] = key
            elif dt == 'datetime':
                self.datetimes.add(field_metadata[key]['rec_index'])
        self.series_col = field_metadata['series']['rec_index']
        self.series_sort_col = field_metadata['series_sort']['rec_index']
        self._data = []  # all rows, indexed by book id; may contain None
        # _map_filtered: book ids of rows in the current (searched) view.
        # _map: presumably all ids in sort order -- not used in this chunk
        self._map = self._map_filtered = []
        self.first_sort = True
        self.search_restriction = self.base_restriction = ''
        self.base_restriction_name = self.search_restriction_name = ''
        self.search_restriction_book_count = 0
        self.marked_ids_dict = {}
        self.field_metadata = field_metadata
        self.all_search_locations = field_metadata.get_search_terms()
        SearchQueryParser.__init__(self, self.all_search_locations, optimize=True)
        self.build_date_relop_dict()
        self.build_numeric_relop_dict()
        # Do this here so the var get updated when a library changes
        global pref_use_primary_find_in_search
        pref_use_primary_find_in_search = prefs['use_primary_find_in_search']
        self._uuid_column_index = self.FIELD_MAP['uuid']
        self._uuid_map = {}  # uuid -> book id
250
251    def break_cycles(self):
252        self._data = self.field_metadata = self.FIELD_MAP = \
253            self.numeric_search_relops = self.date_search_relops = \
254            self.db_prefs = self.all_search_locations = None
255        self.sqp_change_locations([])
256
257    def __getitem__(self, row):
258        return self._data[self._map_filtered[row]]
259
260    def __len__(self):
261        return len(self._map_filtered)
262
263    def __iter__(self):
264        for id in self._map_filtered:
265            yield self._data[id]
266
267    def iterall(self):
268        for x in self._data:
269            if x is not None:
270                yield x
271
272    def iterallids(self):
273        idx = self.FIELD_MAP['id']
274        for x in self.iterall():
275            yield x[idx]
276
277    # Search functions {{{
278
279    def universal_set(self):
280        return {i[0] for i in self._data if i is not None}
281
282    def change_search_locations(self, locations):
283        self.sqp_change_locations(locations)
284        self.all_search_locations = locations
285
    def build_date_relop_dict(self):
        '''
        Because the database dates have time in them, we can't use direct
        comparisons even when field_count == 3. The query has time = 0, but
        the database object has time == something. As such, a complete compare
        will almost never be correct.

        Each operator maps to [prefix_length, fn(db, query, field_count)]
        where field_count is the comparison granularity: 1 = year only,
        2 = year and month, 3 = full date.
        '''
        def relop_eq(db, query, field_count):
            # Equal at the requested granularity
            if db.year == query.year:
                if field_count == 1:
                    return True
                if db.month == query.month:
                    if field_count == 2:
                        return True
                    return db.day == query.day
            return False

        def relop_gt(db, query, field_count):
            # Strictly greater at the requested granularity
            if db.year > query.year:
                return True
            if field_count > 1 and db.year == query.year:
                if db.month > query.month:
                    return True
                return field_count == 3 and db.month == query.month and db.day > query.day
            return False

        def relop_lt(db, query, field_count):
            # Strictly less at the requested granularity
            if db.year < query.year:
                return True
            if field_count > 1 and db.year == query.year:
                if db.month < query.month:
                    return True
                return field_count == 3 and db.month == query.month and db.day < query.day
            return False

        # The remaining operators are negations of the three above
        def relop_ne(db, query, field_count):
            return not relop_eq(db, query, field_count)

        def relop_ge(db, query, field_count):
            return not relop_lt(db, query, field_count)

        def relop_le(db, query, field_count):
            return not relop_gt(db, query, field_count)

        self.date_search_relops = {
                            '=' :[1, relop_eq],
                            '>' :[1, relop_gt],
                            '<' :[1, relop_lt],
                            '!=':[2, relop_ne],
                            '>=':[2, relop_ge],
                            '<=':[2, relop_le]
                        }
338
    # Tokens recognized by get_dates_matches(): each special date value is
    # accepted either as its canonical '_'-prefixed English form or as its
    # (lower-cased) localized translation.
    local_today         = ('_today', icu_lower(_('today')))
    local_yesterday     = ('_yesterday', icu_lower(_('yesterday')))
    local_thismonth     = ('_thismonth', icu_lower(_('thismonth')))
    local_daysago       = icu_lower(_('daysago'))
    local_daysago_len   = len(local_daysago)
    untrans_daysago     = '_daysago'
    untrans_daysago_len = len('_daysago')
346
347    def get_dates_matches(self, location, query, candidates):
348        matches = set()
349        if len(query) < 2:
350            return matches
351
352        if location == 'date':
353            location = 'timestamp'
354        loc = self.field_metadata[location]['rec_index']
355
356        if query == 'false':
357            for id_ in candidates:
358                item = self._data[id_]
359                if item is None:
360                    continue
361                v = item[loc]
362                if isinstance(v, (bytes, str)):
363                    v = parse_date(v)
364                if v is None or v <= UNDEFINED_DATE:
365                    matches.add(item[0])
366            return matches
367        if query == 'true':
368            for id_ in candidates:
369                item = self._data[id_]
370                if item is None:
371                    continue
372                v = item[loc]
373                if isinstance(v, (bytes, str)):
374                    v = parse_date(v)
375                if v is not None and v > UNDEFINED_DATE:
376                    matches.add(item[0])
377            return matches
378
379        relop = None
380        for k in self.date_search_relops.keys():
381            if query.startswith(k):
382                (p, relop) = self.date_search_relops[k]
383                query = query[p:]
384        if relop is None:
385            (p, relop) = self.date_search_relops['=']
386
387        if query in self.local_today:
388            qd = now()
389            field_count = 3
390        elif query in self.local_yesterday:
391            qd = now() - timedelta(1)
392            field_count = 3
393        elif query in self.local_thismonth:
394            qd = now()
395            field_count = 2
396        elif query.endswith(self.local_daysago) or query.endswith(self.untrans_daysago):
397            num = query[0:-(self.untrans_daysago_len if query.endswith(self.untrans_daysago) else self.local_daysago_len)]
398            try:
399                qd = now() - timedelta(int(num))
400            except:
401                raise ParseException(_('Number conversion error: {0}').format(num))
402            field_count = 3
403        else:
404            try:
405                qd = parse_date(query, as_utc=False)
406            except:
407                raise ParseException(_('Date conversion error: {0}').format(query))
408            if '-' in query:
409                field_count = query.count('-') + 1
410            else:
411                field_count = query.count('/') + 1
412        for id_ in candidates:
413            item = self._data[id_]
414            if item is None or item[loc] is None:
415                continue
416            v = item[loc]
417            if isinstance(v, (bytes, str)):
418                v = parse_date(v)
419            if relop(v, qd, field_count):
420                matches.add(item[0])
421        return matches
422
423    def build_numeric_relop_dict(self):
424        self.numeric_search_relops = {
425                        '=':[1, lambda r, q: r == q],
426                        '>':[1, lambda r, q: r is not None and r > q],
427                        '<':[1, lambda r, q: r is not None and r < q],
428                        '!=':[2, lambda r, q: r != q],
429                        '>=':[2, lambda r, q: r is not None and r >= q],
430                        '<=':[2, lambda r, q: r is not None and r <= q]
431                    }
432
433    def get_numeric_matches(self, location, query, candidates, val_func=None):
434        matches = set()
435        if len(query) == 0:
436            return matches
437
438        if val_func is None:
439            loc = self.field_metadata[location]['rec_index']
440            val_func = lambda item, loc=loc: item[loc]
441        q = ''
442        cast = adjust = lambda x: x
443        dt = self.field_metadata[location]['datatype']
444
445        if query == 'false':
446            if dt == 'rating' or location == 'cover':
447                relop = lambda x,y: not bool(x)
448            else:
449                relop = lambda x,y: x is None
450        elif query == 'true':
451            if dt == 'rating' or location == 'cover':
452                relop = lambda x,y: bool(x)
453            else:
454                relop = lambda x,y: x is not None
455        else:
456            relop = None
457            for k in self.numeric_search_relops.keys():
458                if query.startswith(k):
459                    (p, relop) = self.numeric_search_relops[k]
460                    query = query[p:]
461            if relop is None:
462                (p, relop) = self.numeric_search_relops['=']
463
464            if dt == 'int':
465                cast = lambda x: int(x)
466            elif dt == 'rating':
467                cast = lambda x: 0 if x is None else int(x)
468                adjust = lambda x: x//2
469            elif dt in ('float', 'composite'):
470                cast = lambda x : float(x)
471            else:  # count operation
472                cast = (lambda x: int(x))
473
474            if len(query) > 1:
475                mult = query[-1:].lower()
476                mult = {'k':1024.,'m': 1024.**2, 'g': 1024.**3}.get(mult, 1.0)
477                if mult != 1.0:
478                    query = query[:-1]
479            else:
480                mult = 1.0
481            try:
482                q = cast(query) * mult
483            except:
484                raise ParseException(_('Non-numeric value in query: {0}').format(query))
485
486        for id_ in candidates:
487            item = self._data[id_]
488            if item is None:
489                continue
490            try:
491                v = cast(val_func(item))
492            except:
493                v = None
494            if v:
495                v = adjust(v)
496            if relop(v, q):
497                matches.add(item[0])
498        return matches
499
500    def get_user_category_matches(self, location, query, candidates):
501        matches = set()
502        if self.db_prefs is None or len(query) < 2:
503            return matches
504        user_cats = self.db_prefs.get('user_categories', [])
505        c = set(candidates)
506
507        if query.startswith('.'):
508            check_subcats = True
509            query = query[1:]
510        else:
511            check_subcats = False
512
513        for key in user_cats:
514            if key == location or (check_subcats and key.startswith(location + '.')):
515                for (item, category, ign) in user_cats[key]:
516                    s = self.get_matches(category, '=' + item, candidates=c)
517                    c -= s
518                    matches |= s
519        if query == 'false':
520            return candidates - matches
521        return matches
522
    def get_keypair_matches(self, location, query, candidates):
        '''
        Match colon-separated key:value fields such as identifiers. The
        query may itself be 'key:value' to constrain both parts, or a bare
        value matched against the value part only. A value query of 'true'
        matches books with any value, 'false' matches books with none.
        '''
        matches = set()
        if query.find(':') >= 0:
            # Split the query into key and value sub-queries, each with its
            # own match kind (contains/equals/regexp)
            q = [q.strip() for q in query.split(':')]
            if len(q) != 2:
                raise ParseException(
                 _('Invalid query format for colon-separated search: {0}').format(query))
            (keyq, valq) = q
            keyq_mkind, keyq = self._matchkind(keyq)
            valq_mkind, valq = self._matchkind(valq)
        else:
            keyq = keyq_mkind = ''
            valq_mkind, valq = self._matchkind(query)

        loc = self.field_metadata[location]['rec_index']
        split_char = self.field_metadata[location]['is_multiple'].get(
                'cache_to_list', ',')
        for id_ in candidates:
            item = self._data[id_]
            if item is None:
                continue

            if item[loc] is None:
                # No pairs at all: only a 'false' value query matches
                if valq == 'false':
                    matches.add(id_)
                continue

            # For a 'false' value query the book matches only if no pair
            # below produces a value
            add_if_nothing_matches = valq == 'false'
            pairs = [p.strip() for p in item[loc].split(split_char)]
            for pair in pairs:
                parts = pair.split(':')
                if len(parts) != 2:
                    continue
                # One-element lists: _match presumably iterates a list of
                # candidate values -- TODO confirm against db.search._match
                k = parts[:1]
                v = parts[1:]
                if keyq and not _match(keyq, k, keyq_mkind,
                                       use_primary_find_in_search=pref_use_primary_find_in_search):
                    continue
                if valq:
                    if valq == 'true':
                        if not v:
                            continue
                    elif valq == 'false':
                        if v:
                            add_if_nothing_matches = False
                            continue
                    elif not _match(valq, v, valq_mkind,
                                    use_primary_find_in_search=pref_use_primary_find_in_search):
                        continue
                matches.add(id_)

            if add_if_nothing_matches:
                matches.add(id_)
        return matches
577
578    def _matchkind(self, query):
579        matchkind = CONTAINS_MATCH
580        if (len(query) > 1):
581            if query.startswith('\\'):
582                query = query[1:]
583            elif query.startswith('='):
584                matchkind = EQUALS_MATCH
585                query = query[1:]
586            elif query.startswith('~'):
587                matchkind = REGEXP_MATCH
588                query = query[1:]
589
590        if matchkind != REGEXP_MATCH:
591            # leave case in regexps because it can be significant e.g. \S \W \D
592            query = icu_lower(query)
593        return matchkind, query
594
    # Vocabulary accepted by get_bool_matches(): each concept is accepted as
    # its (lower-cased) localized translation, its canonical '_'-prefixed
    # form, or the English 'true'/'false'.
    local_no        = icu_lower(_('no'))
    local_yes       = icu_lower(_('yes'))
    local_unchecked = icu_lower(_('unchecked'))
    local_checked   = icu_lower(_('checked'))
    local_empty     = icu_lower(_('empty'))
    local_blank     = icu_lower(_('blank'))
    local_bool_values = (
                    local_no, local_unchecked, '_no', 'false',
                    local_yes, local_checked, '_yes', 'true',
                    local_empty, local_blank, '_empty')
605
    def get_bool_matches(self, location, query, candidates):
        '''
        Match books on a boolean field.

        In non-tristate mode an unset (None) value counts as False. In
        tristate mode 'empty'/'blank'/'false' match the unset state, while
        'true' matches any explicitly set value, whether yes or no.
        '''
        bools_are_tristate = self.db_prefs.get('bools_are_tristate')
        loc = self.field_metadata[location]['rec_index']
        matches = set()
        query = icu_lower(query)
        if query not in self.local_bool_values:
            raise ParseException(_('Invalid boolean query "{0}"').format(query))
        for id_ in candidates:
            item = self._data[id_]
            if item is None:
                continue

            val = force_to_bool(item[loc])
            if not bools_are_tristate:
                if val is None or not val:  # item is None or set to false
                    if query in (self.local_no, self.local_unchecked, '_no', 'false'):
                        matches.add(item[0])
                else:  # item is explicitly set to true
                    if query in (self.local_yes, self.local_checked, '_yes', 'true'):
                        matches.add(item[0])
            else:
                if val is None:
                    if query in (self.local_empty, self.local_blank, '_empty', 'false'):
                        matches.add(item[0])
                elif not val:  # is not None and false; note that 'true' also
                               # matches here since the value is set
                    if query in (self.local_no, self.local_unchecked, '_no', 'true'):
                        matches.add(item[0])
                else:  # item is not None and true
                    if query in (self.local_yes, self.local_checked, '_yes', 'true'):
                        matches.add(item[0])
        return matches
637
638    def get_matches(self, location, query, candidates=None,
639            allow_recursion=True):
640        # If candidates is not None, it must not be modified. Changing its
641        # value will break query optimization in the search parser
642        matches = set()
643        if candidates is None:
644            candidates = self.universal_set()
645        if len(candidates) == 0:
646            return matches
647        if location not in self.all_search_locations:
648            return matches
649
650        if len(location) > 2 and location.startswith('@') and \
651                    location[1:] in self.db_prefs['grouped_search_terms']:
652            location = location[1:]
653
654        if query and query.strip():
655            # get metadata key associated with the search term. Eliminates
656            # dealing with plurals and other aliases
657            original_location = location
658            location = self.field_metadata.search_term_to_field_key(icu_lower(location.strip()))
659            # grouped search terms
660            if isinstance(location, list):
661                if allow_recursion:
662                    if query.lower() == 'false':
663                        invert = True
664                        query = 'true'
665                    else:
666                        invert = False
667                    for loc in location:
668                        c = candidates.copy()
669                        m = self.get_matches(loc, query,
670                                candidates=c, allow_recursion=False)
671                        matches |= m
672                        c -= m
673                        if len(c) == 0:
674                            break
675                    if invert:
676                        matches = self.universal_set() - matches
677                    return matches
678                raise ParseException(_('Recursive query group detected: {0}').format(query))
679
680            # apply the limit if appropriate
681            if location == 'all' and prefs['limit_search_columns'] and \
682                            prefs['limit_search_columns_to']:
683                terms = set()
684                for l in prefs['limit_search_columns_to']:
685                    l = icu_lower(l.strip())
686                    if l and l != 'all' and l in self.all_search_locations:
687                        terms.add(l)
688                if terms:
689                    c = candidates.copy()
690                    for l in terms:
691                        try:
692                            m = self.get_matches(l, query,
693                                candidates=c, allow_recursion=allow_recursion)
694                            matches |= m
695                            c -= m
696                            if len(c) == 0:
697                                break
698                        except:
699                            pass
700                    return matches
701
702            if location in self.field_metadata:
703                fm = self.field_metadata[location]
704                # take care of dates special case
705                if fm['datatype'] == 'datetime' or \
706                        (fm['datatype'] == 'composite' and
707                         fm['display'].get('composite_sort', '') == 'date'):
708                    return self.get_dates_matches(location, query.lower(), candidates)
709
710                # take care of numbers special case
711                if fm['datatype'] in ('rating', 'int', 'float') or \
712                        (fm['datatype'] == 'composite' and
713                         fm['display'].get('composite_sort', '') == 'number'):
714                    return self.get_numeric_matches(location, query.lower(), candidates)
715
716                if fm['datatype'] == 'bool':
717                    return self.get_bool_matches(location, query, candidates)
718
719                # take care of the 'count' operator for is_multiples
720                if fm['is_multiple'] and \
721                        len(query) > 1 and query.startswith('#') and \
722                        query[1:1] in '=<>!':
723                    vf = lambda item, loc=fm['rec_index'], \
724                                ms=fm['is_multiple']['cache_to_list']:\
725                                len(item[loc].split(ms)) if item[loc] is not None else 0
726                    return self.get_numeric_matches(location, query[1:],
727                                                    candidates, val_func=vf)
728
729                # special case: colon-separated fields such as identifiers. isbn
730                # is a special case within the case
731                if fm.get('is_csp', False):
732                    if location == 'identifiers' and original_location == 'isbn':
733                        return self.get_keypair_matches('identifiers',
734                                                   '=isbn:'+query, candidates)
735                    return self.get_keypair_matches(location, query, candidates)
736
737            # check for user categories
738            if len(location) >= 2 and location.startswith('@'):
739                return self.get_user_category_matches(location[1:], query.lower(),
740                                                      candidates)
741            # everything else, or 'all' matches
742            matchkind, query = self._matchkind(query)
743
744            if not isinstance(query, str):
745                query = query.decode('utf-8')
746
747            db_col = {}
748            exclude_fields = []  # fields to not check when matching against text.
749            col_datatype = []
750            is_multiple_cols = {}
751            for x in range(len(self.FIELD_MAP)):
752                col_datatype.append('')
753            for x in self.field_metadata:
754                if x.startswith('@'):
755                    continue
756                if len(self.field_metadata[x]['search_terms']):
757                    db_col[x] = self.field_metadata[x]['rec_index']
758                    if self.field_metadata[x]['datatype'] not in \
759                            ['composite', 'text', 'comments', 'series', 'enumeration']:
760                        exclude_fields.append(db_col[x])
761                    col_datatype[db_col[x]] = self.field_metadata[x]['datatype']
762                    is_multiple_cols[db_col[x]] = \
763                        self.field_metadata[x]['is_multiple'].get('cache_to_list', None)
764
765            try:
766                rating_query = int(query) * 2
767            except:
768                rating_query = None
769
770            location = [location] if location != 'all' else list(db_col.keys())
771            for i, loc in enumerate(location):
772                location[i] = db_col[loc]
773
774            current_candidates = candidates.copy()
775            for loc in location:  # location is now an array of field indices
776                if loc == db_col['authors']:
777                    # DB stores authors with commas changed to bars, so change query
778                    if matchkind == REGEXP_MATCH:
779                        q = query.replace(',', r'\|')
780                    else:
781                        q = query.replace(',', '|')
782                elif loc == db_col['languages']:
783                    q = canonicalize_lang(query)
784                    if q is None:
785                        lm = lang_map()
786                        rm = {v.lower():k for k,v in iteritems(lm)}
787                        q = rm.get(query, query)
788                else:
789                    q = query
790
791                for id_ in current_candidates:
792                    item = self._data[id_]
793                    if item is None:
794                        continue
795
796                    if not item[loc]:
797                        if q == 'false' and matchkind == CONTAINS_MATCH:
798                            matches.add(item[0])
799                        continue     # item is empty. No possible matches below
800                    if q == 'false'and matchkind == CONTAINS_MATCH:
801                        # Field has something in it, so a false query does not match
802                        continue
803
804                    if q == 'true' and matchkind == CONTAINS_MATCH:
805                        if isinstance(item[loc], string_or_bytes):
806                            if item[loc].strip() == '':
807                                continue
808                        matches.add(item[0])
809                        continue
810
811                    if col_datatype[loc] == 'rating':  # get here if 'all' query
812                        if rating_query and rating_query == int(item[loc]):
813                            matches.add(item[0])
814                        continue
815
816                    try:  # a conversion below might fail
817                        # relationals are not supported in 'all' queries
818                        if col_datatype[loc] == 'float':
819                            if float(query) == item[loc]:
820                                matches.add(item[0])
821                            continue
822                        if col_datatype[loc] == 'int':
823                            if int(query) == item[loc]:
824                                matches.add(item[0])
825                            continue
826                    except:
827                        # A conversion threw an exception. Because of the type,
828                        # no further match is possible
829                        continue
830
831                    if loc not in exclude_fields:  # time for text matching
832                        if is_multiple_cols[loc] is not None:
833                            vals = [v.strip() for v in item[loc].split(is_multiple_cols[loc])]
834                        else:
835                            vals = [item[loc]]  # make into list to make _match happy
836                        if _match(q, vals, matchkind,
837                                  use_primary_find_in_search=pref_use_primary_find_in_search):
838                            matches.add(item[0])
839                            continue
840                current_candidates -= matches
841        return matches
842
843    def search(self, query, return_matches=False, sort_results=True):
844        ans = self.search_getting_ids(query, self.search_restriction,
845                                      set_restriction_count=True, sort_results=sort_results)
846        if return_matches:
847            return ans
848        self._map_filtered = ans
849
850    def _build_restriction_string(self, restriction):
851        if self.base_restriction:
852            if restriction:
853                return '(%s) and (%s)' % (self.base_restriction, restriction)
854            else:
855                return self.base_restriction
856        else:
857            return restriction
858
859    def search_getting_ids(self, query, search_restriction,
860                           set_restriction_count=False, use_virtual_library=True, sort_results=True):
861        if use_virtual_library:
862            search_restriction = self._build_restriction_string(search_restriction)
863        q = ''
864        if not query or not query.strip():
865            q = search_restriction
866        else:
867            q = query
868            if search_restriction:
869                q = '(%s) and (%s)' % (search_restriction, query)
870        if not q:
871            if set_restriction_count:
872                self.search_restriction_book_count = len(self._map)
873            return list(self._map)
874        matches = self.parse(q)
875        tmap = list(repeat(False, len(self._data)))
876        for x in matches:
877            tmap[x] = True
878        rv = [x for x in self._map if tmap[x]]
879        if set_restriction_count and q == search_restriction:
880            self.search_restriction_book_count = len(rv)
881        return rv
882
883    def get_search_restriction(self):
884        return self.search_restriction
885
886    def set_search_restriction(self, s):
887        self.search_restriction = s
888
889    def get_base_restriction(self):
890        return self.base_restriction
891
892    def set_base_restriction(self, s):
893        self.base_restriction = s
894
895    def get_base_restriction_name(self):
896        return self.base_restriction_name
897
898    def set_base_restriction_name(self, s):
899        self.base_restriction_name = s
900
901    def get_search_restriction_name(self):
902        return self.search_restriction_name
903
904    def set_search_restriction_name(self, s):
905        self.search_restriction_name = s
906
907    def search_restriction_applied(self):
908        return bool(self.search_restriction) or bool(self.base_restriction)
909
910    def get_search_restriction_book_count(self):
911        return self.search_restriction_book_count
912
913    def set_marked_ids(self, id_dict):
914        '''
915        ids in id_dict are "marked". They can be searched for by
916        using the search term ``marked:true``. Pass in an empty dictionary or
917        set to clear marked ids.
918
919        :param id_dict: Either a dictionary mapping ids to values or a set
920        of ids. In the latter case, the value is set to 'true' for all ids. If
921        a mapping is provided, then the search can be used to search for
922        particular values: ``marked:value``
923        '''
924        if not hasattr(id_dict, 'items'):
925            # Simple list. Make it a dict of string 'true'
926            self.marked_ids_dict = dict.fromkeys(id_dict, 'true')
927        else:
928            # Ensure that all the items in the dict are text
929            self.marked_ids_dict = dict(zip(iter(id_dict), map(str,
930                itervalues(id_dict))))
931
932        # Set the values in the cache
933        marked_col = self.FIELD_MAP['marked']
934        for r in self.iterall():
935            r[marked_col] = None
936
937        for id_, val in iteritems(self.marked_ids_dict):
938            try:
939                self._data[id_][marked_col] = val
940            except:
941                pass
942
943    def get_marked(self, idx, index_is_id=True, default_value=None):
944        id_ = idx if index_is_id else self[idx][0]
945        return self.marked_ids_dict.get(id_, default_value)
946
947    # }}}
948
949    def remove(self, id):
950        try:
951            self._uuid_map.pop(self._data[id][self._uuid_column_index], None)
952        except (IndexError, TypeError):
953            pass  # id is out of bounds -- no uuid in the map to remove
954        try:
955            self._data[id] = None
956        except IndexError:
957            pass  # id is out of bounds, no point setting it to None anyway
958        try:
959            self._map.remove(id)
960        except ValueError:
961            pass
962        try:
963            self._map_filtered.remove(id)
964        except ValueError:
965            pass
966
967    def set(self, row, col, val, row_is_id=False):
968        id = row if row_is_id else self._map_filtered[row]
969        d = self._data[id]
970        if col == self._uuid_column_index:
971            self._uuid_map.pop(d[col], None)
972        d[col] = val
973        if col == self._uuid_column_index:
974            self._uuid_map[val] = id
975        d.refresh_composites()
976
977    def get(self, row, col, row_is_id=False):
978        id = row if row_is_id else self._map_filtered[row]
979        return self._data[id][col]
980
981    def index(self, id, cache=False):
982        x = self._map if cache else self._map_filtered
983        return x.index(id)
984
985    def row(self, id):
986        return self.index(id)
987
988    def has_id(self, id):
989        try:
990            return self._data[id] is not None
991        except IndexError:
992            pass
993        return False
994
    def refresh_ids(self, db, ids):
        '''
        Refresh the data in the cache for books identified by ids.
        Returns a list of affected rows or None if the rows are filtered.
        '''
        for id in ids:
            try:
                # Rebuild the cached row from the database, then re-append the
                # three in-memory-only columns (ondevice, marked, series sort)
                # in the same order used when the cache was populated
                self._data[id] = CacheRow(db, self.composites, self.datetimes,
                        db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0],
                        self.series_col, self.series_sort_col)
                self._data[id].append(db.book_on_device_string(id))
                self._data[id].append(self.marked_ids_dict.get(id, None))
                self._data[id].append(None)  # series sort placeholder
                self._uuid_map[self._data[id][self._uuid_column_index]] = id
            except IndexError:
                # id is not a valid index into the cache (or no db row came
                # back) -- caller gets None to signal a failed refresh
                return None
        try:
            # Map each id to its row in the filtered view
            return list(map(self.row, ids))
        except ValueError:
            # At least one id is currently filtered out of the view
            pass
        return None
1016
    def books_added(self, ids, db):
        # Add newly-created book records to the cache; the new ids are
        # inserted at the top (row 0) of both row maps.
        if not ids:
            return
        # Grow the data list so that the largest new id is a valid index
        self._data.extend(repeat(None, max(ids)-len(self._data)+2))
        for id in ids:
            # Same row layout as refresh()/refresh_ids(): the db row plus
            # ondevice, marked and series-sort columns appended at the end
            self._data[id] = CacheRow(db, self.composites, self.datetimes,
                        db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0],
                        self.series_col, self.series_sort_col)
            self._data[id].append(db.book_on_device_string(id))
            self._data[id].append(self.marked_ids_dict.get(id, None))
            self._data[id].append(None)  # Series sort column
            self._uuid_map[self._data[id][self._uuid_column_index]] = id
        self._map[0:0] = ids
        self._map_filtered[0:0] = ids
1031
1032    def books_deleted(self, ids):
1033        for id in ids:
1034            self.remove(id)
1035
1036    def count(self):
1037        return len(self._map)
1038
1039    def refresh_ondevice(self, db):
1040        ondevice_col = self.FIELD_MAP['ondevice']
1041        for item in self._data:
1042            if item is not None:
1043                item[ondevice_col] = db.book_on_device_string(item[0])
1044                item.refresh_composites()
1045
1046    def refresh(self, db, field=None, ascending=True):
1047        # reinitialize the template cache in case a composite column has changed
1048        db.initialize_template_cache()
1049
1050        temp = db.conn.get('SELECT * FROM meta2')
1051        self._data = list(repeat(None, temp[-1][0]+2)) if temp else []
1052        for r in temp:
1053            self._data[r[0]] = CacheRow(db, self.composites, self.datetimes, r,
1054                                        self.series_col, self.series_sort_col)
1055            self._uuid_map[self._data[r[0]][self._uuid_column_index]] = r[0]
1056
1057        for item in self._data:
1058            if item is not None:
1059                item.append(db.book_on_device_string(item[0]))
1060                # Temp mark and series_sort columns
1061                item.extend((None, None))
1062
1063        marked_col = self.FIELD_MAP['marked']
1064        for id_,val in iteritems(self.marked_ids_dict):
1065            try:
1066                self._data[id_][marked_col] = val
1067            except:
1068                pass
1069
1070        self._map = [i[0] for i in self._data if i is not None]
1071        if field is not None:
1072            self.sort(field, ascending)
1073        self._map_filtered = list(self._map)
1074        if self.search_restriction or self.base_restriction:
1075            self.search('', return_matches=False)
1076
1077    # Sorting functions {{{
1078
1079    def sanitize_sort_field_name(self, field):
1080        field = self.field_metadata.search_term_to_field_key(field.lower().strip())
1081        # translate some fields to their hidden equivalent
1082        if field == 'title':
1083            field = 'sort'
1084        elif field == 'authors':
1085            field = 'author_sort'
1086        return field
1087
1088    def sort(self, field, ascending, subsort=False):
1089        self.multisort([(field, ascending)])
1090
1091    def multisort(self, fields=[], subsort=False, only_ids=None):
1092        '''
1093        fields is a list of 2-tuple, each tuple is of the form
1094        (field_name, is_ascending)
1095
1096        If only_ids is a list of ids, this function will sort that list instead
1097        of the internal mapping of ids.
1098        '''
1099        fields = [(self.sanitize_sort_field_name(x), bool(y)) for x, y in fields]
1100        keys = self.field_metadata.sortable_field_keys()
1101        fields = [x for x in fields if x[0] in keys]
1102        if subsort and 'sort' not in [x[0] for x in fields]:
1103            fields += [('sort', True)]
1104        if not fields:
1105            fields = [('timestamp', False)]
1106
1107        keyg = SortKeyGenerator(fields, self.field_metadata, self._data, self.db_prefs)
1108        if only_ids is None:
1109            self._map.sort(key=keyg)
1110
1111            tmap = list(repeat(False, len(self._data)))
1112            for x in self._map_filtered:
1113                tmap[x] = True
1114            self._map_filtered = [x for x in self._map if tmap[x]]
1115        else:
1116            only_ids.sort(key=keyg)
1117
1118
class SortKey:

    '''
    Rich-comparison wrapper around a tuple of per-field sort values.
    orders holds +1/-1 multipliers (ascending/descending) that flip the
    sign of the comparison for the corresponding value.
    '''

    def __init__(self, orders, values):
        self.orders, self.values = orders, values

    def compare_to_other(self, other):
        # The first field whose values differ decides the ordering; a -1
        # order inverts it for descending sorts. 0 means the keys are equal.
        for order, mine, theirs in zip(self.orders, self.values, other.values):
            direction = (mine > theirs) - (mine < theirs)
            if direction:
                return direction * order
        return 0

    def __eq__(self, other):
        return self.compare_to_other(other) == 0

    def __ne__(self, other):
        return self.compare_to_other(other) != 0

    def __lt__(self, other):
        return self.compare_to_other(other) < 0

    def __le__(self, other):
        return self.compare_to_other(other) <= 0

    def __gt__(self, other):
        return self.compare_to_other(other) > 0

    def __ge__(self, other):
        return self.compare_to_other(other) >= 0
1148
1149
class SortKeyGenerator:

    '''
    Map a book id to a SortKey covering all requested fields. Instances are
    used as the ``key=`` callable when sorting the id maps.
    '''

    def __init__(self, fields, field_metadata, data, db_prefs):
        from calibre.utils.icu import sort_key
        self.field_metadata = field_metadata
        self.db_prefs = db_prefs
        # +1 for ascending, -1 for descending; consumed by SortKey
        self.orders = [1 if x[1] else -1 for x in fields]
        self.entries = [(x[0], field_metadata[x[0]]) for x in fields]
        self.library_order = tweaks['title_series_sorting'] == 'library_order'
        self.data = data
        self.string_sort_key = sort_key
        self.lang_idx = field_metadata['languages']['rec_index']

    def __call__(self, record):
        # record is a book id; self.data maps ids to cached metadata rows
        values = tuple(self.itervals(self.data[record]))
        return SortKey(self.orders, values)

    def itervals(self, record):
        '''Yield one comparable sort value per requested field.'''
        for name, fm in self.entries:
            dt = fm['datatype']
            val = record[fm['rec_index']]
            if dt == 'composite':
                # Composite columns sort as the type declared in their
                # display metadata ('text' by default)
                sb = fm['display'].get('composite_sort', 'text')
                if sb == 'date':
                    try:
                        val = parse_date(val)
                    except Exception:
                        val = UNDEFINED_DATE
                    dt = 'datetime'
                elif sb == 'number':
                    try:
                        p = 1
                        suffixes = ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB')
                        # Check the longest suffix first: previously the bare
                        # 'B' matched values like '2 KB' (endswith('B') is
                        # True), leaving '2 K' behind, so every suffixed size
                        # failed to parse and sorted as 0.0
                        for i in range(len(suffixes) - 1, -1, -1):
                            candidate = suffixes[i]
                            if val.endswith(candidate):
                                p = 1024**i
                                val = val[:-len(candidate)].strip()
                                break
                        val = locale.atof(val) * p
                    except Exception:
                        # Unparseable values sort as zero
                        val = 0.0
                    dt = 'float'
                elif sb == 'bool':
                    # NOTE(review): force_to_bool is not imported in the
                    # visible file header -- presumably brought in elsewhere
                    # in this module; verify
                    val = force_to_bool(val)
                    dt = 'bool'

            if dt == 'datetime':
                if val is None:
                    val = UNDEFINED_DATE
                if tweaks['sort_dates_using_visible_fields']:
                    # Truncate to the precision the GUI actually displays so
                    # the sort order matches what the user sees
                    fmt = None
                    if name == 'timestamp':
                        fmt = tweaks['gui_timestamp_display_format']
                    elif name == 'pubdate':
                        fmt = tweaks['gui_pubdate_display_format']
                    elif name == 'last_modified':
                        fmt = tweaks['gui_last_modified_display_format']
                    elif fm['is_custom']:
                        fmt = fm['display'].get('date_format', None)
                    val = clean_date_for_sort(val, fmt)
            elif dt == 'series':
                if val is None:
                    val = ('', 1)
                else:
                    if self.library_order:
                        try:
                            lang = record[self.lang_idx].partition(',')[0]
                        except (AttributeError, ValueError, KeyError,
                                IndexError, TypeError):
                            lang = None
                        val = title_sort(val, order='library_order', lang=lang)
                    # Sort by (series name, position within the series)
                    sidx_fm = self.field_metadata[name + '_index']
                    sidx = record[sidx_fm['rec_index']]
                    val = (self.string_sort_key(val), sidx)

            elif dt in ('text', 'comments', 'composite', 'enumeration'):
                if val:
                    if fm['is_multiple']:
                        jv = fm['is_multiple']['list_to_ui']
                        sv = fm['is_multiple']['cache_to_list']
                        if '&' in jv:
                            # Author-like fields: sort on author-sort values
                            val = jv.join(
                                [author_to_author_sort(v) for v in val.split(sv)])
                        else:
                            val = jv.join(sorted(val.split(sv),
                                              key=self.string_sort_key))
                val = self.string_sort_key(val)

            elif dt == 'bool':
                # Without tristate bools, None and False sort together
                if not self.db_prefs.get('bools_are_tristate'):
                    val = {True: 1, False: 2, None: 2}.get(val, 2)
                else:
                    val = {True: 1, False: 2, None: 3}.get(val, 3)

            yield val
1245
1246    # }}}
1247
1248# }}}
1249