#!/usr/local/bin/python3.8
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai


__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import operator
import weakref
from collections import OrderedDict, deque
from datetime import timedelta
from functools import partial

import regex

from calibre.constants import preferred_encoding, DEBUG
from calibre.db.utils import force_to_bool
from calibre.utils.config_base import prefs
from calibre.utils.date import parse_date, UNDEFINED_DATE, now, dt_as_local
from calibre.utils.icu import primary_contains, sort_key
from calibre.utils.localization import lang_map, canonicalize_lang
from calibre.utils.search_query_parser import SearchQueryParser, ParseException
from polyglot.builtins import iteritems, string_or_bytes

# Match kinds understood by _matchkind()/_match()
CONTAINS_MATCH = 0
EQUALS_MATCH = 1
REGEXP_MATCH = 2

# Utils {{{


def _matchkind(query, case_sensitive=False):
    '''
    Determine the match kind from the query's leading marker character.

    ``=`` means exact match, ``~`` means regexp match, a leading backslash
    escapes the marker (literal contains match). Returns the (kind, query)
    pair with the marker stripped, lowercasing the query for
    case-insensitive non-regexp searches.
    '''
    matchkind = CONTAINS_MATCH
    if (len(query) > 1):
        if query.startswith('\\'):
            query = query[1:]
        elif query.startswith('='):
            matchkind = EQUALS_MATCH
            query = query[1:]
        elif query.startswith('~'):
            matchkind = REGEXP_MATCH
            query = query[1:]

    if not case_sensitive and matchkind != REGEXP_MATCH:
        # leave case in regexps because it can be significant e.g. \S \W \D
        query = icu_lower(query)
    return matchkind, query


def _match(query, value, matchkind, use_primary_find_in_search=True, case_sensitive=False):
    '''
    Return True if ``query`` matches any of the strings in the iterable
    ``value`` using the given match kind.

    A query starting with ``..`` enables hierarchical matching: the query
    (minus one leading dot) may also match any single component of a
    period-separated value.
    '''
    if query.startswith('..'):
        # Hierarchical match: also try matching individual period-separated
        # components of the value
        query = query[1:]
        sq = query[1:]
        internal_match_ok = True
    else:
        internal_match_ok = False
    for t in value:
        try:  # ignore regexp exceptions, required because search-ahead tries before typing is finished
            if not case_sensitive:
                t = icu_lower(t)
            if (matchkind == EQUALS_MATCH):
                if internal_match_ok:
                    if query == t:
                        return True
                    comps = [c.strip() for c in t.split('.') if c.strip()]
                    for comp in comps:
                        if sq == comp:
                            return True
                elif query[0] == '.':
                    # Leading dot: match a prefix ending at a period boundary
                    if t.startswith(query[1:]):
                        ql = len(query) - 1
                        if (len(t) == ql) or (t[ql:ql+1] == '.'):
                            return True
                elif query == t:
                    return True
            elif matchkind == REGEXP_MATCH:
                flags = regex.UNICODE | regex.VERSION1 | regex.FULLCASE | (0 if case_sensitive else regex.IGNORECASE)
                if regex.search(query, t, flags) is not None:
                    return True
            elif matchkind == CONTAINS_MATCH:
                if not case_sensitive and use_primary_find_in_search:
                    if primary_contains(query, t):
                        return True
                elif query in t:
                    return True
        except regex.error:
            pass
    return False
# }}}


class DateSearch:  # {{{

    '''
    Search over date-valued fields. Supports relational operators,
    today/yesterday/thismonth keywords (localized and English), N daysago,
    and partial dates (year, year-month, year-month-day).
    '''

    def __init__(self):
        # Order matters: two-character operators must be tried before their
        # one-character prefixes
        self.operators = OrderedDict((
            ('!=', self.ne),
            ('>=', self.ge),
            ('<=', self.le),
            ('=', self.eq),
            ('>', self.gt),
            ('<', self.lt),
        ))
        self.local_today = {'_today', 'today', icu_lower(_('today'))}
        self.local_yesterday = {'_yesterday', 'yesterday', icu_lower(_('yesterday'))}
        self.local_thismonth = {'_thismonth', 'thismonth', icu_lower(_('thismonth'))}
        self.daysago_pat = regex.compile(r'(%s|daysago|_daysago)$'%_('daysago'), flags=regex.UNICODE | regex.VERSION1)

    def eq(self, dbdate, query, field_count):
        # field_count controls how much of the date is compared:
        # 1 = year only, 2 = year+month, 3 = full date
        if dbdate.year == query.year:
            if field_count == 1:
                return True
            if dbdate.month == query.month:
                if field_count == 2:
                    return True
                return dbdate.day == query.day
        return False

    def ne(self, *args):
        return not self.eq(*args)

    def gt(self, dbdate, query, field_count):
        if dbdate.year > query.year:
            return True
        if field_count > 1 and dbdate.year == query.year:
            if dbdate.month > query.month:
                return True
            return (field_count == 3 and dbdate.month == query.month and
                    dbdate.day > query.day)
        return False

    def le(self, *args):
        return not self.gt(*args)

    def lt(self, dbdate, query, field_count):
        if dbdate.year < query.year:
            return True
        if field_count > 1 and dbdate.year == query.year:
            if dbdate.month < query.month:
                return True
            return (field_count == 3 and dbdate.month == query.month and
                    dbdate.day < query.day)
        return False

    def ge(self, *args):
        return not self.lt(*args)

    def __call__(self, query, field_iter):
        '''
        Return the set of book ids matching ``query``. ``field_iter`` yields
        (value, book_ids) pairs; values may be raw strings/bytes that still
        need parsing into dates.
        '''
        matches = set()
        if len(query) < 2:
            return matches

        if query == 'false':
            # Books with no (or undefined) date
            for v, book_ids in field_iter():
                if isinstance(v, (bytes, str)):
                    if isinstance(v, bytes):
                        v = v.decode(preferred_encoding, 'replace')
                    v = parse_date(v)
                if v is None or v <= UNDEFINED_DATE:
                    matches |= book_ids
            return matches

        if query == 'true':
            # Books with a defined date
            for v, book_ids in field_iter():
                if isinstance(v, (bytes, str)):
                    if isinstance(v, bytes):
                        v = v.decode(preferred_encoding, 'replace')
                    v = parse_date(v)
                if v is not None and v > UNDEFINED_DATE:
                    matches |= book_ids
            return matches

        for k, relop in iteritems(self.operators):
            if query.startswith(k):
                query = query[len(k):]
                break
        else:  # no operator found, assume equality
            relop = self.operators['=']

        if query in self.local_today:
            qd = now()
            field_count = 3
        elif query in self.local_yesterday:
            qd = now() - timedelta(1)
            field_count = 3
        elif query in self.local_thismonth:
            qd = now()
            field_count = 2
        else:
            m = self.daysago_pat.search(query)
            if m is not None:
                num = query[:-len(m.group(1))]
                try:
                    qd = now() - timedelta(int(num))
                except:
                    raise ParseException(_('Number conversion error: {0}').format(num))
                field_count = 3
            else:
                try:
                    qd = parse_date(query, as_utc=False)
                except:
                    raise ParseException(_('Date conversion error: {0}').format(query))
                # The number of date separators determines how much of the
                # date to compare (year / year-month / full date)
                if '-' in query:
                    field_count = query.count('-') + 1
                else:
                    field_count = query.count('/') + 1

        for v, book_ids in field_iter():
            if isinstance(v, string_or_bytes):
                v = parse_date(v)
            if v is not None and relop(dt_as_local(v), qd, field_count):
                matches |= book_ids

        return matches
# }}}


class NumericSearch:  # {{{

    '''
    Search over numeric fields (int, float, rating, numeric composites).
    Supports relational operators, true/false (defined/undefined), and
    k/m/g size suffixes.
    '''

    def __init__(self):
        # Order matters: two-character operators before one-character ones
        self.operators = OrderedDict((
            ('!=', operator.ne),
            ('>=', operator.ge),
            ('<=', operator.le),
            ('=', operator.eq),
            ('>', operator.gt),
            ('<', operator.lt),
        ))

    def __call__(self, query, field_iter, location, datatype, candidates, is_many=False):
        '''
        Return the set of matching book ids. ``field_iter`` yields
        (value, book_ids) pairs. ``candidates`` is used only to invert
        true/false searches for is_many fields; it is not modified.
        '''
        matches = set()
        if not query:
            return matches

        q = ''
        cast = adjust = lambda x: x
        dt = datatype

        if is_many and query in {'true', 'false'}:
            # For many-valued fields true/false means has-values/has-no-values
            valcheck = lambda x: True
            if datatype == 'rating':
                valcheck = lambda x: x is not None and x > 0
            found = set()
            for val, book_ids in field_iter():
                if valcheck(val):
                    found |= book_ids
            return found if query == 'true' else candidates - found

        if query == 'false':
            if location == 'cover':
                relop = lambda x,y: not bool(x)
            else:
                relop = lambda x,y: x is None
        elif query == 'true':
            if location == 'cover':
                relop = lambda x,y: bool(x)
            else:
                relop = lambda x,y: x is not None
        else:
            for k, relop in iteritems(self.operators):
                if query.startswith(k):
                    query = query[len(k):]
                    break
            else:  # no operator found, assume equality
                relop = self.operators['=']

            if dt == 'rating':
                # Ratings are stored doubled (0-10); queries are 0-5 stars
                cast = lambda x: 0 if x is None else int(x)
                adjust = lambda x: x // 2
            else:
                # Datatype is empty if the source is a template. Assume float
                cast = float if dt in ('float', 'composite', 'half-rating', '') else int

            mult = 1.0
            if len(query) > 1:
                mult = query[-1].lower()
                mult = {'k': 1024.,'m': 1024.**2, 'g': 1024.**3}.get(mult, 1.0)
                if mult != 1.0:
                    query = query[:-1]
            else:
                mult = 1.0

            try:
                q = cast(query) * mult
            except Exception:
                raise ParseException(
                    _('Non-numeric value in query: {0}').format(query))
            if dt == 'half-rating':
                # Half stars: compare in doubled units, so no adjust needed
                q = int(round(q * 2))
                cast = int

        qfalse = query == 'false'
        for val, book_ids in field_iter():
            if val is None:
                if qfalse:
                    matches |= book_ids
                continue
            try:
                v = cast(val)
            except Exception:
                v = None
            if v:
                v = adjust(v)
            if relop(v, q):
                matches |= book_ids
        return matches

# }}}


class BooleanSearch:  # {{{

    '''
    Search over boolean fields. Understands localized and English forms of
    yes/no/checked/unchecked/empty/blank as well as true/false, with
    different semantics depending on whether bools are tristate.
    '''

    def __init__(self):
        self.local_no = icu_lower(_('no'))
        self.local_yes = icu_lower(_('yes'))
        self.local_unchecked = icu_lower(_('unchecked'))
        self.local_checked = icu_lower(_('checked'))
        self.local_empty = icu_lower(_('empty'))
        self.local_blank = icu_lower(_('blank'))
        self.local_bool_values = {
            self.local_no, self.local_unchecked, '_no', 'false', 'no', 'unchecked', '_unchecked',
            self.local_yes, self.local_checked, 'checked', '_checked', '_yes', 'true', 'yes',
            self.local_empty, self.local_blank, 'blank', '_blank', '_empty', 'empty'}

    def __call__(self, query, field_iter, bools_are_tristate):
        '''
        Return matching book ids. Raises ParseException for queries that are
        not recognized boolean words.
        '''
        matches = set()
        if query not in self.local_bool_values:
            raise ParseException(_('Invalid boolean query "{0}"').format(query))
        for val, book_ids in field_iter():
            val = force_to_bool(val)
            if not bools_are_tristate:
                if val is None or not val:  # item is None or set to false
                    if query in {self.local_no, self.local_unchecked, 'unchecked', '_unchecked', 'no', '_no', 'false'}:
                        matches |= book_ids
                else:  # item is explicitly set to true
                    if query in {self.local_yes, self.local_checked, 'checked', '_checked', 'yes', '_yes', 'true'}:
                        matches |= book_ids
            else:
                if val is None:
                    if query in {self.local_empty, self.local_blank, 'blank', '_blank', 'empty', '_empty', 'false'}:
                        matches |= book_ids
                elif not val:  # is not None and false
                    if query in {self.local_no, self.local_unchecked, 'unchecked', '_unchecked', 'no', '_no', 'true'}:
                        matches |= book_ids
                else:  # item is not None and true
                    if query in {self.local_yes, self.local_checked, 'checked', '_checked', 'yes', '_yes', 'true'}:
                        matches |= book_ids
        return matches

# }}}


class KeyPairSearch:  # {{{

    '''
    Search over key:value fields such as identifiers. The query may be
    ``key:value``, matching keys and values independently, or a bare value.
    '''

    def __call__(self, query, field_iter, candidates, use_primary_find):
        matches = set()
        if ':' in query:
            # Split on the first colon only; [0::2] of partition() gives
            # (head, tail) skipping the separator itself
            q = [q.strip() for q in query.partition(':')[0::2]]
            keyq, valq = q
            keyq_mkind, keyq = _matchkind(keyq)
            valq_mkind, valq = _matchkind(valq)
        else:
            keyq = keyq_mkind = ''
            valq_mkind, valq = _matchkind(query)

        if valq in {'true', 'false'}:
            # true/false means presence/absence of the key (or of any entry)
            found = set()
            if keyq:
                for val, book_ids in field_iter():
                    if val and val.get(keyq, False):
                        found |= book_ids
            else:
                for val, book_ids in field_iter():
                    if val:
                        found |= book_ids
            return found if valq == 'true' else candidates - found

        for m, book_ids in field_iter():
            for key, val in iteritems(m):
                if (keyq and not _match(keyq, (key,), keyq_mkind,
                                        use_primary_find_in_search=use_primary_find)):
                    continue
                if (valq and not _match(valq, (val,), valq_mkind,
                                        use_primary_find_in_search=use_primary_find)):
                    continue
                matches |= book_ids
                break

        return matches

# }}}


class SavedSearchQueries:  # {{{

    '''
    Persistent, name -> query-string mapping of saved searches, stored in a
    database preference. Holds only a weak reference to the database.
    '''

    queries = {}
    opt_name = ''

    def __init__(self, db, _opt_name):
        self.opt_name = _opt_name
        try:
            self._db = weakref.ref(db)
        except TypeError:
            # db could be None
            self._db = lambda : None
        self.load_from_db()

    def load_from_db(self):
        db = self.db
        if db is not None:
            self.queries = db._pref(self.opt_name, default={})
        else:
            self.queries = {}

    @property
    def db(self):
        # Dereference the weakref; None once the db has been collected
        return self._db()

    def force_unicode(self, x):
        if not isinstance(x, str):
            x = x.decode(preferred_encoding, 'replace')
        return x

    def add(self, name, value):
        db = self.db
        if db is not None:
            self.queries[self.force_unicode(name)] = self.force_unicode(value).strip()
            db._set_pref(self.opt_name, self.queries)

    def lookup(self, name):
        # Case-insensitive lookup of a saved search by name
        sn = self.force_unicode(name).lower()
        for n, q in self.queries.items():
            if sn == n.lower():
                return q
        return None

    def delete(self, name):
        db = self.db
        if db is not None:
            self.queries.pop(self.force_unicode(name), False)
            db._set_pref(self.opt_name, self.queries)

    def rename(self, old_name, new_name):
        db = self.db
        if db is not None:
            self.queries[self.force_unicode(new_name)] = self.queries.get(self.force_unicode(old_name), None)
            self.queries.pop(self.force_unicode(old_name), False)
            db._set_pref(self.opt_name, self.queries)

    def set_all(self, smap):
        db = self.db
        if db is not None:
            self.queries = smap
            db._set_pref(self.opt_name, smap)

    def names(self):
        return sorted(self.queries, key=sort_key)
# }}}


class Parser(SearchQueryParser):  # {{{

    '''
    The search grammar parser for the database. get_matches() dispatches
    each location:query pair to the appropriate specialized search
    (date/numeric/boolean/keypair) or does a text match over field values.
    '''

    def __init__(self, dbcache, all_book_ids, gst, date_search, num_search,
                 bool_search, keypair_search, limit_search_columns, limit_search_columns_to,
                 locations, virtual_fields, lookup_saved_search, parse_cache):
        self.dbcache, self.all_book_ids = dbcache, all_book_ids
        self.all_search_locations = frozenset(locations)
        self.grouped_search_terms = gst
        self.date_search, self.num_search = date_search, num_search
        self.bool_search, self.keypair_search = bool_search, keypair_search
        self.limit_search_columns, self.limit_search_columns_to = (
            limit_search_columns, limit_search_columns_to)
        self.virtual_fields = virtual_fields or {}
        if 'marked' not in self.virtual_fields:
            # self acts as a do-nothing field (see iter_searchable_values)
            self.virtual_fields['marked'] = self
        SearchQueryParser.__init__(self, locations, optimize=True, lookup_saved_search=lookup_saved_search, parse_cache=parse_cache)

    @property
    def field_metadata(self):
        return self.dbcache.field_metadata

    def universal_set(self):
        return self.all_book_ids

    def field_iter(self, name, candidates):
        '''Yield (value, book_ids) pairs for the named field, restricted to
        candidates. Falls back to virtual fields for unknown names.'''
        get_metadata = self.dbcache._get_proxy_metadata
        try:
            field = self.dbcache.fields[name]
        except KeyError:
            field = self.virtual_fields[name]
            self.virtual_field_used = True
        return field.iter_searchable_values(get_metadata, candidates)

    def iter_searchable_values(self, *args, **kwargs):
        # Empty generator: used as the default 'marked' virtual field
        for x in ():
            yield x, set()

    def parse(self, *args, **kwargs):
        self.virtual_field_used = False
        return SearchQueryParser.parse(self, *args, **kwargs)

    def get_matches(self, location, query, candidates=None,
                    allow_recursion=True):
        '''
        Return the set of book ids in ``candidates`` matching
        ``location:query``. If candidates is not None, it must not be
        modified. Changing its value will break query optimization in the
        search parser.
        '''
        matches = set()

        if candidates is None:
            candidates = self.all_book_ids
        if not candidates or not query or not query.strip():
            return matches
        if location not in self.all_search_locations:
            return matches

        if location == 'vl':
            # Restrict to the books in a named Virtual library
            vl = self.dbcache._pref('virtual_libraries', {}).get(query) if query else None
            if not vl:
                raise ParseException(_('No such Virtual library: {}').format(query))
            try:
                return candidates & self.dbcache.books_in_virtual_library(
                    query, virtual_fields=self.virtual_fields)
            except RuntimeError:
                raise ParseException(_('Virtual library search is recursive: {}').format(query))

        if (len(location) > 2 and location.startswith('@') and
                location[1:] in self.grouped_search_terms):
            location = location[1:]

        # get metadata key associated with the search term. Eliminates
        # dealing with plurals and other aliases
        original_location = location
        location = self.field_metadata.search_term_to_field_key(
            icu_lower(location.strip()))
        # grouped search terms
        if isinstance(location, list):
            if allow_recursion:
                if query.lower() == 'false':
                    invert = True
                    query = 'true'
                else:
                    invert = False
                for loc in location:
                    c = candidates.copy()
                    m = self.get_matches(loc, query,
                                         candidates=c, allow_recursion=False)
                    matches |= m
                    c -= m
                    if len(c) == 0:
                        break
                if invert:
                    matches = self.all_book_ids - matches
                return matches
            raise ParseException(
                _('Recursive query group detected: {0}').format(query))

        # If the user has asked to restrict searching over all field, apply
        # that restriction
        if (location == 'all' and self.limit_search_columns and
                self.limit_search_columns_to):
            terms = set()
            for l in self.limit_search_columns_to:
                l = icu_lower(l.strip())
                if l and l != 'all' and l in self.all_search_locations:
                    terms.add(l)
            if terms:
                c = candidates.copy()
                for l in terms:
                    try:
                        m = self.get_matches(l, query,
                                             candidates=c, allow_recursion=allow_recursion)
                        matches |= m
                        c -= m
                        if len(c) == 0:
                            break
                    except:
                        pass
                return matches

        upf = prefs['use_primary_find_in_search']

        if location in self.field_metadata:
            fm = self.field_metadata[location]
            dt = fm['datatype']

            # take care of dates special case
            if (dt == 'datetime' or (
                    dt == 'composite' and
                    fm['display'].get('composite_sort', '') == 'date')):
                if location == 'date':
                    location = 'timestamp'
                return self.date_search(
                    icu_lower(query), partial(self.field_iter, location, candidates))

            # take care of numbers special case
            if (dt in ('rating', 'int', 'float') or
                    (dt == 'composite' and
                     fm['display'].get('composite_sort', '') == 'number')):
                if location == 'id':
                    is_many = False

                    def fi(default_value=None):
                        # The id of a book is its own searchable value
                        for qid in candidates:
                            yield qid, {qid}
                else:
                    field = self.dbcache.fields[location]
                    fi, is_many = partial(self.field_iter, location, candidates), field.is_many
                if dt == 'rating' and fm['display'].get('allow_half_stars'):
                    dt = 'half-rating'
                return self.num_search(
                    icu_lower(query), fi, location, dt, candidates, is_many=is_many)

            # take care of the 'count' operator for is_multiples
            if (fm['is_multiple'] and
                    len(query) > 1 and query[0] == '#' and query[1] in '=<>!'):
                return self.num_search(icu_lower(query[1:]), partial(
                    self.dbcache.fields[location].iter_counts, candidates,
                    get_metadata=self.dbcache._get_proxy_metadata),
                    location, dt, candidates)

            # take care of boolean special case
            if dt == 'bool':
                return self.bool_search(icu_lower(query),
                                        partial(self.field_iter, location, candidates),
                                        self.dbcache._pref('bools_are_tristate'))

            # special case: colon-separated fields such as identifiers. isbn
            # is a special case within the case
            if fm.get('is_csp', False):
                field_iter = partial(self.field_iter, location, candidates)
                if location == 'identifiers' and original_location == 'isbn':
                    return self.keypair_search('=isbn:'+query, field_iter,
                                               candidates, upf)
                return self.keypair_search(query, field_iter, candidates, upf)

        # check for user categories
        if len(location) >= 2 and location.startswith('@'):
            return self.get_user_category_matches(location[1:], icu_lower(query), candidates)

        # Everything else (and 'all' matches)
        case_sensitive = prefs['case_sensitive']

        if location == 'template':
            # Query form: template#@#:<t|d|n|b>:value -- the separator gives
            # the type the template result should be compared as
            try:
                template, sep, query = regex.split('#@#:([tdnb]):', query, flags=regex.IGNORECASE)
                if sep:
                    sep = sep.lower()
                else:
                    sep = 't'
            except:
                if DEBUG:
                    import traceback
                    traceback.print_exc()
                raise ParseException(_('search template: missing or invalid separator. Valid separators are: {}').format('#@#:[tdnb]:'))
            matchkind, query = _matchkind(query, case_sensitive=case_sensitive)
            matches = set()
            error_string = '*@*TEMPLATE_ERROR*@*'
            template_cache = {}
            for book_id in candidates:
                mi = self.dbcache.get_proxy_metadata(book_id)
                val = mi.formatter.safe_format(template, {}, error_string, mi,
                                               column_name='search template',
                                               template_cache=template_cache)
                if val.startswith(error_string):
                    raise ParseException(val[len(error_string):])
                if sep == 't':
                    if _match(query, [val,], matchkind, use_primary_find_in_search=upf,
                              case_sensitive=case_sensitive):
                        matches.add(book_id)
                elif sep == 'n' and val:
                    matches.update(self.num_search(
                        icu_lower(query), {val:{book_id,}}.items, '', '',
                        {book_id,}, is_many=False))
                elif sep == 'd' and val:
                    matches.update(self.date_search(
                        icu_lower(query), {val:{book_id,}}.items))
                elif sep == 'b':
                    matches.update(self.bool_search(icu_lower(query),
                        {'True' if val else 'False':{book_id,}}.items, False))

            return matches

        matchkind, query = _matchkind(query, case_sensitive=case_sensitive)
        all_locs = set()
        text_fields = set()
        field_metadata = {}

        for x, fm in self.field_metadata.iter_items():
            if x.startswith('@'):
                continue
            if fm['search_terms'] and x not in {'series_sort', 'id'}:
                if x not in self.virtual_fields and x != 'uuid':
                    # We dont search virtual fields because if we do, search
                    # caching will not be used
                    all_locs.add(x)
                    field_metadata[x] = fm
                if fm['datatype'] in {'composite', 'text', 'comments', 'series', 'enumeration'}:
                    text_fields.add(x)

        locations = all_locs if location == 'all' else {location}

        current_candidates = set(candidates)

        try:
            # ratings are stored doubled (0-10 for a 0-5 star query)
            rating_query = int(float(query)) * 2
        except:
            rating_query = None

        try:
            int_query = int(float(query))
        except:
            int_query = None

        try:
            float_query = float(query)
        except:
            float_query = None

        for location in locations:
            current_candidates -= matches
            q = query
            if location == 'languages':
                q = canonicalize_lang(query)
                if q is None:
                    lm = lang_map()
                    rm = {v.lower():k for k,v in iteritems(lm)}
                    q = rm.get(query, query)

            if matchkind == CONTAINS_MATCH and q.lower() in {'true', 'false'}:
                # true/false for a plain contains match means the field has a
                # non-empty value / is empty
                found = set()
                for val, book_ids in self.field_iter(location, current_candidates):
                    if val and (not hasattr(val, 'strip') or val.strip()):
                        found |= book_ids
                matches |= (found if q.lower() == 'true' else (current_candidates-found))
                continue

            dt = field_metadata.get(location, {}).get('datatype', None)
            if dt == 'rating':
                if rating_query is not None:
                    for val, book_ids in self.field_iter(location, current_candidates):
                        if val == rating_query:
                            matches |= book_ids
                continue

            if dt == 'float':
                if float_query is not None:
                    for val, book_ids in self.field_iter(location, current_candidates):
                        if val == float_query:
                            matches |= book_ids
                continue

            if dt == 'int':
                if int_query is not None:
                    for val, book_ids in self.field_iter(location, current_candidates):
                        if val == int_query:
                            matches |= book_ids
                continue

            if location in text_fields:
                for val, book_ids in self.field_iter(location, current_candidates):
                    if val is not None:
                        if isinstance(val, string_or_bytes):
                            val = (val,)
                        if _match(q, val, matchkind, use_primary_find_in_search=upf, case_sensitive=case_sensitive):
                            matches |= book_ids

            if location == 'series_sort':
                book_lang_map = self.dbcache.fields['languages'].book_value_map
                for val, book_ids in self.dbcache.fields['series'].iter_searchable_values_for_sort(current_candidates, book_lang_map):
                    if val is not None:
                        if _match(q, (val,), matchkind, use_primary_find_in_search=upf, case_sensitive=case_sensitive):
                            matches |= book_ids

        return matches

    def get_user_category_matches(self, location, query, candidates):
        '''
        Match books belonging to the items of a user category. A leading dot
        in the query also searches sub-categories; query == 'false' inverts
        the result.
        '''
        matches = set()
        if len(query) < 2:
            return matches

        user_cats = self.dbcache._pref('user_categories')
        c = set(candidates)

        if query.startswith('.'):
            check_subcats = True
            query = query[1:]
        else:
            check_subcats = False

        for key in user_cats:
            if key == location or (check_subcats and key.startswith(location + '.')):
                for (item, category, ign) in user_cats[key]:
                    s = self.get_matches(category, '=' + item, candidates=c)
                    c -= s
                    matches |= s
        if query == 'false':
            return candidates - matches
        return matches
# }}}


class LRUCache:  # {{{

    'A simple Least-Recently-Used cache'

    def __init__(self, limit=50):
        self.item_map = {}
        self.age_map = deque()
        self.limit = limit

    def _move_up(self, key):
        # Mark key as most recently used
        if key != self.age_map[-1]:
            self.age_map.remove(key)
            self.age_map.append(key)

    def add(self, key, val):
        if key in self.item_map:
            self._move_up(key)
            return

        if len(self.age_map) >= self.limit:
            # Evict the least recently used entry
            self.item_map.pop(self.age_map.popleft())

        self.item_map[key] = val
        self.age_map.append(key)
    __setitem__ = add

    def get(self, key, default=None):
        ans = self.item_map.get(key, default)
        if ans is not default:
            self._move_up(key)
        return ans

    def clear(self):
        self.item_map.clear()
        self.age_map.clear()

    def pop(self, key, default=None):
        self.item_map.pop(key, default)
        try:
            self.age_map.remove(key)
        except ValueError:
            pass

    def __contains__(self, key):
        return key in self.item_map

    def __len__(self):
        return len(self.age_map)

    def __getitem__(self, key):
        return self.get(key)

    def __iter__(self):
        # NOTE: yields (key, value) pairs, not keys, unlike a dict
        return iteritems(self.item_map)
# }}}


class Search:

    '''
    The main searching interface: ties together the specialized searches,
    saved searches and an LRU cache of search results.
    '''

    # Above this many (book_ids x cached queries) updates we just drop the
    # cache instead of updating it incrementally
    MAX_CACHE_UPDATE = 50

    def __init__(self, db, opt_name, all_search_locations=()):
        self.all_search_locations = all_search_locations
        self.date_search = DateSearch()
        self.num_search = NumericSearch()
        self.bool_search = BooleanSearch()
        self.keypair_search = KeyPairSearch()
        self.saved_searches = SavedSearchQueries(db, opt_name)
        self.cache = LRUCache()
        self.parse_cache = LRUCache(limit=100)

    def get_saved_searches(self):
        return self.saved_searches

    def change_locations(self, newlocs):
        if frozenset(newlocs) != frozenset(self.all_search_locations):
            self.clear_caches()
            self.parse_cache.clear()
        self.all_search_locations = newlocs

    def update_or_clear(self, dbcache, book_ids=None):
        if book_ids and (len(book_ids) * len(self.cache)) <= self.MAX_CACHE_UPDATE:
            self.update_caches(dbcache, book_ids)
        else:
            self.clear_caches()

    def clear_caches(self):
        self.cache.clear()

    def update_caches(self, dbcache, book_ids):
        sqp = self.create_parser(dbcache)
        try:
            return self._update_caches(sqp, book_ids)
        finally:
            # break reference cycles so dbcache can be collected
            sqp.dbcache = sqp.lookup_saved_search = None

    def discard_books(self, book_ids):
        book_ids = set(book_ids)
        for query, result in self.cache:
            result.difference_update(book_ids)

    def _update_caches(self, sqp, book_ids):
        book_ids = sqp.all_book_ids = set(book_ids)
        remove = set()
        for query, result in tuple(self.cache):
            try:
                matches = sqp.parse(query)
            except ParseException:
                remove.add(query)
            else:
                # remove books that no longer match
                result.difference_update(book_ids - matches)
                # add books that now match but did not before
                result.update(matches)
        for query in remove:
            self.cache.pop(query)

    def create_parser(self, dbcache, virtual_fields=None):
        return Parser(
            dbcache, set(), dbcache._pref('grouped_search_terms'),
            self.date_search, self.num_search, self.bool_search,
            self.keypair_search,
            prefs['limit_search_columns'],
            prefs['limit_search_columns_to'], self.all_search_locations,
            virtual_fields, self.saved_searches.lookup, self.parse_cache)

    def __call__(self, dbcache, query, search_restriction, virtual_fields=None, book_ids=None):
        '''
        Return the set of ids of all records that match the specified
        query and restriction
        '''
        # We construct a new parser instance per search as the parse is not
        # thread safe.
        sqp = self.create_parser(dbcache, virtual_fields)
        try:
            return self._do_search(sqp, query, search_restriction, dbcache, book_ids=book_ids)
        finally:
            # break reference cycles so dbcache can be collected
            sqp.dbcache = sqp.lookup_saved_search = None

    def query_is_cacheable(self, sqp, dbcache, query):
        '''Searches on date-valued fields depend on the current time, so
        their results must not be cached.'''
        if query:
            for name, value in sqp.get_queried_fields(query):
                if name == 'template' and '#@#:d:' in value:
                    return False
                elif name in dbcache.field_metadata.all_field_keys():
                    fm = dbcache.field_metadata[name]
                    if fm['datatype'] == 'datetime':
                        return False
                    if fm['datatype'] == 'composite':
                        if fm.get('display', {}).get('composite_sort', '') == 'date':
                            return False
        return True

    def _do_search(self, sqp, query, search_restriction, dbcache, book_ids=None):
        ''' Do the search, caching the results. Results are cached only if the
        search is on the full library and no virtual field is searched on '''
        if isinstance(search_restriction, bytes):
            search_restriction = search_restriction.decode('utf-8')
        if isinstance(query, bytes):
            query = query.decode('utf-8')

        query = query.strip()
        use_cache = self.query_is_cacheable(sqp, dbcache, query)

        if use_cache and book_ids is None and query and not search_restriction:
            cached = self.cache.get(query)
            if cached is not None:
                return cached

        restricted_ids = all_book_ids = dbcache._all_book_ids(type=set)
        if search_restriction and search_restriction.strip():
            sr = search_restriction.strip()
            sqp.all_book_ids = all_book_ids if book_ids is None else book_ids
            if self.query_is_cacheable(sqp, dbcache, sr):
                cached = self.cache.get(sr)
                if cached is None:
                    restricted_ids = sqp.parse(sr)
                    if not sqp.virtual_field_used and sqp.all_book_ids is all_book_ids:
                        self.cache.add(sr, restricted_ids)
                else:
                    # Cached restriction results were computed over the full
                    # library, so apply book_ids on top of them
                    restricted_ids = cached
                    if book_ids is not None:
                        restricted_ids = book_ids.intersection(restricted_ids)
            else:
                restricted_ids = sqp.parse(sr)
        elif book_ids is not None:
            restricted_ids = book_ids

        if not query:
            return restricted_ids

        if use_cache and restricted_ids is all_book_ids:
            cached = self.cache.get(query)
            if cached is not None:
                return cached

        sqp.all_book_ids = restricted_ids
        result = sqp.parse(query)

        if not sqp.virtual_field_used and sqp.all_book_ids is all_book_ids:
            self.cache.add(query, result)

        return result