1#!/usr/local/bin/python3.8 2# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 4 5__license__ = 'GPL v3' 6__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' 7__docformat__ = 'restructuredtext en' 8 9import time, traceback, locale 10from itertools import repeat 11from datetime import timedelta 12from threading import Thread 13 14from calibre.utils.config import tweaks, prefs 15from calibre.utils.date import parse_date, now, UNDEFINED_DATE, clean_date_for_sort 16from calibre.utils.search_query_parser import SearchQueryParser 17from calibre.utils.search_query_parser import ParseException 18from calibre.utils.localization import (canonicalize_lang, lang_map, get_udc) 19from calibre.db.search import CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH, _match 20from calibre.ebooks.metadata import title_sort, author_to_author_sort 21from calibre.ebooks.metadata.opf2 import metadata_to_opf 22from calibre import prints, force_unicode 23from polyglot.builtins import iteritems, itervalues, string_or_bytes, cmp 24 25 26class MetadataBackup(Thread): # {{{ 27 ''' 28 Continuously backup changed metadata into OPF files 29 in the book directory. This class runs in its own 30 thread and makes sure that the actual file write happens in the 31 GUI thread to prevent Windows' file locking from causing problems. 32 ''' 33 34 def __init__(self, db): 35 Thread.__init__(self) 36 self.daemon = True 37 self.db = db 38 self.keep_running = True 39 from calibre.gui2 import FunctionDispatcher 40 self.do_write = FunctionDispatcher(self.write) 41 self.get_metadata_for_dump = FunctionDispatcher(db.get_metadata_for_dump) 42 self.clear_dirtied = FunctionDispatcher(db.clear_dirtied) 43 self.set_dirtied = FunctionDispatcher(db.dirtied) 44 45 def stop(self): 46 self.keep_running = False 47 48 def break_cycles(self): 49 # Break cycles so that this object doesn't hold references to db 50 self.do_write = self.get_metadata_for_dump = self.clear_dirtied = \ 51 self.set_dirtied = self.db = None 52 53 def run(self): 54 while self.keep_running: 55 try: 56 time.sleep(2) # Limit to one book per two seconds 57 (id_, sequence) = self.db.get_a_dirtied_book() 58 if id_ is None: 59 continue 60 # print 'writer thread', id_, sequence 61 except: 62 # Happens during interpreter shutdown 63 break 64 if not self.keep_running: 65 break 66 67 try: 68 path, mi, sequence = self.get_metadata_for_dump(id_) 69 except: 70 prints('Failed to get backup metadata for id:', id_, 'once') 71 traceback.print_exc() 72 time.sleep(2) 73 try: 74 path, mi, sequence = self.get_metadata_for_dump(id_) 75 except: 76 prints('Failed to get backup metadata for id:', id_, 'again, giving up') 77 traceback.print_exc() 78 continue 79 80 if mi is None: 81 self.clear_dirtied(id_, sequence) 82 continue 83 if not self.keep_running: 84 break 85 86 # Give the GUI thread a chance to do something. Python threads don't 87 # have priorities, so this thread would naturally keep the processor 88 # until some scheduling event happens. The sleep makes such an event 89 time.sleep(0.1) 90 try: 91 raw = metadata_to_opf(mi) 92 except: 93 prints('Failed to convert to opf for id:', id_) 94 traceback.print_exc() 95 continue 96 97 if not self.keep_running: 98 break 99 100 time.sleep(0.1) # Give the GUI thread a chance to do something 101 try: 102 self.do_write(path, raw) 103 except: 104 prints('Failed to write backup metadata for id:', id_, 'once') 105 time.sleep(2) 106 try: 107 self.do_write(path, raw) 108 except: 109 prints('Failed to write backup metadata for id:', id_, 110 'again, giving up') 111 continue 112 113 self.clear_dirtied(id_, sequence) 114 self.break_cycles() 115 116 def write(self, path, raw): 117 with lopen(path, 'wb') as f: 118 f.write(raw) 119 120 121# }}} 122 123# Global utility function for get_match here and in gui2/library.py 124# This is a global for performance 125pref_use_primary_find_in_search = False 126 127 128def set_use_primary_find_in_search(toWhat): 129 global pref_use_primary_find_in_search 130 pref_use_primary_find_in_search = toWhat 131 132 133y, c, n, u = map(icu_lower, (_('yes'), _('checked'), _('no'), _('unchecked'))) 134yes_vals = {y, c, 'true'} 135no_vals = {n, u, 'false'} 136del y, c, n, u 137 138 139def force_to_bool(val): 140 if isinstance(val, (bytes, str)): 141 if isinstance(val, bytes): 142 val = force_unicode(val) 143 try: 144 val = icu_lower(val) 145 if not val: 146 val = None 147 elif val in yes_vals: 148 val = True 149 elif val in no_vals: 150 val = False 151 else: 152 val = bool(int(val)) 153 except: 154 val = None 155 return val 156 157 158class CacheRow(list): # {{{ 159 160 def __init__(self, db, composites, datetimes, val, series_col, series_sort_col): 161 from calibre.db.tables import c_parse 162 self.db = db 163 self._composites = composites 164 for num in datetimes: 165 val[num] = c_parse(val[num]) 166 if val[num] is UNDEFINED_DATE: 167 val[num] = None 168 list.__init__(self, val) 169 self._must_do = len(composites) > 0 170 self._series_col = series_col 171 self._series_sort_col = series_sort_col 172 self._series_sort = None 173 174 def __getitem__(self, col): 175 if self._must_do: 176 is_comp = False 177 if isinstance(col, slice): 178 start = 0 if col.start is None else col.start 179 step = 1 if col.stop is None else col.stop 180 for c in range(start, col.stop, step): 181 if c in self._composites: 182 is_comp = True 183 break 184 elif col in self._composites: 185 is_comp = True 186 if is_comp: 187 id_ = list.__getitem__(self, 0) 188 self._must_do = False 189 mi = self.db.get_metadata(id_, index_is_id=True, 190 get_user_categories=False) 191 for c in self._composites: 192 self[c] = mi.get(self._composites[c]) 193 if col == self._series_sort_col and self._series_sort is None: 194 if self[self._series_col]: 195 self._series_sort = title_sort(self[self._series_col]) 196 self[self._series_sort_col] = self._series_sort 197 else: 198 self._series_sort = '' 199 self[self._series_sort_col] = '' 200 return list.__getitem__(self, col) 201 202 def __getslice__(self, i, j): 203 return self.__getitem__(slice(i, j)) 204 205 def refresh_composites(self): 206 for c in self._composites: 207 self[c] = None 208 self._must_do = True 209 210# }}} 211 212 213class ResultCache(SearchQueryParser): # {{{ 214 215 ''' 216 Stores sorted and filtered metadata in memory. 217 ''' 218 219 def __init__(self, FIELD_MAP, field_metadata, db_prefs=None): 220 self.FIELD_MAP = FIELD_MAP 221 self.db_prefs = db_prefs 222 self.composites = {} 223 self.datetimes = set() 224 self.udc = get_udc() 225 for key in field_metadata: 226 dt = field_metadata[key]['datatype'] 227 if dt == 'composite': 228 self.composites[field_metadata[key]['rec_index']] = key 229 elif dt == 'datetime': 230 self.datetimes.add(field_metadata[key]['rec_index']) 231 self.series_col = field_metadata['series']['rec_index'] 232 self.series_sort_col = field_metadata['series_sort']['rec_index'] 233 self._data = [] 234 self._map = self._map_filtered = [] 235 self.first_sort = True 236 self.search_restriction = self.base_restriction = '' 237 self.base_restriction_name = self.search_restriction_name = '' 238 self.search_restriction_book_count = 0 239 self.marked_ids_dict = {} 240 self.field_metadata = field_metadata 241 self.all_search_locations = field_metadata.get_search_terms() 242 SearchQueryParser.__init__(self, self.all_search_locations, optimize=True) 243 self.build_date_relop_dict() 244 self.build_numeric_relop_dict() 245 # Do this here so the var get updated when a library changes 246 global pref_use_primary_find_in_search 247 pref_use_primary_find_in_search = prefs['use_primary_find_in_search'] 248 self._uuid_column_index = self.FIELD_MAP['uuid'] 249 self._uuid_map = {} 250 251 def break_cycles(self): 252 self._data = self.field_metadata = self.FIELD_MAP = \ 253 self.numeric_search_relops = self.date_search_relops = \ 254 self.db_prefs = self.all_search_locations = None 255 self.sqp_change_locations([]) 256 257 def __getitem__(self, row): 258 return self._data[self._map_filtered[row]] 259 260 def __len__(self): 261 return len(self._map_filtered) 262 263 def __iter__(self): 264 for id in self._map_filtered: 265 yield self._data[id] 266 267 def iterall(self): 268 for x in self._data: 269 if x is not None: 270 yield x 271 272 def iterallids(self): 273 idx = self.FIELD_MAP['id'] 274 for x in self.iterall(): 275 yield x[idx] 276 277 # Search functions {{{ 278 279 def universal_set(self): 280 return {i[0] for i in self._data if i is not None} 281 282 def change_search_locations(self, locations): 283 self.sqp_change_locations(locations) 284 self.all_search_locations = locations 285 286 def build_date_relop_dict(self): 287 ''' 288 Because the database dates have time in them, we can't use direct 289 comparisons even when field_count == 3. The query has time = 0, but 290 the database object has time == something. As such, a complete compare 291 will almost never be correct. 292 ''' 293 def relop_eq(db, query, field_count): 294 if db.year == query.year: 295 if field_count == 1: 296 return True 297 if db.month == query.month: 298 if field_count == 2: 299 return True 300 return db.day == query.day 301 return False 302 303 def relop_gt(db, query, field_count): 304 if db.year > query.year: 305 return True 306 if field_count > 1 and db.year == query.year: 307 if db.month > query.month: 308 return True 309 return field_count == 3 and db.month == query.month and db.day > query.day 310 return False 311 312 def relop_lt(db, query, field_count): 313 if db.year < query.year: 314 return True 315 if field_count > 1 and db.year == query.year: 316 if db.month < query.month: 317 return True 318 return field_count == 3 and db.month == query.month and db.day < query.day 319 return False 320 321 def relop_ne(db, query, field_count): 322 return not relop_eq(db, query, field_count) 323 324 def relop_ge(db, query, field_count): 325 return not relop_lt(db, query, field_count) 326 327 def relop_le(db, query, field_count): 328 return not relop_gt(db, query, field_count) 329 330 self.date_search_relops = { 331 '=' :[1, relop_eq], 332 '>' :[1, relop_gt], 333 '<' :[1, relop_lt], 334 '!=':[2, relop_ne], 335 '>=':[2, relop_ge], 336 '<=':[2, relop_le] 337 } 338 339 local_today = ('_today', icu_lower(_('today'))) 340 local_yesterday = ('_yesterday', icu_lower(_('yesterday'))) 341 local_thismonth = ('_thismonth', icu_lower(_('thismonth'))) 342 local_daysago = icu_lower(_('daysago')) 343 local_daysago_len = len(local_daysago) 344 untrans_daysago = '_daysago' 345 untrans_daysago_len = len('_daysago') 346 347 def get_dates_matches(self, location, query, candidates): 348 matches = set() 349 if len(query) < 2: 350 return matches 351 352 if location == 'date': 353 location = 'timestamp' 354 loc = self.field_metadata[location]['rec_index'] 355 356 if query == 'false': 357 for id_ in candidates: 358 item = self._data[id_] 359 if item is None: 360 continue 361 v = item[loc] 362 if isinstance(v, (bytes, str)): 363 v = parse_date(v) 364 if v is None or v <= UNDEFINED_DATE: 365 matches.add(item[0]) 366 return matches 367 if query == 'true': 368 for id_ in candidates: 369 item = self._data[id_] 370 if item is None: 371 continue 372 v = item[loc] 373 if isinstance(v, (bytes, str)): 374 v = parse_date(v) 375 if v is not None and v > UNDEFINED_DATE: 376 matches.add(item[0]) 377 return matches 378 379 relop = None 380 for k in self.date_search_relops.keys(): 381 if query.startswith(k): 382 (p, relop) = self.date_search_relops[k] 383 query = query[p:] 384 if relop is None: 385 (p, relop) = self.date_search_relops['='] 386 387 if query in self.local_today: 388 qd = now() 389 field_count = 3 390 elif query in self.local_yesterday: 391 qd = now() - timedelta(1) 392 field_count = 3 393 elif query in self.local_thismonth: 394 qd = now() 395 field_count = 2 396 elif query.endswith(self.local_daysago) or query.endswith(self.untrans_daysago): 397 num = query[0:-(self.untrans_daysago_len if query.endswith(self.untrans_daysago) else self.local_daysago_len)] 398 try: 399 qd = now() - timedelta(int(num)) 400 except: 401 raise ParseException(_('Number conversion error: {0}').format(num)) 402 field_count = 3 403 else: 404 try: 405 qd = parse_date(query, as_utc=False) 406 except: 407 raise ParseException(_('Date conversion error: {0}').format(query)) 408 if '-' in query: 409 field_count = query.count('-') + 1 410 else: 411 field_count = query.count('/') + 1 412 for id_ in candidates: 413 item = self._data[id_] 414 if item is None or item[loc] is None: 415 continue 416 v = item[loc] 417 if isinstance(v, (bytes, str)): 418 v = parse_date(v) 419 if relop(v, qd, field_count): 420 matches.add(item[0]) 421 return matches 422 423 def build_numeric_relop_dict(self): 424 self.numeric_search_relops = { 425 '=':[1, lambda r, q: r == q], 426 '>':[1, lambda r, q: r is not None and r > q], 427 '<':[1, lambda r, q: r is not None and r < q], 428 '!=':[2, lambda r, q: r != q], 429 '>=':[2, lambda r, q: r is not None and r >= q], 430 '<=':[2, lambda r, q: r is not None and r <= q] 431 } 432 433 def get_numeric_matches(self, location, query, candidates, val_func=None): 434 matches = set() 435 if len(query) == 0: 436 return matches 437 438 if val_func is None: 439 loc = self.field_metadata[location]['rec_index'] 440 val_func = lambda item, loc=loc: item[loc] 441 q = '' 442 cast = adjust = lambda x: x 443 dt = self.field_metadata[location]['datatype'] 444 445 if query == 'false': 446 if dt == 'rating' or location == 'cover': 447 relop = lambda x,y: not bool(x) 448 else: 449 relop = lambda x,y: x is None 450 elif query == 'true': 451 if dt == 'rating' or location == 'cover': 452 relop = lambda x,y: bool(x) 453 else: 454 relop = lambda x,y: x is not None 455 else: 456 relop = None 457 for k in self.numeric_search_relops.keys(): 458 if query.startswith(k): 459 (p, relop) = self.numeric_search_relops[k] 460 query = query[p:] 461 if relop is None: 462 (p, relop) = self.numeric_search_relops['='] 463 464 if dt == 'int': 465 cast = lambda x: int(x) 466 elif dt == 'rating': 467 cast = lambda x: 0 if x is None else int(x) 468 adjust = lambda x: x//2 469 elif dt in ('float', 'composite'): 470 cast = lambda x : float(x) 471 else: # count operation 472 cast = (lambda x: int(x)) 473 474 if len(query) > 1: 475 mult = query[-1:].lower() 476 mult = {'k':1024.,'m': 1024.**2, 'g': 1024.**3}.get(mult, 1.0) 477 if mult != 1.0: 478 query = query[:-1] 479 else: 480 mult = 1.0 481 try: 482 q = cast(query) * mult 483 except: 484 raise ParseException(_('Non-numeric value in query: {0}').format(query)) 485 486 for id_ in candidates: 487 item = self._data[id_] 488 if item is None: 489 continue 490 try: 491 v = cast(val_func(item)) 492 except: 493 v = None 494 if v: 495 v = adjust(v) 496 if relop(v, q): 497 matches.add(item[0]) 498 return matches 499 500 def get_user_category_matches(self, location, query, candidates): 501 matches = set() 502 if self.db_prefs is None or len(query) < 2: 503 return matches 504 user_cats = self.db_prefs.get('user_categories', []) 505 c = set(candidates) 506 507 if query.startswith('.'): 508 check_subcats = True 509 query = query[1:] 510 else: 511 check_subcats = False 512 513 for key in user_cats: 514 if key == location or (check_subcats and key.startswith(location + '.')): 515 for (item, category, ign) in user_cats[key]: 516 s = self.get_matches(category, '=' + item, candidates=c) 517 c -= s 518 matches |= s 519 if query == 'false': 520 return candidates - matches 521 return matches 522 523 def get_keypair_matches(self, location, query, candidates): 524 matches = set() 525 if query.find(':') >= 0: 526 q = [q.strip() for q in query.split(':')] 527 if len(q) != 2: 528 raise ParseException( 529 _('Invalid query format for colon-separated search: {0}').format(query)) 530 (keyq, valq) = q 531 keyq_mkind, keyq = self._matchkind(keyq) 532 valq_mkind, valq = self._matchkind(valq) 533 else: 534 keyq = keyq_mkind = '' 535 valq_mkind, valq = self._matchkind(query) 536 537 loc = self.field_metadata[location]['rec_index'] 538 split_char = self.field_metadata[location]['is_multiple'].get( 539 'cache_to_list', ',') 540 for id_ in candidates: 541 item = self._data[id_] 542 if item is None: 543 continue 544 545 if item[loc] is None: 546 if valq == 'false': 547 matches.add(id_) 548 continue 549 550 add_if_nothing_matches = valq == 'false' 551 pairs = [p.strip() for p in item[loc].split(split_char)] 552 for pair in pairs: 553 parts = pair.split(':') 554 if len(parts) != 2: 555 continue 556 k = parts[:1] 557 v = parts[1:] 558 if keyq and not _match(keyq, k, keyq_mkind, 559 use_primary_find_in_search=pref_use_primary_find_in_search): 560 continue 561 if valq: 562 if valq == 'true': 563 if not v: 564 continue 565 elif valq == 'false': 566 if v: 567 add_if_nothing_matches = False 568 continue 569 elif not _match(valq, v, valq_mkind, 570 use_primary_find_in_search=pref_use_primary_find_in_search): 571 continue 572 matches.add(id_) 573 574 if add_if_nothing_matches: 575 matches.add(id_) 576 return matches 577 578 def _matchkind(self, query): 579 matchkind = CONTAINS_MATCH 580 if (len(query) > 1): 581 if query.startswith('\\'): 582 query = query[1:] 583 elif query.startswith('='): 584 matchkind = EQUALS_MATCH 585 query = query[1:] 586 elif query.startswith('~'): 587 matchkind = REGEXP_MATCH 588 query = query[1:] 589 590 if matchkind != REGEXP_MATCH: 591 # leave case in regexps because it can be significant e.g. \S \W \D 592 query = icu_lower(query) 593 return matchkind, query 594 595 local_no = icu_lower(_('no')) 596 local_yes = icu_lower(_('yes')) 597 local_unchecked = icu_lower(_('unchecked')) 598 local_checked = icu_lower(_('checked')) 599 local_empty = icu_lower(_('empty')) 600 local_blank = icu_lower(_('blank')) 601 local_bool_values = ( 602 local_no, local_unchecked, '_no', 'false', 603 local_yes, local_checked, '_yes', 'true', 604 local_empty, local_blank, '_empty') 605 606 def get_bool_matches(self, location, query, candidates): 607 bools_are_tristate = self.db_prefs.get('bools_are_tristate') 608 loc = self.field_metadata[location]['rec_index'] 609 matches = set() 610 query = icu_lower(query) 611 if query not in self.local_bool_values: 612 raise ParseException(_('Invalid boolean query "{0}"').format(query)) 613 for id_ in candidates: 614 item = self._data[id_] 615 if item is None: 616 continue 617 618 val = force_to_bool(item[loc]) 619 if not bools_are_tristate: 620 if val is None or not val: # item is None or set to false 621 if query in (self.local_no, self.local_unchecked, '_no', 'false'): 622 matches.add(item[0]) 623 else: # item is explicitly set to true 624 if query in (self.local_yes, self.local_checked, '_yes', 'true'): 625 matches.add(item[0]) 626 else: 627 if val is None: 628 if query in (self.local_empty, self.local_blank, '_empty', 'false'): 629 matches.add(item[0]) 630 elif not val: # is not None and false 631 if query in (self.local_no, self.local_unchecked, '_no', 'true'): 632 matches.add(item[0]) 633 else: # item is not None and true 634 if query in (self.local_yes, self.local_checked, '_yes', 'true'): 635 matches.add(item[0]) 636 return matches 637 638 def get_matches(self, location, query, candidates=None, 639 allow_recursion=True): 640 # If candidates is not None, it must not be modified. Changing its 641 # value will break query optimization in the search parser 642 matches = set() 643 if candidates is None: 644 candidates = self.universal_set() 645 if len(candidates) == 0: 646 return matches 647 if location not in self.all_search_locations: 648 return matches 649 650 if len(location) > 2 and location.startswith('@') and \ 651 location[1:] in self.db_prefs['grouped_search_terms']: 652 location = location[1:] 653 654 if query and query.strip(): 655 # get metadata key associated with the search term. Eliminates 656 # dealing with plurals and other aliases 657 original_location = location 658 location = self.field_metadata.search_term_to_field_key(icu_lower(location.strip())) 659 # grouped search terms 660 if isinstance(location, list): 661 if allow_recursion: 662 if query.lower() == 'false': 663 invert = True 664 query = 'true' 665 else: 666 invert = False 667 for loc in location: 668 c = candidates.copy() 669 m = self.get_matches(loc, query, 670 candidates=c, allow_recursion=False) 671 matches |= m 672 c -= m 673 if len(c) == 0: 674 break 675 if invert: 676 matches = self.universal_set() - matches 677 return matches 678 raise ParseException(_('Recursive query group detected: {0}').format(query)) 679 680 # apply the limit if appropriate 681 if location == 'all' and prefs['limit_search_columns'] and \ 682 prefs['limit_search_columns_to']: 683 terms = set() 684 for l in prefs['limit_search_columns_to']: 685 l = icu_lower(l.strip()) 686 if l and l != 'all' and l in self.all_search_locations: 687 terms.add(l) 688 if terms: 689 c = candidates.copy() 690 for l in terms: 691 try: 692 m = self.get_matches(l, query, 693 candidates=c, allow_recursion=allow_recursion) 694 matches |= m 695 c -= m 696 if len(c) == 0: 697 break 698 except: 699 pass 700 return matches 701 702 if location in self.field_metadata: 703 fm = self.field_metadata[location] 704 # take care of dates special case 705 if fm['datatype'] == 'datetime' or \ 706 (fm['datatype'] == 'composite' and 707 fm['display'].get('composite_sort', '') == 'date'): 708 return self.get_dates_matches(location, query.lower(), candidates) 709 710 # take care of numbers special case 711 if fm['datatype'] in ('rating', 'int', 'float') or \ 712 (fm['datatype'] == 'composite' and 713 fm['display'].get('composite_sort', '') == 'number'): 714 return self.get_numeric_matches(location, query.lower(), candidates) 715 716 if fm['datatype'] == 'bool': 717 return self.get_bool_matches(location, query, candidates) 718 719 # take care of the 'count' operator for is_multiples 720 if fm['is_multiple'] and \ 721 len(query) > 1 and query.startswith('#') and \ 722 query[1:1] in '=<>!': 723 vf = lambda item, loc=fm['rec_index'], \ 724 ms=fm['is_multiple']['cache_to_list']:\ 725 len(item[loc].split(ms)) if item[loc] is not None else 0 726 return self.get_numeric_matches(location, query[1:], 727 candidates, val_func=vf) 728 729 # special case: colon-separated fields such as identifiers. isbn 730 # is a special case within the case 731 if fm.get('is_csp', False): 732 if location == 'identifiers' and original_location == 'isbn': 733 return self.get_keypair_matches('identifiers', 734 '=isbn:'+query, candidates) 735 return self.get_keypair_matches(location, query, candidates) 736 737 # check for user categories 738 if len(location) >= 2 and location.startswith('@'): 739 return self.get_user_category_matches(location[1:], query.lower(), 740 candidates) 741 # everything else, or 'all' matches 742 matchkind, query = self._matchkind(query) 743 744 if not isinstance(query, str): 745 query = query.decode('utf-8') 746 747 db_col = {} 748 exclude_fields = [] # fields to not check when matching against text. 749 col_datatype = [] 750 is_multiple_cols = {} 751 for x in range(len(self.FIELD_MAP)): 752 col_datatype.append('') 753 for x in self.field_metadata: 754 if x.startswith('@'): 755 continue 756 if len(self.field_metadata[x]['search_terms']): 757 db_col[x] = self.field_metadata[x]['rec_index'] 758 if self.field_metadata[x]['datatype'] not in \ 759 ['composite', 'text', 'comments', 'series', 'enumeration']: 760 exclude_fields.append(db_col[x]) 761 col_datatype[db_col[x]] = self.field_metadata[x]['datatype'] 762 is_multiple_cols[db_col[x]] = \ 763 self.field_metadata[x]['is_multiple'].get('cache_to_list', None) 764 765 try: 766 rating_query = int(query) * 2 767 except: 768 rating_query = None 769 770 location = [location] if location != 'all' else list(db_col.keys()) 771 for i, loc in enumerate(location): 772 location[i] = db_col[loc] 773 774 current_candidates = candidates.copy() 775 for loc in location: # location is now an array of field indices 776 if loc == db_col['authors']: 777 # DB stores authors with commas changed to bars, so change query 778 if matchkind == REGEXP_MATCH: 779 q = query.replace(',', r'\|') 780 else: 781 q = query.replace(',', '|') 782 elif loc == db_col['languages']: 783 q = canonicalize_lang(query) 784 if q is None: 785 lm = lang_map() 786 rm = {v.lower():k for k,v in iteritems(lm)} 787 q = rm.get(query, query) 788 else: 789 q = query 790 791 for id_ in current_candidates: 792 item = self._data[id_] 793 if item is None: 794 continue 795 796 if not item[loc]: 797 if q == 'false' and matchkind == CONTAINS_MATCH: 798 matches.add(item[0]) 799 continue # item is empty. No possible matches below 800 if q == 'false'and matchkind == CONTAINS_MATCH: 801 # Field has something in it, so a false query does not match 802 continue 803 804 if q == 'true' and matchkind == CONTAINS_MATCH: 805 if isinstance(item[loc], string_or_bytes): 806 if item[loc].strip() == '': 807 continue 808 matches.add(item[0]) 809 continue 810 811 if col_datatype[loc] == 'rating': # get here if 'all' query 812 if rating_query and rating_query == int(item[loc]): 813 matches.add(item[0]) 814 continue 815 816 try: # a conversion below might fail 817 # relationals are not supported in 'all' queries 818 if col_datatype[loc] == 'float': 819 if float(query) == item[loc]: 820 matches.add(item[0]) 821 continue 822 if col_datatype[loc] == 'int': 823 if int(query) == item[loc]: 824 matches.add(item[0]) 825 continue 826 except: 827 # A conversion threw an exception. Because of the type, 828 # no further match is possible 829 continue 830 831 if loc not in exclude_fields: # time for text matching 832 if is_multiple_cols[loc] is not None: 833 vals = [v.strip() for v in item[loc].split(is_multiple_cols[loc])] 834 else: 835 vals = [item[loc]] # make into list to make _match happy 836 if _match(q, vals, matchkind, 837 use_primary_find_in_search=pref_use_primary_find_in_search): 838 matches.add(item[0]) 839 continue 840 current_candidates -= matches 841 return matches 842 843 def search(self, query, return_matches=False, sort_results=True): 844 ans = self.search_getting_ids(query, self.search_restriction, 845 set_restriction_count=True, sort_results=sort_results) 846 if return_matches: 847 return ans 848 self._map_filtered = ans 849 850 def _build_restriction_string(self, restriction): 851 if self.base_restriction: 852 if restriction: 853 return '(%s) and (%s)' % (self.base_restriction, restriction) 854 else: 855 return self.base_restriction 856 else: 857 return restriction 858 859 def search_getting_ids(self, query, search_restriction, 860 set_restriction_count=False, use_virtual_library=True, sort_results=True): 861 if use_virtual_library: 862 search_restriction = self._build_restriction_string(search_restriction) 863 q = '' 864 if not query or not query.strip(): 865 q = search_restriction 866 else: 867 q = query 868 if search_restriction: 869 q = '(%s) and (%s)' % (search_restriction, query) 870 if not q: 871 if set_restriction_count: 872 self.search_restriction_book_count = len(self._map) 873 return list(self._map) 874 matches = self.parse(q) 875 tmap = list(repeat(False, len(self._data))) 876 for x in matches: 877 tmap[x] = True 878 rv = [x for x in self._map if tmap[x]] 879 if set_restriction_count and q == search_restriction: 880 self.search_restriction_book_count = len(rv) 881 return rv 882 883 def get_search_restriction(self): 884 return self.search_restriction 885 886 def set_search_restriction(self, s): 887 self.search_restriction = s 888 889 def get_base_restriction(self): 890 return self.base_restriction 891 892 def set_base_restriction(self, s): 893 self.base_restriction = s 894 895 def get_base_restriction_name(self): 896 return self.base_restriction_name 897 898 def set_base_restriction_name(self, s): 899 self.base_restriction_name = s 900 901 def get_search_restriction_name(self): 902 return self.search_restriction_name 903 904 def set_search_restriction_name(self, s): 905 self.search_restriction_name = s 906 907 def search_restriction_applied(self): 908 return bool(self.search_restriction) or bool(self.base_restriction) 909 910 def get_search_restriction_book_count(self): 911 return self.search_restriction_book_count 912 913 def set_marked_ids(self, id_dict): 914 ''' 915 ids in id_dict are "marked". They can be searched for by 916 using the search term ``marked:true``. Pass in an empty dictionary or 917 set to clear marked ids. 918 919 :param id_dict: Either a dictionary mapping ids to values or a set 920 of ids. In the latter case, the value is set to 'true' for all ids. If 921 a mapping is provided, then the search can be used to search for 922 particular values: ``marked:value`` 923 ''' 924 if not hasattr(id_dict, 'items'): 925 # Simple list. Make it a dict of string 'true' 926 self.marked_ids_dict = dict.fromkeys(id_dict, 'true') 927 else: 928 # Ensure that all the items in the dict are text 929 self.marked_ids_dict = dict(zip(iter(id_dict), map(str, 930 itervalues(id_dict)))) 931 932 # Set the values in the cache 933 marked_col = self.FIELD_MAP['marked'] 934 for r in self.iterall(): 935 r[marked_col] = None 936 937 for id_, val in iteritems(self.marked_ids_dict): 938 try: 939 self._data[id_][marked_col] = val 940 except: 941 pass 942 943 def get_marked(self, idx, index_is_id=True, default_value=None): 944 id_ = idx if index_is_id else self[idx][0] 945 return self.marked_ids_dict.get(id_, default_value) 946 947 # }}} 948 949 def remove(self, id): 950 try: 951 self._uuid_map.pop(self._data[id][self._uuid_column_index], None) 952 except (IndexError, TypeError): 953 pass # id is out of bounds -- no uuid in the map to remove 954 try: 955 self._data[id] = None 956 except IndexError: 957 pass # id is out of bounds, no point setting it to None anyway 958 try: 959 self._map.remove(id) 960 except ValueError: 961 pass 962 try: 963 self._map_filtered.remove(id) 964 except ValueError: 965 pass 966 967 def set(self, row, col, val, row_is_id=False): 968 id = row if row_is_id else self._map_filtered[row] 969 d = self._data[id] 970 if col == self._uuid_column_index: 971 self._uuid_map.pop(d[col], None) 972 d[col] = val 973 if col == self._uuid_column_index: 974 self._uuid_map[val] = id 975 d.refresh_composites() 976 977 def get(self, row, col, row_is_id=False): 978 id = row if row_is_id else self._map_filtered[row] 979 return self._data[id][col] 980 981 def index(self, id, cache=False): 982 x = self._map if cache else self._map_filtered 983 return x.index(id) 984 985 def row(self, id): 986 return self.index(id) 987 988 def has_id(self, id): 989 try: 990 return self._data[id] is not None 991 except IndexError: 992 pass 993 return False 994 995 def refresh_ids(self, db, ids): 996 ''' 997 Refresh the data in the cache for books identified by ids. 998 Returns a list of affected rows or None if the rows are filtered. 999 ''' 1000 for id in ids: 1001 try: 1002 self._data[id] = CacheRow(db, self.composites, self.datetimes, 1003 db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0], 1004 self.series_col, self.series_sort_col) 1005 self._data[id].append(db.book_on_device_string(id)) 1006 self._data[id].append(self.marked_ids_dict.get(id, None)) 1007 self._data[id].append(None) 1008 self._uuid_map[self._data[id][self._uuid_column_index]] = id 1009 except IndexError: 1010 return None 1011 try: 1012 return list(map(self.row, ids)) 1013 except ValueError: 1014 pass 1015 return None 1016 1017 def books_added(self, ids, db): 1018 if not ids: 1019 return 1020 self._data.extend(repeat(None, max(ids)-len(self._data)+2)) 1021 for id in ids: 1022 self._data[id] = CacheRow(db, self.composites, self.datetimes, 1023 db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0], 1024 self.series_col, self.series_sort_col) 1025 self._data[id].append(db.book_on_device_string(id)) 1026 self._data[id].append(self.marked_ids_dict.get(id, None)) 1027 self._data[id].append(None) # Series sort column 1028 self._uuid_map[self._data[id][self._uuid_column_index]] = id 1029 self._map[0:0] = ids 1030 self._map_filtered[0:0] = ids 1031 1032 def books_deleted(self, ids): 1033 for id in ids: 1034 self.remove(id) 1035 1036 def count(self): 1037 return len(self._map) 1038 1039 def refresh_ondevice(self, db): 1040 ondevice_col = self.FIELD_MAP['ondevice'] 1041 for item in self._data: 1042 if item is not None: 1043 item[ondevice_col] = db.book_on_device_string(item[0]) 1044 item.refresh_composites() 1045 1046 def refresh(self, db, field=None, ascending=True): 1047 # reinitialize the template cache in case a composite column has changed 1048 db.initialize_template_cache() 1049 1050 temp = db.conn.get('SELECT * FROM meta2') 1051 self._data = list(repeat(None, temp[-1][0]+2)) if temp else [] 1052 for r in temp: 1053 self._data[r[0]] = CacheRow(db, self.composites, self.datetimes, r, 1054 self.series_col, self.series_sort_col) 1055 self._uuid_map[self._data[r[0]][self._uuid_column_index]] = r[0] 1056 1057 for item in self._data: 1058 if item is not None: 1059 item.append(db.book_on_device_string(item[0])) 1060 # Temp mark and series_sort columns 1061 item.extend((None, None)) 1062 1063 marked_col = self.FIELD_MAP['marked'] 1064 for id_,val in iteritems(self.marked_ids_dict): 1065 try: 1066 self._data[id_][marked_col] = val 1067 except: 1068 pass 1069 1070 self._map = [i[0] for i in self._data if i is not None] 1071 if field is not None: 1072 self.sort(field, ascending) 1073 self._map_filtered = list(self._map) 1074 if self.search_restriction or self.base_restriction: 1075 self.search('', return_matches=False) 1076 1077 # Sorting functions {{{ 1078 1079 def sanitize_sort_field_name(self, field): 1080 field = self.field_metadata.search_term_to_field_key(field.lower().strip()) 1081 # translate some fields to their hidden equivalent 1082 if field == 'title': 1083 field = 'sort' 1084 elif field == 'authors': 1085 field = 'author_sort' 1086 return field 1087 1088 def sort(self, field, ascending, subsort=False): 1089 self.multisort([(field, ascending)]) 1090 1091 def multisort(self, fields=[], subsort=False, only_ids=None): 1092 ''' 1093 fields is a list of 2-tuple, each tuple is of the form 1094 (field_name, is_ascending) 1095 1096 If only_ids is a list of ids, this function will sort that list instead 1097 of the internal mapping of ids. 1098 ''' 1099 fields = [(self.sanitize_sort_field_name(x), bool(y)) for x, y in fields] 1100 keys = self.field_metadata.sortable_field_keys() 1101 fields = [x for x in fields if x[0] in keys] 1102 if subsort and 'sort' not in [x[0] for x in fields]: 1103 fields += [('sort', True)] 1104 if not fields: 1105 fields = [('timestamp', False)] 1106 1107 keyg = SortKeyGenerator(fields, self.field_metadata, self._data, self.db_prefs) 1108 if only_ids is None: 1109 self._map.sort(key=keyg) 1110 1111 tmap = list(repeat(False, len(self._data))) 1112 for x in self._map_filtered: 1113 tmap[x] = True 1114 self._map_filtered = [x for x in self._map if tmap[x]] 1115 else: 1116 only_ids.sort(key=keyg) 1117 1118 1119class SortKey: 1120 1121 def __init__(self, orders, values): 1122 self.orders, self.values = orders, values 1123 1124 def compare_to_other(self, other): 1125 for i, ascending in enumerate(self.orders): 1126 ans = cmp(self.values[i], other.values[i]) 1127 if ans != 0: 1128 return ans * ascending 1129 return 0 1130 1131 def __eq__(self, other): 1132 return self.compare_to_other(other) == 0 1133 1134 def __ne__(self, other): 1135 return self.compare_to_other(other) != 0 1136 1137 def __lt__(self, other): 1138 return self.compare_to_other(other) < 0 1139 1140 def __le__(self, other): 1141 return self.compare_to_other(other) <= 0 1142 1143 def __gt__(self, other): 1144 return self.compare_to_other(other) > 0 1145 1146 def __ge__(self, other): 1147 return self.compare_to_other(other) >= 0 1148 1149 1150class SortKeyGenerator: 1151 1152 def __init__(self, fields, field_metadata, data, db_prefs): 1153 from calibre.utils.icu import sort_key 1154 self.field_metadata = field_metadata 1155 self.db_prefs = db_prefs 1156 self.orders = [1 if x[1] else -1 for x in fields] 1157 self.entries = [(x[0], field_metadata[x[0]]) for x in fields] 1158 self.library_order = tweaks['title_series_sorting'] == 'library_order' 1159 self.data = data 1160 self.string_sort_key = sort_key 1161 self.lang_idx = field_metadata['languages']['rec_index'] 1162 1163 def __call__(self, record): 1164 values = tuple(self.itervals(self.data[record])) 1165 return SortKey(self.orders, values) 1166 1167 def itervals(self, record): 1168 for name, fm in self.entries: 1169 dt = fm['datatype'] 1170 val = record[fm['rec_index']] 1171 if dt == 'composite': 1172 sb = fm['display'].get('composite_sort', 'text') 1173 if sb == 'date': 1174 try: 1175 val = parse_date(val) 1176 except: 1177 val = UNDEFINED_DATE 1178 dt = 'datetime' 1179 elif sb == 'number': 1180 try: 1181 p = 1 1182 for i, candidate in enumerate( 1183 ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB')): 1184 if val.endswith(candidate): 1185 p = 1024**(i) 1186 val = val[:-len(candidate)].strip() 1187 break 1188 val = locale.atof(val) * p 1189 except: 1190 val = 0.0 1191 dt = 'float' 1192 elif sb == 'bool': 1193 val = force_to_bool(val) 1194 dt = 'bool' 1195 1196 if dt == 'datetime': 1197 if val is None: 1198 val = UNDEFINED_DATE 1199 if tweaks['sort_dates_using_visible_fields']: 1200 format = None 1201 if name == 'timestamp': 1202 format = tweaks['gui_timestamp_display_format'] 1203 elif name == 'pubdate': 1204 format = tweaks['gui_pubdate_display_format'] 1205 elif name == 'last_modified': 1206 format = tweaks['gui_last_modified_display_format'] 1207 elif fm['is_custom']: 1208 format = fm['display'].get('date_format', None) 1209 val = clean_date_for_sort(val, format) 1210 elif dt == 'series': 1211 if val is None: 1212 val = ('', 1) 1213 else: 1214 if self.library_order: 1215 try: 1216 lang = record[self.lang_idx].partition(',')[0] 1217 except (AttributeError, ValueError, KeyError, 1218 IndexError, TypeError): 1219 lang = None 1220 val = title_sort(val, order='library_order', lang=lang) 1221 sidx_fm = self.field_metadata[name + '_index'] 1222 sidx = record[sidx_fm['rec_index']] 1223 val = (self.string_sort_key(val), sidx) 1224 1225 elif dt in ('text', 'comments', 'composite', 'enumeration'): 1226 if val: 1227 if fm['is_multiple']: 1228 jv = fm['is_multiple']['list_to_ui'] 1229 sv = fm['is_multiple']['cache_to_list'] 1230 if '&' in jv: 1231 val = jv.join( 1232 [author_to_author_sort(v) for v in val.split(sv)]) 1233 else: 1234 val = jv.join(sorted(val.split(sv), 1235 key=self.string_sort_key)) 1236 val = self.string_sort_key(val) 1237 1238 elif dt == 'bool': 1239 if not self.db_prefs.get('bools_are_tristate'): 1240 val = {True: 1, False: 2, None: 2}.get(val, 2) 1241 else: 1242 val = {True: 1, False: 2, None: 3}.get(val, 3) 1243 1244 yield val 1245 1246 # }}} 1247 1248# }}} 1249