#!/usr/local/bin/python3.8
# vim:fileencoding=utf-8


__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

import os, errno, sys, re
from locale import localeconv
from collections import OrderedDict, namedtuple
from polyglot.builtins import iteritems, itervalues, string_or_bytes
from threading import Lock

from calibre import as_unicode, prints
from calibre.constants import cache_dir, get_windows_number_formats, iswindows, preferred_encoding

from calibre.utils.localization import canonicalize_lang


def force_to_bool(val):
    """Coerce a text value to ``True``, ``False`` or ``None``.

    Values that are neither ``str`` nor ``bytes`` are returned unchanged.
    ``bytes`` are decoded with ``preferred_encoding`` (errors replaced).
    The lower-cased text is then interpreted as follows:

    * empty string -> ``None``
    * ``'true'``, ``'yes'`` or the translated ``yes``/``checked`` -> ``True``
    * ``'false'``, ``'no'`` or the translated ``no``/``unchecked`` -> ``False``
    * anything else -> ``bool(int(val))``

    Any failure during the interpretation yields ``None``.
    """
    if isinstance(val, (bytes, str)):
        if isinstance(val, bytes):
            val = val.decode(preferred_encoding, 'replace')
        try:
            # NOTE(review): icu_lower and _ are not imported in this file;
            # presumably they are installed as builtins elsewhere -- confirm.
            val = icu_lower(val)
            if not val:
                val = None
            elif val in [_('yes'), _('checked'), 'true', 'yes']:
                val = True
            elif val in [_('no'), _('unchecked'), 'false', 'no']:
                val = False
            else:
                val = bool(int(val))
        except Exception:
            # Deliberately best-effort: unparseable text means "no value".
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not swallowed.
            val = None
    return val


_fuzzy_title_patterns = None


def fuzzy_title_patterns():
    """Return the (compiled pattern, replacement) pairs used by fuzzy_title().

    The tuple is built on first use and cached in the module-level
    ``_fuzzy_title_patterns``. Entries given as strings are compiled with
    ``re.IGNORECASE``; the value returned by ``get_title_sort_pat()`` is used
    as-is when it is not a string.
    """
    global _fuzzy_title_patterns
    if _fuzzy_title_patterns is None:
        # Imported lazily, only when the patterns are first needed.
        from calibre.ebooks.metadata import get_title_sort_pat
        _fuzzy_title_patterns = tuple((re.compile(pat, re.IGNORECASE) if
            isinstance(pat, string_or_bytes) else pat, repl) for pat, repl in
                [
                    (r'[\[\](){}<>\'";,:#]', ''),
                    (get_title_sort_pat(), ''),
                    (r'[-._]', ' '),
                    (r'\s+', ' ')
                ]
        )
    return _fuzzy_title_patterns


def fuzzy_title(title):
    """Return a normalized form of *title* for approximate comparison.

    The title is stripped, lower-cased with ``icu_lower`` and then passed
    through every substitution from fuzzy_title_patterns() in order.
    """
    title = icu_lower(title.strip())
    for pat, repl in fuzzy_title_patterns():
        title = pat.sub(repl, title)
    return title


def find_identical_books(mi, data):
    """Return the set of book ids in *data* that look identical to *mi*.

    :param mi: object providing ``authors``, ``title`` and ``languages``.
    :param data: 4-tuple ``(author_map, aid_map, title_map, lang_map)`` where
        ``author_map`` maps lower-cased author name -> author ids,
        ``aid_map`` maps author id -> book ids, ``title_map`` maps
        book id -> title and ``lang_map`` maps book id -> languages.
    :return: set of book ids written by *all* of ``mi.authors`` whose fuzzy
        title equals that of ``mi.title`` and, when ``mi`` specifies
        languages, whose languages are either unset or equal to them.
    """
    author_map, aid_map, title_map, lang_map = data
    found_books = None
    for a in mi.authors:
        author_ids = author_map.get(icu_lower(a))
        if author_ids is None:
            # An unknown author means no book can match all authors.
            return set()
        books_by_author = {book_id for aid in author_ids for book_id in aid_map.get(aid, ())}
        if found_books is None:
            found_books = books_by_author
        else:
            found_books &= books_by_author
        if not found_books:
            return set()

    ans = set()
    titleq = fuzzy_title(mi.title)
    for book_id in found_books:
        title = title_map.get(book_id, '')
        if fuzzy_title(title) == titleq:
            ans.add(book_id)

    # Drop empty and undetermined ('und') language codes from the query.
    langq = tuple(filter(lambda x: x and x != 'und', map(canonicalize_lang, mi.languages or ())))
    if not langq:
        return ans

    def lang_matches(book_id):
        book_langq = lang_map.get(book_id)
        # A book without language information matches any query language.
        return not book_langq or langq == book_langq

    return {book_id for book_id in ans if lang_matches(book_id)}


# One cached thumbnail: file path, size in bytes, cover timestamp and the
# (width, height) the thumbnail was rendered at.
Entry = namedtuple('Entry', 'path size timestamp thumbnail_size')


class CacheError(Exception):
    """Raised instead of logging when the cache runs in test mode."""
    pass


class ThumbnailCache:

    ''' This is a persistent disk cache to speed up loading and resizing of covers

    Thumbnails are stored below ``self.location`` as
    ``<group_id>/<book_id % 100>/<book_id>-<timestamp>-<size>-<width>x<height>``.
    The in-memory index (``self.items`` / ``self.total_size``) is loaded
    lazily on first use; ``self.items`` is ordered oldest-first, so entries
    are evicted from the front when the cache exceeds ``max_size``.
    '''

    def __init__(self,
                 max_size=1024,  # The maximum disk space in MB
                 name='thumbnail-cache',  # The name of this cache (should be unique in location)
                 thumbnail_size=(100, 100),   # The size of the thumbnails, can be changed
                 location=None,   # The location for this cache, if None cache_dir() is used
                 test_mode=False,  # Used for testing
                 min_disk_cache=0):  # If the size is set less than or equal to this value, the cache is disabled.
        self.location = os.path.join(location or cache_dir(), name)
        if max_size <= min_disk_cache:
            max_size = 0
        self.max_size = int(max_size * (1024**2))
        self.group_id = 'group'
        self.thumbnail_size = thumbnail_size
        self.size_changed = False
        self.lock = Lock()
        self.min_disk_cache = min_disk_cache
        if test_mode:
            # In test mode every logged problem becomes a CacheError.
            self.log = self.fail_on_error

    def log(self, *args, **kwargs):
        """Print a diagnostic message to stderr."""
        kwargs['file'] = sys.stderr
        prints(*args, **kwargs)

    def fail_on_error(self, *args, **kwargs):
        """Replacement for log() in test mode: raise CacheError with the message."""
        msg = ' '.join(args)
        raise CacheError(msg)

    def _do_delete(self, path):
        """Remove the file at *path*, logging (not raising) on failure."""
        try:
            os.remove(path)
        except OSError as err:
            self.log('Failed to delete cached thumbnail file:', as_unicode(err))

    def _load_index(self):
        'Load the index, automatically removing incorrectly sized thumbnails and pruning to fit max_size'
        try:
            os.makedirs(self.location)
        except OSError as err:
            if err.errno != errno.EEXIST:
                self.log('Failed to make thumbnail cache dir:', as_unicode(err))
        self.total_size = 0
        self.items = OrderedDict()
        order = self._read_order()

        def listdir(*args):
            try:
                return os.listdir(os.path.join(*args))
            except OSError:
                return ()  # not a directory or no permission or whatever
        # Lazily enumerate every <group>/<bucket>/<file> below the cache dir.
        entries = ('/'.join((parent, subdir, entry))
                   for parent in listdir(self.location)
                   for subdir in listdir(self.location, parent)
                   for entry in listdir(self.location, parent, subdir))

        # Keys recorded by invalidate() while the index was not loaded.
        invalidate = set()
        try:
            with open(os.path.join(self.location, 'invalidate'), 'rb') as f:
                raw = f.read().decode('utf-8')
        except OSError as err:
            if getattr(err, 'errno', None) != errno.ENOENT:
                self.log('Failed to read thumbnail invalidate data:', as_unicode(err))
        else:
            try:
                os.remove(os.path.join(self.location, 'invalidate'))
            except OSError as err:
                self.log('Failed to remove thumbnail invalidate data:', as_unicode(err))
            else:
                def record(line):
                    # Parse one "<group_id> <book_id>" line; None if malformed.
                    try:
                        uuid, book_id = line.partition(' ')[0::2]
                        book_id = int(book_id)
                        return (uuid, book_id)
                    except Exception:
                        return None
                invalidate = {record(x) for x in raw.splitlines()}
        items = []
        try:
            for entry in entries:
                try:
                    # entry is "<group>/<bucket>/<file>"; keep group and file.
                    uuid, name = entry.split('/')[0::2]
                    book_id, timestamp, size, thumbnail_size = name.split('-')
                    book_id, timestamp, size = int(book_id), float(timestamp), int(size)
                    thumbnail_size = tuple(map(int, thumbnail_size.partition('x')[0::2]))
                except (ValueError, TypeError, IndexError, KeyError, AttributeError):
                    # Not a file name this cache wrote; ignore it.
                    continue
                key = (uuid, book_id)
                path = os.path.join(self.location, entry)
                if self.thumbnail_size == thumbnail_size and key not in invalidate:
                    items.append((key, Entry(path, size, timestamp, thumbnail_size)))
                    self.total_size += size
                else:
                    self._do_delete(path)
        except OSError as err:
            self.log('Failed to read thumbnail cache dir:', as_unicode(err))

        # Restore the saved ordering; keys missing from it sort first.
        self.items = OrderedDict(sorted(items, key=lambda x: order.get(x[0], 0)))
        self._apply_size()

    def _invalidate_sizes(self):
        """Drop every entry whose size differs from the current thumbnail size.

        Only does work when set_thumbnail_size() flagged a change.
        """
        if self.size_changed:
            size = self.thumbnail_size
            remove = tuple(key for key, entry in iteritems(self.items) if size != entry.thumbnail_size)
            for key in remove:
                self._remove(key)
            self.size_changed = False

    def _remove(self, key):
        """Remove *key* from the index and delete its file, if present."""
        entry = self.items.pop(key, None)
        if entry is not None:
            self._do_delete(entry.path)
            self.total_size -= entry.size

    def _apply_size(self):
        """Evict entries from the front of the index until it fits max_size."""
        while self.total_size > self.max_size and self.items:
            entry = self.items.popitem(last=False)[1]
            self._do_delete(entry.path)
            self.total_size -= entry.size

    def _write_order(self):
        """Persist the current key ordering to the 'order' file.

        Nothing is written when the index was never loaded.
        """
        if hasattr(self, 'items'):
            try:
                data = '\n'.join(group_id + ' ' + str(book_id) for (group_id, book_id) in self.items)
                with lopen(os.path.join(self.location, 'order'), 'wb') as f:
                    f.write(data.encode('utf-8'))
            except OSError as err:
                self.log('Failed to save thumbnail cache order:', as_unicode(err))

    def _read_order(self):
        """Return a dict mapping (group_id, book_id) to its saved position.

        A missing 'order' file yields an empty dict silently; any other
        failure is logged and whatever was parsed so far is returned.
        """
        order = {}
        try:
            with lopen(os.path.join(self.location, 'order'), 'rb') as f:
                for line in f.read().decode('utf-8').splitlines():
                    parts = line.split(' ', 1)
                    if len(parts) == 2:
                        order[(parts[0], int(parts[1]))] = len(order)
        except Exception as err:
            if getattr(err, 'errno', None) != errno.ENOENT:
                self.log('Failed to load thumbnail cache order:', as_unicode(err))
        return order

    def shutdown(self):
        """Save the entry ordering to disk."""
        with self.lock:
            self._write_order()

    def set_group_id(self, group_id):
        """Set the group id used as the first component of every key."""
        with self.lock:
            self.group_id = group_id

    def set_thumbnail_size(self, width, height):
        """Change the thumbnail size; return True if it actually changed.

        Entries of the old size are dropped on the next insert or lookup.
        """
        new_size = (width, height)
        with self.lock:
            if new_size != self.thumbnail_size:
                self.thumbnail_size = new_size
                self.size_changed = True
                return True
        return False

    def insert(self, book_id, timestamp, data):
        """Store *data* (bytes) as the thumbnail for *book_id*.

        Data larger than the whole cache is ignored. Any existing entry for
        the book in the current group is replaced. Write failures are logged
        and leave the book without a cache entry.
        """
        if self.max_size < len(data):
            return
        with self.lock:
            if not hasattr(self, 'total_size'):
                self._load_index()
            self._invalidate_sizes()
            ts = ('%.2f' % timestamp).replace('.00', '')
            path = '%s%s%s%s%d-%s-%d-%dx%d' % (
                self.group_id, os.sep, book_id % 100, os.sep,
                book_id, ts, len(data), self.thumbnail_size[0], self.thumbnail_size[1])
            path = os.path.join(self.location, path)
            key = (self.group_id, book_id)
            e = self.items.pop(key, None)
            if e is not None:
                self.total_size -= e.size
                if e.path != path:
                    # The file name embeds timestamp and size, so a changed
                    # thumbnail gets a new path. Remove the old file, which
                    # would otherwise stay on disk untracked by total_size.
                    self._do_delete(e.path)
            try:
                with open(path, 'wb') as f:
                    f.write(data)
            except OSError as err:
                d = os.path.dirname(path)
                if not os.path.exists(d):
                    # Most likely the bucket directory is missing: create it
                    # and retry once.
                    try:
                        os.makedirs(d)
                        with open(path, 'wb') as f:
                            f.write(data)
                    except OSError as err:
                        self.log('Failed to write cached thumbnail:', path, as_unicode(err))
                        return self._apply_size()
                else:
                    self.log('Failed to write cached thumbnail:', path, as_unicode(err))
                    return self._apply_size()
            self.items[key] = Entry(path, len(data), timestamp, self.thumbnail_size)
            self.total_size += len(data)
            self._apply_size()

    def __len__(self):
        """Number of cached entries (loads the index if necessary)."""
        with self.lock:
            try:
                return len(self.items)
            except AttributeError:
                self._load_index()
                return len(self.items)

    def __contains__(self, book_id):
        """True if *book_id* has an entry in the current group."""
        with self.lock:
            try:
                return (self.group_id, book_id) in self.items
            except AttributeError:
                self._load_index()
                return (self.group_id, book_id) in self.items

    def __getitem__(self, book_id):
        """Return ``(data, timestamp)`` for *book_id* or ``(None, None)``.

        A successful lookup moves the entry to the end of the index, making
        it the last candidate for eviction.
        """
        with self.lock:
            if not hasattr(self, 'total_size'):
                self._load_index()
            self._invalidate_sizes()
            key = (self.group_id, book_id)
            entry = self.items.pop(key, None)
            if entry is None:
                return None, None
            if entry.thumbnail_size != self.thumbnail_size:
                try:
                    os.remove(entry.path)
                except OSError as err:
                    if getattr(err, 'errno', None) != errno.ENOENT:
                        self.log('Failed to remove cached thumbnail:', entry.path, as_unicode(err))
                self.total_size -= entry.size
                return None, None
            # Re-insert so the entry becomes the most recently used one.
            self.items[key] = entry
            try:
                with open(entry.path, 'rb') as f:
                    data = f.read()
            except OSError as err:
                self.log('Failed to read cached thumbnail:', entry.path, as_unicode(err))
                return None, None
            return data, entry.timestamp

    def invalidate(self, book_ids):
        """Discard the cached thumbnails of *book_ids* in the current group.

        When the index is loaded the entries are removed immediately.
        Otherwise, if the cache directory exists, the keys are appended to
        the 'invalidate' file, which _load_index() applies and deletes the
        next time the index is loaded.
        """
        with self.lock:
            if hasattr(self, 'total_size'):
                for book_id in book_ids:
                    self._remove((self.group_id, book_id))
            elif os.path.exists(self.location):
                try:
                    # The file is opened in append mode, so every record must
                    # be newline-terminated; otherwise the last record of
                    # this call fuses with the first record of the next one
                    # and both are discarded as unparseable when read back.
                    # UTF-8 matches the decoding used by _load_index().
                    raw = ''.join('%s %d\n' % (self.group_id, book_id) for book_id in book_ids)
                    with open(os.path.join(self.location, 'invalidate'), 'ab') as f:
                        f.write(raw.encode('utf-8'))
                except OSError as err:
                    self.log('Failed to write invalidate thumbnail record:', as_unicode(err))

    @property
    def current_size(self):
        """Total size in bytes of all indexed thumbnails."""
        with self.lock:
            if not hasattr(self, 'total_size'):
                self._load_index()
            return self.total_size

    def empty(self):
        """Delete every cached thumbnail and the saved ordering."""
        with self.lock:
            try:
                os.remove(os.path.join(self.location, 'order'))
            except OSError:
                pass
            if not hasattr(self, 'total_size'):
                self._load_index()
            for entry in itervalues(self.items):
                self._do_delete(entry.path)
            self.total_size = 0
            self.items = OrderedDict()

    def __hash__(self):
        return id(self)

    def set_size(self, size_in_mb):
        """Set the maximum cache size in MB, evicting entries if needed.

        A size less than or equal to ``min_disk_cache`` (or negative) is
        treated as zero.
        """
        if size_in_mb <= self.min_disk_cache:
            size_in_mb = 0
        size_in_mb = max(0, size_in_mb)
        with self.lock:
            self.max_size = int(size_in_mb * (1024**2))
            if hasattr(self, 'total_size'):
                self._apply_size()


# Cached (thousands separator, decimal point) pair, filled in by atof().
number_separators = None


def atof(string):
    """Convert a number formatted with the locale's separators to a float.

    The separators are looked up once (from get_windows_number_formats() on
    Windows, from localeconv() elsewhere) and cached in the module-level
    ``number_separators``.

    :raises ValueError: if the de-localized text is not a valid float.
    """
    global number_separators
    if number_separators is None:
        if iswindows:
            number_separators = get_windows_number_formats()
        else:
            lc = localeconv()
            t, d = lc['thousands_sep'], lc['decimal_point']
            if isinstance(t, bytes):
                t = t.decode('utf-8', 'ignore') or ','
            if isinstance(d, bytes):
                d = d.decode('utf-8', 'ignore') or '.'
            number_separators = t, d
    thousands_sep, decimal_point = number_separators[0], number_separators[1]
    # Strip the grouping character BEFORE converting the decimal point. In
    # locales where the thousands separator is '.', doing it the other way
    # round would delete the freshly inserted decimal point as well.
    if thousands_sep:
        string = string.replace(thousands_sep, '')
    if decimal_point:
        string = string.replace(decimal_point, '.')
    return float(string)


def type_safe_sort_key_function(keyfunc=None):
    """Wrap *keyfunc* so that sorting never fails on un-orderable keys.

    The returned key function remembers the first key it produces. Every
    later key is test-compared against it in both directions; if that
    raises TypeError the first key is returned in place of the offending
    one. With ``keyfunc=None`` the items themselves are used as keys.
    """
    if keyfunc is None:
        keyfunc = lambda x: x
    sentinel = object()
    first_value = sentinel

    def key(x):
        nonlocal first_value
        ans = keyfunc(x)
        if first_value is sentinel:
            first_value = ans
        else:
            try:
                # Results are discarded; only the TypeError matters.
                ans < first_value
                first_value < ans
            except TypeError:
                ans = first_value
        return ans

    return key