#!/usr/local/bin/python3.8
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>


import errno
import json
import os
import tempfile
import time
from hashlib import sha1
from itertools import count

from calibre import walk
from calibre.constants import cache_dir, iswindows
from calibre.ptempfile import TemporaryFile
from calibre.srv.render_book import RENDER_VERSION
from calibre.utils.filenames import rmtree
from calibre.utils.ipc.simple_worker import start_pipe_worker
from calibre.utils.lock import ExclusiveFile
from calibre.utils.serialize import msgpack_dumps
from calibre.utils.short_uuid import uuid4
from polyglot.builtins import as_bytes, as_unicode, iteritems

DAY = 24 * 3600
VIEWER_VERSION = 1
td_counter = count()


def book_cache_dir():
    return getattr(book_cache_dir, 'override', os.path.join(cache_dir(), 'ev2'))


def cache_lock():
    return ExclusiveFile(os.path.join(book_cache_dir(), 'metadata.json'))


def book_hash(path, size, mtime):
    path = os.path.normcase(os.path.abspath(path))
    raw = json.dumps((path, size, mtime, RENDER_VERSION, VIEWER_VERSION))
    if not isinstance(raw, bytes):
        raw = raw.encode('utf-8')
    return as_unicode(sha1(raw).hexdigest())


def safe_makedirs(path):
    try:
        os.makedirs(path)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise
    return path


def robust_rmtree(x):
    retries = 2 if iswindows else 1  # retry on windows to get around the idiotic mandatory file locking
    for i in range(retries):
        try:
            try:
                rmtree(x)
            except UnicodeDecodeError:
                rmtree(as_bytes(x))
            return True
        except OSError:
            time.sleep(0.1)
    return False


def robust_rename(a, b):
    retries = 20 if iswindows else 1  # retry on windows to get around the idiotic mandatory file locking
    for i in range(retries):
        try:
            os.rename(a, b)
            return True
        except OSError:
            time.sleep(0.1)
    return False


def clear_temp(temp_path):
    now = time.time()
    for x in os.listdir(temp_path):
        x = os.path.join(temp_path, x)
        mtime = os.path.getmtime(x)
        if now - mtime > DAY:
            robust_rmtree(x)


def expire_cache(path, instances, max_age):
    now = time.time()
    remove = [x for x in instances if now - x['atime'] > max_age and x['status'] == 'finished']
    for instance in remove:
        if robust_rmtree(os.path.join(path, instance['path'])):
            instances.remove(instance)


def expire_old_versions(path, instances):
    instances = filter(lambda x: x['status'] == 'finished', instances)
    remove = sorted(instances, key=lambda x: x['atime'], reverse=True)[1:]
    for instance in remove:
        if robust_rmtree(os.path.join(path, instance['path'])):
            yield instance

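# Cache layout, as implemented in this module: rendered books are stored under
# book_cache_dir()/'f' (finished), in-progress conversions under
# book_cache_dir()/'t' (temp), and the bookkeeping for both lives in
# metadata.json, the same file that cache_lock() uses as an exclusive lock.
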

def expire_cache_and_temp(temp_path, finished_path, metadata, max_age, force_expire):
    now = time.time()
    if now - metadata['last_clear_at'] < DAY and max_age >= 0 and not force_expire:
        return
    clear_temp(temp_path)
    entries = metadata['entries']
    path_key_map = {}
    for key, instances in tuple(entries.items()):
        if instances:
            expire_cache(finished_path, instances, max_age)
            if not instances:
                del entries[key]
            else:
                for x in instances:
                    book_path = x.get('book_path')
                    if book_path:
                        path_key_map.setdefault(book_path, []).append(key)
    for keys in path_key_map.values():
        instances = []
        for key in keys:
            instances += entries.get(key, [])
        if len(instances) > 1:
            removed = tuple(expire_old_versions(finished_path, instances))
            if removed:
                for r in removed:
                    rkey = r['key']
                    if rkey in entries:
                        try:
                            entries[rkey].remove(r)
                        except ValueError:
                            pass
                        if not entries[rkey]:
                            del entries[rkey]
    metadata['last_clear_at'] = now


def prepare_convert(temp_path, key, st, book_path):
    tdir = tempfile.mkdtemp(dir=temp_path, prefix=f'c{next(td_counter)}-')
    now = time.time()
    return {
        'path': os.path.basename(tdir),
        'id': uuid4(),
        'status': 'working',
        'mtime': now,
        'atime': now,
        'key': key,
        'file_mtime': st.st_mtime,
        'file_size': st.st_size,
        'cache_size': 0,
        'book_path': book_path,
    }


class ConversionFailure(ValueError):

    def __init__(self, book_path, worker_output):
        self.book_path = book_path
        self.worker_output = worker_output
        ValueError.__init__(
            self, 'Failed to convert book: {} with error:\n{}'.format(book_path, worker_output))


running_workers = []


def clean_running_workers():
    for p in running_workers:
        if p.poll() is None:
            p.kill()
    del running_workers[:]


def do_convert(path, temp_path, key, instance):
    tdir = os.path.join(temp_path, instance['path'])
    p = None
    try:
        with TemporaryFile('log.txt') as logpath:
            with open(logpath, 'w+b') as logf:
                p = start_pipe_worker('from calibre.srv.render_book import viewer_main; viewer_main()', stdout=logf, stderr=logf)
                running_workers.append(p)
                # The worker reads its arguments as a single msgpack encoded blob on stdin
                p.stdin.write(msgpack_dumps((
                    path, tdir, {'size': instance['file_size'], 'mtime': instance['file_mtime'], 'hash': key},
                )))
                p.stdin.close()
            if p.wait() != 0:
                with open(logpath, 'rb') as logf:
                    worker_output = logf.read().decode('utf-8', 'replace')
                raise ConversionFailure(path, worker_output)
    finally:
        try:
            running_workers.remove(p)
        except Exception:
            pass
    size = 0
    for f in walk(tdir):
        size += os.path.getsize(f)
    instance['cache_size'] = size


def save_metadata(metadata, f):
    f.seek(0), f.truncate(), f.write(as_bytes(json.dumps(metadata, indent=2)))

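# A minimal usage sketch (illustrative, not part of this module's API surface):
# prepare_book() below converts the book on a cache miss and returns the
# directory holding the rendered result; the path used here is hypothetical.
#
#     rendered_dir = prepare_book('/path/to/some/book.epub')
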

def prepare_book(path, convert_func=do_convert, max_age=30 * DAY, force=False, prepare_notify=None, force_expire=False):
    st = os.stat(path)
    key = book_hash(path, st.st_size, st.st_mtime)
    finished_path = safe_makedirs(os.path.join(book_cache_dir(), 'f'))
    temp_path = safe_makedirs(os.path.join(book_cache_dir(), 't'))

    with cache_lock() as f:
        try:
            metadata = json.loads(f.read())
        except ValueError:
            metadata = {'entries': {}, 'last_clear_at': 0}
        entries = metadata['entries']
        instances = entries.setdefault(key, [])
        for instance in tuple(instances):
            if instance['status'] == 'finished':
                if force:
                    robust_rmtree(os.path.join(finished_path, instance['path']))
                    instances.remove(instance)
                else:
                    instance['atime'] = time.time()
                    save_metadata(metadata, f)
                    return os.path.join(finished_path, instance['path'])
        if prepare_notify:
            prepare_notify()
        instance = prepare_convert(temp_path, key, st, path)
        instances.append(instance)
        save_metadata(metadata, f)

    # The conversion itself runs without holding the cache lock
    convert_func(path, temp_path, key, instance)
    src_path = os.path.join(temp_path, instance['path'])
    with cache_lock() as f:
        ans = tempfile.mkdtemp(dir=finished_path, prefix=f'c{next(td_counter)}-')
        instance['path'] = os.path.basename(ans)
        try:
            metadata = json.loads(f.read())
        except ValueError:
            metadata = {'entries': {}, 'last_clear_at': 0}
        entries = metadata['entries']
        instances = entries.setdefault(key, [])
        os.rmdir(ans)
        if not robust_rename(src_path, ans):
            raise Exception((
                'Failed to rename: "{}" to "{}". Probably some software such as an antivirus or file sync program'
                ' running on your computer has locked the files.'
            ).format(src_path, ans))

        instance['status'] = 'finished'
        for q in instances:
            if q['id'] == instance['id']:
                q.update(instance)
                break
        expire_cache_and_temp(temp_path, finished_path, metadata, max_age, force_expire)
        save_metadata(metadata, f)
    return ans


def update_book(path, old_stat, name_data_map=None):
    old_key = book_hash(path, old_stat.st_size, old_stat.st_mtime)
    finished_path = safe_makedirs(os.path.join(book_cache_dir(), 'f'))

    with cache_lock() as f:
        st = os.stat(path)
        new_key = book_hash(path, st.st_size, st.st_mtime)
        if old_key == new_key:
            return
        try:
            metadata = json.loads(f.read())
        except ValueError:
            metadata = {'entries': {}, 'last_clear_at': 0}
        entries = metadata['entries']
        instances = entries.get(old_key)
        if not instances:
            return
        for instance in tuple(instances):
            if instance['status'] == 'finished':
                entries.setdefault(new_key, []).append(instance)
                instances.remove(instance)
                if not instances:
                    del entries[old_key]
                instance['file_mtime'] = st.st_mtime
                instance['file_size'] = st.st_size
                if name_data_map:
                    for name, data in iteritems(name_data_map):
                        with open(os.path.join(finished_path, instance['path'], name), 'wb') as f2:
                            f2.write(data)
                save_metadata(metadata, f)
                return

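# A usage sketch for update_book(), mirroring the test in find_tests() below:
# stat the book before modifying it, then pass the old stat result so the
# finished cache entry is re-keyed in place instead of being converted again.
# The name and data values here are purely illustrative.
#
#     old_stat = os.stat(book_path)
#     # ... modify the book file on disk ...
#     update_book(book_path, old_stat, name_data_map={'some-file': b'new data'})
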

def find_tests():
    import unittest

    class TestViewerCache(unittest.TestCase):
        ae = unittest.TestCase.assertEqual

        def setUp(self):
            self.tdir = tempfile.mkdtemp()
            book_cache_dir.override = os.path.join(self.tdir, 'ev2')

        def tearDown(self):
            rmtree(self.tdir)
            del book_cache_dir.override

        def test_viewer_cache(self):

            def convert_mock(path, temp_path, key, instance):
                self.ae(instance['status'], 'working')
                self.ae(instance['key'], key)
                open(os.path.join(temp_path, instance['path'], 'sentinel'), 'wb').write(b'test')

            def set_data(x):
                if not isinstance(x, bytes):
                    x = x.encode('utf-8')
                with open(book_src, 'wb') as f:
                    f.write(x)

            book_src = os.path.join(self.tdir, 'book.epub')
            set_data('a')
            path = prepare_book(book_src, convert_func=convert_mock)
            self.ae(open(os.path.join(path, 'sentinel'), 'rb').read(), b'test')

            # Test that opening the same book uses the cache
            second_path = prepare_book(book_src, convert_func=convert_mock)
            self.ae(path, second_path)

            # Test that changing the book updates the cache
            set_data('bc')
            third_path = prepare_book(book_src, convert_func=convert_mock)
            self.assertNotEqual(path, third_path)

            # Test force reload
            fourth_path = prepare_book(book_src, convert_func=convert_mock)
            self.ae(third_path, fourth_path)
            fourth_path = prepare_book(book_src, convert_func=convert_mock, force=True)
            self.assertNotEqual(third_path, fourth_path)

            # Test cache expiry
            set_data('bcd')
            prepare_book(book_src, convert_func=convert_mock, max_age=-1000)
            self.ae([], os.listdir(os.path.join(book_cache_dir(), 'f')))

            # Test that modifying a book and opening it repeatedly leaves only
            # a single entry for it in the cache
            opath = prepare_book(book_src, convert_func=convert_mock, force_expire=True)
            finished_entries = os.listdir(os.path.join(book_cache_dir(), 'f'))
            self.ae(len(finished_entries), 1)
            set_data('bcde' * 4096)
            npath = prepare_book(book_src, convert_func=convert_mock, force_expire=True)
            new_finished_entries = os.listdir(os.path.join(book_cache_dir(), 'f'))
            self.ae(len(new_finished_entries), 1)
            self.assertNotEqual(opath, npath)
            set_data('bcdef')
            prepare_book(book_src, convert_func=convert_mock, max_age=-1000, force_expire=True)
            self.ae([], os.listdir(os.path.join(book_cache_dir(), 'f')))
            with cache_lock() as f:
                metadata = json.loads(f.read())
            self.assertEqual(metadata['entries'], {})

            # Test updating a cached book
            book_src = os.path.join(self.tdir, 'book2.epub')
            set_data('bb')
            path = prepare_book(book_src, convert_func=convert_mock)
            self.ae(open(os.path.join(path, 'sentinel'), 'rb').read(), b'test')
            bs = os.stat(book_src)
            set_data('cde')
            update_book(book_src, bs, name_data_map={'sentinel': b'updated'})
            self.ae(open(os.path.join(path, 'sentinel'), 'rb').read(), b'updated')
            self.ae(1, len(os.listdir(os.path.join(book_cache_dir(), 'f'))))
            with cache_lock() as f:
                metadata = json.loads(f.read())
            self.ae(len(metadata['entries']), 1)
            entry = list(metadata['entries'].values())[0]
            self.ae(len(entry), 1)
            entry = entry[0]
            st = os.stat(book_src)
            self.ae(entry['file_size'], st.st_size)
            self.ae(entry['file_mtime'], st.st_mtime)

    return unittest.defaultTestLoader.loadTestsFromTestCase(TestViewerCache)
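

if __name__ == '__main__':
    # A minimal sketch for running the tests above directly; it assumes this
    # file is executed with calibre's modules importable (e.g. from a calibre
    # development environment).
    import unittest
    unittest.TextTestRunner(verbosity=2).run(find_tests())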