#!/usr/local/bin/python3.8
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>

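# This module maintains the on-disk cache of books rendered for the viewer.
# Layout, relative to book_cache_dir() (normally <cache_dir>/ev2):
#   metadata.json -- the cache index, read/written only while holding cache_lock()
#   t/            -- temporary directories for in-progress conversions
#   f/            -- finished conversions, renamed into place once complete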

import errno
import json
import os
import tempfile
import time
from hashlib import sha1
from itertools import count

from calibre import walk
from calibre.constants import cache_dir, iswindows
from calibre.ptempfile import TemporaryFile
from calibre.srv.render_book import RENDER_VERSION
from calibre.utils.filenames import rmtree
from calibre.utils.ipc.simple_worker import start_pipe_worker
from calibre.utils.lock import ExclusiveFile
from calibre.utils.serialize import msgpack_dumps
from calibre.utils.short_uuid import uuid4
from polyglot.builtins import as_bytes, as_unicode

DAY = 24 * 3600
VIEWER_VERSION = 1
td_counter = count()


def book_cache_dir():
    return getattr(book_cache_dir, 'override', os.path.join(cache_dir(), 'ev2'))


def cache_lock():
    return ExclusiveFile(os.path.join(book_cache_dir(), 'metadata.json'))


def book_hash(path, size, mtime):
    path = os.path.normcase(os.path.abspath(path))
    raw = json.dumps((path, size, mtime, RENDER_VERSION, VIEWER_VERSION)).encode('utf-8')
    return as_unicode(sha1(raw).hexdigest())

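# Illustrative example (values are made up): the key changes whenever the book
# file is moved, resized or touched, or when RENDER_VERSION/VIEWER_VERSION is
# bumped, so stale renders are never re-used:
#
#   book_hash('/books/demo.epub', 12345, 1589000000.0)  # 40 character hex digest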

def safe_makedirs(path):
    try:
        os.makedirs(path)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise
    return path


def robust_rmtree(x):
    retries = 2 if iswindows else 1  # retry on windows to get around the idiotic mandatory file locking
    for i in range(retries):
        try:
            try:
                rmtree(x)
            except UnicodeDecodeError:
                rmtree(as_bytes(x))
            return True
        except OSError:
            time.sleep(0.1)
    return False


def robust_rename(a, b):
    retries = 20 if iswindows else 1  # retry on windows to get around the idiotic mandatory file locking
    for i in range(retries):
        try:
            os.rename(a, b)
            return True
        except OSError:
            time.sleep(0.1)
    return False


def clear_temp(temp_path):
    now = time.time()
    for x in os.listdir(temp_path):
        x = os.path.join(temp_path, x)
        mtime = os.path.getmtime(x)
        if now - mtime > DAY:
            robust_rmtree(x)


def expire_cache(path, instances, max_age):
    now = time.time()
    remove = [x for x in instances if now - x['atime'] > max_age and x['status'] == 'finished']
    for instance in remove:
        if robust_rmtree(os.path.join(path, instance['path'])):
            instances.remove(instance)


def expire_old_versions(path, instances):
    instances = filter(lambda x: x['status'] == 'finished', instances)
    remove = sorted(instances, key=lambda x: x['atime'], reverse=True)[1:]
    for instance in remove:
        if robust_rmtree(os.path.join(path, instance['path'])):
            yield instance

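# Expiry runs in two passes over the finished cache: first, finished instances
# whose atime is older than max_age are removed; then, for books that still
# have multiple finished renders (grouped by book_path), all but the most
# recently used render are dropped. Anything in the temp directory older than
# a day is also swept.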
def expire_cache_and_temp(temp_path, finished_path, metadata, max_age, force_expire):
    now = time.time()
    if now - metadata['last_clear_at'] < DAY and max_age >= 0 and not force_expire:
        return
    clear_temp(temp_path)
    entries = metadata['entries']
    path_key_map = {}
    for key, instances in tuple(entries.items()):
        if instances:
            expire_cache(finished_path, instances, max_age)
            if not instances:
                del entries[key]
            else:
                for x in instances:
                    book_path = x.get('book_path')
                    if book_path:
                        path_key_map.setdefault(book_path, []).append(key)
    for keys in path_key_map.values():
        instances = []
        for key in keys:
            instances += entries.get(key, [])
        if len(instances) > 1:
            removed = tuple(expire_old_versions(finished_path, instances))
            if removed:
                for r in removed:
                    rkey = r['key']
                    if rkey in entries:
                        try:
                            entries[rkey].remove(r)
                        except ValueError:
                            pass
                        if not entries[rkey]:
                            del entries[rkey]
    metadata['last_clear_at'] = now


def prepare_convert(temp_path, key, st, book_path):
    tdir = tempfile.mkdtemp(dir=temp_path, prefix=f'c{next(td_counter)}-')
    now = time.time()
    return {
        'path': os.path.basename(tdir),
        'id': uuid4(),
        'status': 'working',
        'mtime': now,
        'atime': now,
        'key': key,
        'file_mtime': st.st_mtime,
        'file_size': st.st_size,
        'cache_size': 0,
        'book_path': book_path,
    }


class ConversionFailure(ValueError):

    def __init__(self, book_path, worker_output):
        self.book_path = book_path
        self.worker_output = worker_output
        super().__init__(f'Failed to convert book: {book_path} with error:\n{worker_output}')


running_workers = []


def clean_running_workers():
    for p in running_workers:
        if p.poll() is None:
            p.kill()
    del running_workers[:]

def do_convert(path, temp_path, key, instance):
    tdir = os.path.join(temp_path, instance['path'])
    p = None
    try:
        with TemporaryFile('log.txt') as logpath:
            with open(logpath, 'w+b') as logf:
                p = start_pipe_worker('from calibre.srv.render_book import viewer_main; viewer_main()', stdout=logf, stderr=logf)
                running_workers.append(p)
                p.stdin.write(msgpack_dumps((
                    path, tdir, {'size': instance['file_size'], 'mtime': instance['file_mtime'], 'hash': key},
                    )))
                p.stdin.close()
            if p.wait() != 0:
                with open(logpath, 'rb') as logf:
                    worker_output = logf.read().decode('utf-8', 'replace')
                raise ConversionFailure(path, worker_output)
    finally:
        try:
            running_workers.remove(p)
        except Exception:
            pass
    instance['cache_size'] = sum(os.path.getsize(f) for f in walk(tdir))

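# The worker reads a single msgpack-encoded tuple from stdin:
# (path_to_book, output_dir, {'size': ..., 'mtime': ..., 'hash': ...}).
# Any callable with do_convert's signature can be passed to prepare_book() as
# convert_func instead, e.g. this minimal stand-in for tests (the file name
# 'out' is arbitrary):
#
#   def fake_convert(path, temp_path, key, instance):
#       with open(os.path.join(temp_path, instance['path'], 'out'), 'wb') as f:
#           f.write(b'rendered')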

def save_metadata(metadata, f):
    f.seek(0)
    f.truncate()
    f.write(as_bytes(json.dumps(metadata, indent=2)))

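# The metadata index maps book hashes to lists of instance records, as created
# by prepare_convert(). On disk it looks roughly like this (values illustrative):
#
#   {
#     "last_clear_at": 1589000000.0,
#     "entries": {
#       "<book_hash>": [
#         {"path": "c0-xyz", "id": "...", "status": "finished",
#          "mtime": 1589000000.0, "atime": 1589000000.0, "key": "<book_hash>",
#          "file_mtime": 1588000000.0, "file_size": 12345,
#          "cache_size": 67890, "book_path": "/books/demo.epub"}
#       ]
#     }
#   }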

def prepare_book(path, convert_func=do_convert, max_age=30 * DAY, force=False, prepare_notify=None, force_expire=False):
    st = os.stat(path)
    key = book_hash(path, st.st_size, st.st_mtime)
    finished_path = safe_makedirs(os.path.join(book_cache_dir(), 'f'))
    temp_path = safe_makedirs(os.path.join(book_cache_dir(), 't'))

    with cache_lock() as f:
        try:
            metadata = json.loads(f.read())
        except ValueError:
            metadata = {'entries': {}, 'last_clear_at': 0}
        entries = metadata['entries']
        instances = entries.setdefault(key, [])
        for instance in tuple(instances):
            if instance['status'] == 'finished':
                if force:
                    robust_rmtree(os.path.join(finished_path, instance['path']))
                    instances.remove(instance)
                else:
                    instance['atime'] = time.time()
                    save_metadata(metadata, f)
                    return os.path.join(finished_path, instance['path'])
        if prepare_notify:
            prepare_notify()
        instance = prepare_convert(temp_path, key, st, path)
        instances.append(instance)
        save_metadata(metadata, f)
    convert_func(path, temp_path, key, instance)
    src_path = os.path.join(temp_path, instance['path'])
    with cache_lock() as f:
        ans = tempfile.mkdtemp(dir=finished_path, prefix=f'c{next(td_counter)}-')
        instance['path'] = os.path.basename(ans)
        try:
            metadata = json.loads(f.read())
        except ValueError:
            metadata = {'entries': {}, 'last_clear_at': 0}
        entries = metadata['entries']
        instances = entries.setdefault(key, [])
        # mkdtemp() is used only to reserve a unique name; remove the empty
        # directory so the rename below can take its place
        os.rmdir(ans)
        if not robust_rename(src_path, ans):
            raise Exception((
                'Failed to rename "{}" to "{}". Most likely, some software such as an antivirus or'
                ' file sync program running on your computer has locked one of the files'
            ).format(src_path, ans))

        instance['status'] = 'finished'
        # The metadata was re-read from disk, so sync our updated instance
        # into the copy that will be saved
        for q in instances:
            if q['id'] == instance['id']:
                q.update(instance)
                break
        expire_cache_and_temp(temp_path, finished_path, metadata, max_age, force_expire)
        save_metadata(metadata, f)
    return ans

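# Minimal usage sketch (the path is a placeholder): the return value is the
# directory under f/ holding the rendered book, ready to be served to the viewer.
#
#   unpacked = prepare_book('/books/demo.epub')
#   rendered_files = os.listdir(unpacked)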

def update_book(path, old_stat, name_data_map=None):
    old_key = book_hash(path, old_stat.st_size, old_stat.st_mtime)
    finished_path = safe_makedirs(os.path.join(book_cache_dir(), 'f'))

    with cache_lock() as f:
        st = os.stat(path)
        new_key = book_hash(path, st.st_size, st.st_mtime)
        if old_key == new_key:
            return
        try:
            metadata = json.loads(f.read())
        except ValueError:
            metadata = {'entries': {}, 'last_clear_at': 0}
        entries = metadata['entries']
        instances = entries.get(old_key)
        if not instances:
            return
        for instance in tuple(instances):
            if instance['status'] == 'finished':
                entries.setdefault(new_key, []).append(instance)
                instances.remove(instance)
                if not instances:
                    del entries[old_key]
                instance['file_mtime'] = st.st_mtime
                instance['file_size'] = st.st_size
                if name_data_map:
                    for name, data in name_data_map.items():
                        with open(os.path.join(finished_path, instance['path'], name), 'wb') as f2:
                            f2.write(data)
                save_metadata(metadata, f)
                return

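# Intended calling pattern (mirrors the test below; write_new_content is a
# hypothetical stand-in for whatever modifies the book): stat the book before
# changing it, then migrate its cached render to the new key, optionally
# replacing individual files in the render via name_data_map.
#
#   st = os.stat(book_path)
#   write_new_content(book_path)  # hypothetical
#   update_book(book_path, st, name_data_map={'some-name': b'new data'})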

def find_tests():
    import unittest

    class TestViewerCache(unittest.TestCase):
        ae = unittest.TestCase.assertEqual

        def setUp(self):
            self.tdir = tempfile.mkdtemp()
            book_cache_dir.override = os.path.join(self.tdir, 'ev2')

        def tearDown(self):
            rmtree(self.tdir)
            del book_cache_dir.override

        def test_viewer_cache(self):

            def convert_mock(path, temp_path, key, instance):
                self.ae(instance['status'], 'working')
                self.ae(instance['key'], key)
                with open(os.path.join(temp_path, instance['path'], 'sentinel'), 'wb') as f:
                    f.write(b'test')

            def set_data(x):
                if not isinstance(x, bytes):
                    x = x.encode('utf-8')
                with open(book_src, 'wb') as f:
                    f.write(x)

            def read_sentinel(path):
                with open(os.path.join(path, 'sentinel'), 'rb') as f:
                    return f.read()

            book_src = os.path.join(self.tdir, 'book.epub')
            set_data('a')
            path = prepare_book(book_src, convert_func=convert_mock)
            self.ae(read_sentinel(path), b'test')

            # Test that opening the same book uses the cache
            second_path = prepare_book(book_src, convert_func=convert_mock)
            self.ae(path, second_path)

            # Test that changing the book updates the cache
            set_data('bc')
            third_path = prepare_book(book_src, convert_func=convert_mock)
            self.assertNotEqual(path, third_path)

            # Test force reload
            fourth_path = prepare_book(book_src, convert_func=convert_mock)
            self.ae(third_path, fourth_path)
            fourth_path = prepare_book(book_src, convert_func=convert_mock, force=True)
            self.assertNotEqual(third_path, fourth_path)

            # Test cache expiry
            set_data('bcd')
            prepare_book(book_src, convert_func=convert_mock, max_age=-1000)
            self.ae([], os.listdir(os.path.join(book_cache_dir(), 'f')))

            # Test that modifying a book and opening it repeatedly leaves only
            # a single entry for it in the cache
            opath = prepare_book(book_src, convert_func=convert_mock, force_expire=True)
            finished_entries = os.listdir(os.path.join(book_cache_dir(), 'f'))
            self.ae(len(finished_entries), 1)
            set_data('bcde' * 4096)
            npath = prepare_book(book_src, convert_func=convert_mock, force_expire=True)
            new_finished_entries = os.listdir(os.path.join(book_cache_dir(), 'f'))
            self.ae(len(new_finished_entries), 1)
            self.assertNotEqual(opath, npath)
            set_data('bcdef')
            prepare_book(book_src, convert_func=convert_mock, max_age=-1000, force_expire=True)
            self.ae([], os.listdir(os.path.join(book_cache_dir(), 'f')))
            with cache_lock() as f:
                metadata = json.loads(f.read())
                self.ae(metadata['entries'], {})

            # Test updating a cached book
            book_src = os.path.join(self.tdir, 'book2.epub')
            set_data('bb')
            path = prepare_book(book_src, convert_func=convert_mock)
            self.ae(read_sentinel(path), b'test')
            bs = os.stat(book_src)
            set_data('cde')
            update_book(book_src, bs, name_data_map={'sentinel': b'updated'})
            self.ae(read_sentinel(path), b'updated')
            self.ae(1, len(os.listdir(os.path.join(book_cache_dir(), 'f'))))
            with cache_lock() as f:
                metadata = json.loads(f.read())
            self.ae(len(metadata['entries']), 1)
            entry = list(metadata['entries'].values())[0]
            self.ae(len(entry), 1)
            entry = entry[0]
            st = os.stat(book_src)
            self.ae(entry['file_size'], st.st_size)
            self.ae(entry['file_mtime'], st.st_mtime)

    return unittest.defaultTestLoader.loadTestsFromTestCase(TestViewerCache)
