1#!/usr/local/bin/python3.8
2# vim:fileencoding=utf-8
3
4
5__license__ = 'GPL v3'
6__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
7
8import os, errno, sys, re
9from locale import localeconv
10from collections import OrderedDict, namedtuple
11from polyglot.builtins import iteritems, itervalues, string_or_bytes
12from threading import Lock
13
14from calibre import as_unicode, prints
15from calibre.constants import cache_dir, get_windows_number_formats, iswindows, preferred_encoding
16
17from calibre.utils.localization import canonicalize_lang
18
19
def force_to_bool(val):
    '''
    Coerce a string/bytes value to True, False or None; non-string values are
    returned unchanged.

    Empty strings map to None, localized/English yes/checked/true map to True,
    localized/English no/unchecked/false map to False, anything else is parsed
    as an integer and converted with bool(); unparseable values become None.
    '''
    if isinstance(val, (bytes, str)):
        if isinstance(val, bytes):
            val = val.decode(preferred_encoding, 'replace')
        try:
            val = icu_lower(val)
            if not val:
                val = None
            elif val in (_('yes'), _('checked'), 'true', 'yes'):
                val = True
            elif val in (_('no'), _('unchecked'), 'false', 'no'):
                val = False
            else:
                val = bool(int(val))
        except Exception:
            # narrowed from a bare except: so SystemExit/KeyboardInterrupt
            # are not swallowed; any parse failure means "unknown" -> None
            val = None
    return val
37
38
_fuzzy_title_patterns = None


def fuzzy_title_patterns():
    '''
    Lazily build and cache the (pattern, replacement) pairs applied by
    fuzzy_title(). Plain string patterns are compiled case-insensitively;
    already-compiled patterns (such as the title sort pattern) are used as-is.
    '''
    global _fuzzy_title_patterns
    if _fuzzy_title_patterns is None:
        from calibre.ebooks.metadata import get_title_sort_pat
        raw_patterns = (
            (r'[\[\](){}<>\'";,:#]', ''),       # strip punctuation
            (get_title_sort_pat(), ''),         # strip leading articles (pre-compiled)
            (r'[-._]', ' '),                    # separators become spaces
            (r'\s+', ' '),                      # collapse runs of whitespace
        )

        def compiled(pat):
            if isinstance(pat, string_or_bytes):
                return re.compile(pat, re.IGNORECASE)
            return pat

        _fuzzy_title_patterns = tuple((compiled(pat), repl) for pat, repl in raw_patterns)
    return _fuzzy_title_patterns
56
57
def fuzzy_title(title):
    '''
    Normalize a title for duplicate detection: lower-case and strip it, then
    run every (pattern, replacement) substitution from fuzzy_title_patterns().
    '''
    result = icu_lower(title.strip())
    for pattern, replacement in fuzzy_title_patterns():
        result = pattern.sub(replacement, result)
    return result
63
64
def find_identical_books(mi, data):
    '''
    Return the set of book ids from ``data`` that appear identical to the
    metadata object ``mi``.

    :param mi: a metadata object with ``authors``, ``title`` and ``languages``
    :param data: a 4-tuple of (author_map, aid_map, title_map, lang_map) where
        author_map maps lower-cased author name -> author ids, aid_map maps
        author id -> book ids, title_map maps book id -> title and lang_map
        maps book id -> language tuple.

    A book matches when it shares all of mi's authors, has the same fuzzy
    title, and (if mi specifies languages) has no conflicting languages.
    '''
    author_map, aid_map, title_map, lang_map = data
    found_books = None
    for a in mi.authors:
        author_ids = author_map.get(icu_lower(a))
        if author_ids is None:
            # An author unknown to the library: no book can match
            return set()
        books_by_author = {book_id for aid in author_ids for book_id in aid_map.get(aid, ())}
        if found_books is None:
            found_books = books_by_author
        else:
            # Candidates must be shared by every author
            found_books &= books_by_author
        if not found_books:
            return set()
    if not found_books:
        # mi has no authors at all (previously this crashed iterating None)
        return set()

    ans = set()
    titleq = fuzzy_title(mi.title)
    for book_id in found_books:
        title = title_map.get(book_id, '')
        if fuzzy_title(title) == titleq:
            ans.add(book_id)

    # Ignore unspecified/undetermined languages when comparing
    langq = tuple(filter(lambda x: x and x != 'und', map(canonicalize_lang, mi.languages or ())))
    if not langq:
        return ans

    def lang_matches(book_id):
        book_langq = lang_map.get(book_id)
        # A book with no language recorded matches anything
        return not book_langq or langq == book_langq

    return {book_id for book_id in ans if lang_matches(book_id)}
96
97
98Entry = namedtuple('Entry', 'path size timestamp thumbnail_size')
99
100
class CacheError(Exception):
    ' Raised by ThumbnailCache instead of logging errors when test_mode is enabled '
    pass
103
104
class ThumbnailCache:

    '''
    A persistent, size-limited disk cache to speed up loading and resizing of
    covers.

    Thumbnails are stored as individual files under ``location/name``, sharded
    into ``group_id/book_id % 100`` sub-directories, with the book id,
    timestamp, size and thumbnail dimensions encoded in the file name. Entries
    are keyed on (group_id, book_id). The in-memory index (an OrderedDict) is
    loaded lazily on first use; access order is maintained so that eviction
    (when total size exceeds max_size) discards the least recently used
    entries first. All public methods are serialized on a single lock.
    '''

    def __init__(self,
                 max_size=1024,  # The maximum disk space in MB
                 name='thumbnail-cache',  # The name of this cache (should be unique in location)
                 thumbnail_size=(100, 100),   # The size of the thumbnails, can be changed
                 location=None,   # The location for this cache, if None cache_dir() is used
                 test_mode=False,  # Used for testing
                 min_disk_cache=0):  # If the size is set less than or equal to this value, the cache is disabled.
        self.location = os.path.join(location or cache_dir(), name)
        if max_size <= min_disk_cache:
            max_size = 0  # disables the cache, insert() becomes a no-op
        self.max_size = int(max_size * (1024**2))
        self.group_id = 'group'
        self.thumbnail_size = thumbnail_size
        self.size_changed = False
        self.lock = Lock()
        self.min_disk_cache = min_disk_cache
        if test_mode:
            # In test mode, errors raise CacheError instead of being logged
            self.log = self.fail_on_error

    def log(self, *args, **kwargs):
        ' Default logger: write to stderr '
        kwargs['file'] = sys.stderr
        prints(*args, **kwargs)

    def fail_on_error(self, *args, **kwargs):
        ' Replacement for log() in test mode: raise instead of logging '
        msg = ' '.join(args)
        raise CacheError(msg)

    def _do_delete(self, path):
        ' Best-effort removal of a thumbnail file; failures are only logged '
        try:
            os.remove(path)
        except OSError as err:
            self.log('Failed to delete cached thumbnail file:', as_unicode(err))

    def _load_index(self):
        'Load the index, automatically removing incorrectly sized thumbnails and pruning to fit max_size'
        try:
            os.makedirs(self.location)
        except OSError as err:
            if err.errno != errno.EEXIST:
                self.log('Failed to make thumbnail cache dir:', as_unicode(err))
        self.total_size = 0
        self.items = OrderedDict()
        order = self._read_order()

        def listdir(*args):
            try:
                return os.listdir(os.path.join(*args))
            except OSError:
                return ()  # not a directory or no permission or whatever
        # Walk the two-level sharded layout: group/book_id%100/entry
        entries = ('/'.join((parent, subdir, entry))
                   for parent in listdir(self.location)
                   for subdir in listdir(self.location, parent)
                   for entry in listdir(self.location, parent, subdir))

        # Entries invalidated while the index was not loaded are recorded in
        # the 'invalidate' file by invalidate(); process and remove it now.
        invalidate = set()
        try:
            with open(os.path.join(self.location, 'invalidate'), 'rb') as f:
                raw = f.read().decode('utf-8')
        except OSError as err:
            if getattr(err, 'errno', None) != errno.ENOENT:
                self.log('Failed to read thumbnail invalidate data:', as_unicode(err))
        else:
            try:
                os.remove(os.path.join(self.location, 'invalidate'))
            except OSError as err:
                self.log('Failed to remove thumbnail invalidate data:', as_unicode(err))
            else:
                def record(line):
                    try:
                        uuid, book_id = line.partition(' ')[0::2]
                        book_id = int(book_id)
                        return (uuid, book_id)
                    except Exception:
                        return None  # malformed line; None never matches a real key
                invalidate = {record(x) for x in raw.splitlines()}
        items = []
        try:
            for entry in entries:
                try:
                    # File name format: book_id-timestamp-size-WxH
                    uuid, name = entry.split('/')[0::2]
                    book_id, timestamp, size, thumbnail_size = name.split('-')
                    book_id, timestamp, size = int(book_id), float(timestamp), int(size)
                    thumbnail_size = tuple(map(int, thumbnail_size.partition('x')[0::2]))
                except (ValueError, TypeError, IndexError, KeyError, AttributeError):
                    continue  # not a thumbnail file, ignore
                key = (uuid, book_id)
                path = os.path.join(self.location, entry)
                if self.thumbnail_size == thumbnail_size and key not in invalidate:
                    items.append((key, Entry(path, size, timestamp, thumbnail_size)))
                    self.total_size += size
                else:
                    # Wrong dimensions or explicitly invalidated: delete on disk
                    self._do_delete(path)
        except OSError as err:
            self.log('Failed to read thumbnail cache dir:', as_unicode(err))

        # Restore the persisted LRU order; unknown keys sort first (oldest)
        self.items = OrderedDict(sorted(items, key=lambda x:order.get(x[0], 0)))
        self._apply_size()

    def _invalidate_sizes(self):
        ' Drop all entries whose dimensions do not match the current thumbnail_size '
        if self.size_changed:
            size = self.thumbnail_size
            remove = tuple(key for key, entry in iteritems(self.items) if size != entry.thumbnail_size)
            for key in remove:
                self._remove(key)
            self.size_changed = False

    def _remove(self, key):
        ' Remove a single entry from the index and from disk '
        entry = self.items.pop(key, None)
        if entry is not None:
            self._do_delete(entry.path)
            self.total_size -= entry.size

    def _apply_size(self):
        ' Evict least recently used entries until total size fits within max_size '
        while self.total_size > self.max_size and self.items:
            entry = self.items.popitem(last=False)[1]  # front of the dict = oldest
            self._do_delete(entry.path)
            self.total_size -= entry.size

    def _write_order(self):
        ' Persist the current LRU order to the "order" file '
        if hasattr(self, 'items'):
            try:
                data = '\n'.join(group_id + ' ' + str(book_id) for (group_id, book_id) in self.items)
                # use open() like the rest of this class (was the legacy lopen())
                with open(os.path.join(self.location, 'order'), 'wb') as f:
                    f.write(data.encode('utf-8'))
            except OSError as err:
                self.log('Failed to save thumbnail cache order:', as_unicode(err))

    def _read_order(self):
        ' Read the persisted LRU order; returns a map of key -> rank '
        order = {}
        try:
            # use open() like the rest of this class (was the legacy lopen())
            with open(os.path.join(self.location, 'order'), 'rb') as f:
                for line in f.read().decode('utf-8').splitlines():
                    parts = line.split(' ', 1)
                    if len(parts) == 2:
                        order[(parts[0], int(parts[1]))] = len(order)
        except Exception as err:
            if getattr(err, 'errno', None) != errno.ENOENT:
                self.log('Failed to load thumbnail cache order:', as_unicode(err))
        return order

    def shutdown(self):
        ' Persist the LRU order; call before the process exits '
        with self.lock:
            self._write_order()

    def set_group_id(self, group_id):
        ' Set the group (e.g. library) that subsequent operations apply to '
        with self.lock:
            self.group_id = group_id

    def set_thumbnail_size(self, width, height):
        '''
        Change the thumbnail dimensions. Returns True if the size actually
        changed (existing entries will be dropped lazily), False otherwise.
        '''
        new_size = (width, height)
        with self.lock:
            if new_size != self.thumbnail_size:
                self.thumbnail_size = new_size
                self.size_changed = True
                return True
        return False

    def insert(self, book_id, timestamp, data):
        '''
        Store the raw thumbnail bytes ``data`` for ``book_id``. ``timestamp``
        identifies the version of the source cover. Data larger than the
        entire cache budget is silently ignored.
        '''
        if self.max_size < len(data):
            return
        with self.lock:
            if not hasattr(self, 'total_size'):
                self._load_index()
            self._invalidate_sizes()
            # Compact timestamp representation for the file name
            ts = ('%.2f' % timestamp).replace('.00', '')
            path = '%s%s%s%s%d-%s-%d-%dx%d' % (
                self.group_id, os.sep, book_id % 100, os.sep,
                book_id, ts, len(data), self.thumbnail_size[0], self.thumbnail_size[1])
            path = os.path.join(self.location, path)
            key = (self.group_id, book_id)
            e = self.items.pop(key, None)
            self.total_size -= getattr(e, 'size', 0)
            try:
                with open(path, 'wb') as f:
                    f.write(data)
            except OSError as err:
                d = os.path.dirname(path)
                if not os.path.exists(d):
                    # Shard directory missing; create it and retry once
                    try:
                        os.makedirs(d)
                        with open(path, 'wb') as f:
                            f.write(data)
                    except OSError as err:
                        self.log('Failed to write cached thumbnail:', path, as_unicode(err))
                        return self._apply_size()
                else:
                    self.log('Failed to write cached thumbnail:', path, as_unicode(err))
                    return self._apply_size()
            self.items[key] = Entry(path, len(data), timestamp, self.thumbnail_size)
            self.total_size += len(data)
            self._apply_size()

    def __len__(self):
        with self.lock:
            try:
                return len(self.items)
            except AttributeError:
                self._load_index()
                return len(self.items)

    def __contains__(self, book_id):
        with self.lock:
            try:
                return (self.group_id, book_id) in self.items
            except AttributeError:
                self._load_index()
                return (self.group_id, book_id) in self.items

    def __getitem__(self, book_id):
        '''
        Return (data, timestamp) for book_id in the current group, or
        (None, None) if no usable thumbnail is cached. A hit marks the entry
        as most recently used.
        '''
        with self.lock:
            if not hasattr(self, 'total_size'):
                self._load_index()
            self._invalidate_sizes()
            key = (self.group_id, book_id)
            entry = self.items.pop(key, None)
            if entry is None:
                return None, None
            if entry.thumbnail_size != self.thumbnail_size:
                # Stale dimensions: discard the file and report a miss
                try:
                    os.remove(entry.path)
                except OSError as err:
                    if getattr(err, 'errno', None) != errno.ENOENT:
                        self.log('Failed to remove cached thumbnail:', entry.path, as_unicode(err))
                self.total_size -= entry.size
                return None, None
            # Re-insert so the key moves to the end (most recently used)
            self.items[key] = entry
            try:
                with open(entry.path, 'rb') as f:
                    data = f.read()
            except OSError as err:
                self.log('Failed to read cached thumbnail:', entry.path, as_unicode(err))
                return None, None
            return data, entry.timestamp

    def invalidate(self, book_ids):
        '''
        Mark the thumbnails for book_ids as stale. If the index is loaded the
        entries are removed immediately; otherwise the keys are appended to
        the "invalidate" file, which _load_index() processes later.
        '''
        with self.lock:
            if hasattr(self, 'total_size'):
                for book_id in book_ids:
                    self._remove((self.group_id, book_id))
            elif os.path.exists(self.location):
                try:
                    raw = '\n'.join('%s %d' % (self.group_id, book_id) for book_id in book_ids)
                    with open(os.path.join(self.location, 'invalidate'), 'ab') as f:
                        # Terminate the batch with a newline, otherwise the
                        # last record of this append and the first record of
                        # the next would merge into one unparseable line and
                        # the invalidations would be silently lost.
                        f.write(raw.encode('ascii') + b'\n')
                except OSError as err:
                    self.log('Failed to write invalidate thumbnail record:', as_unicode(err))

    @property
    def current_size(self):
        ' Total size in bytes of all cached thumbnails '
        with self.lock:
            if not hasattr(self, 'total_size'):
                self._load_index()
            return self.total_size

    def empty(self):
        ' Delete every cached thumbnail and the persisted order file '
        with self.lock:
            try:
                os.remove(os.path.join(self.location, 'order'))
            except OSError:
                pass
            if not hasattr(self, 'total_size'):
                self._load_index()
            for entry in itervalues(self.items):
                self._do_delete(entry.path)
            self.total_size = 0
            self.items = OrderedDict()

    def __hash__(self):
        return id(self)

    def set_size(self, size_in_mb):
        '''
        Change the cache budget. Values at or below min_disk_cache disable the
        cache; entries are evicted immediately if the index is loaded.
        '''
        if size_in_mb <= self.min_disk_cache:
            size_in_mb = 0
        size_in_mb = max(0, size_in_mb)
        with self.lock:
            self.max_size = int(size_in_mb * (1024**2))
            if hasattr(self, 'total_size'):
                self._apply_size()
387
388
number_separators = None


def atof(string):
    '''
    Parse a float from a string, honouring the locale's thousands and decimal
    separators. The (thousands, decimal) pair is determined once and cached in
    the module-level ``number_separators``.
    '''
    global number_separators
    if number_separators is None:
        if iswindows:
            number_separators = get_windows_number_formats()
        else:
            conv = localeconv()
            thousands, decimal = conv['thousands_sep'], conv['decimal_point']
            if isinstance(thousands, bytes):
                thousands = thousands.decode('utf-8', 'ignore') or ','
            if isinstance(decimal, bytes):
                decimal = decimal.decode('utf-8', 'ignore') or '.'
            number_separators = thousands, decimal
    thousands_sep, decimal_point = number_separators
    return float(string.replace(decimal_point, '.').replace(thousands_sep, ''))
408
409
def type_safe_sort_key_function(keyfunc=None):
    '''
    Wrap ``keyfunc`` (identity when None) so the resulting key function never
    causes sorted() to raise TypeError on heterogeneous data: the first key
    seen becomes the baseline, and any later key that cannot be ordered
    against it (in either direction) is replaced by that baseline. Each call
    returns a fresh, stateful key function.
    '''
    if keyfunc is None:
        def keyfunc(item):
            return item
    _unset = object()
    baseline = _unset

    def sort_key(item):
        nonlocal baseline
        candidate = keyfunc(item)
        if baseline is _unset:
            baseline = candidate
            return candidate
        try:
            # Probe comparability both ways; either may raise TypeError
            candidate < baseline
            baseline < candidate
        except TypeError:
            candidate = baseline
        return candidate

    return sort_key
430