1#!/usr/bin/env python3
2
3# Copyright (C) 2015-2018 Damon Lynch <damonlynch@gmail.com>
4
5# This file is part of Rapid Photo Downloader.
6#
7# Rapid Photo Downloader is free software: you can redistribute it and/or
8# modify it under the terms of the GNU General Public License as published by
9# the Free Software Foundation, either version 3 of the License, or
10# (at your option) any later version.
11#
12# Rapid Photo Downloader is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with Rapid Photo Downloader.  If not,
19# see <http://www.gnu.org/licenses/>.
20
21"""
22Rapid Photo Downloader deals with three types of cache:
23
241. An image cache whose sole purpose is to store thumbnails of scanned files
25   that have not necessarily been downloaded, but may have. This is only used
26   by Rapid Photo Downloader. It's needed because it's important to save
27   thumbnails that are not degraded by image resizing.
28   Name: Thumbnail Cache
29   Location: /home/USER/.cache/rapid-photo-downloader/thumbnails/
30   (Actual location may vary depending on value of environment variable
31   XDG_CACHE_HOME)
32
332. A cache of actual full files downloaded from a camera, which are then used
34   to extract the thumbnail from. Since these same files could be downloaded,
35   it makes sense to keep them cached until the program exits.
36   Name: Download Cache
37   Location: temporary subfolder in user specified download folder
38
393. The freedesktop.org thumbnail cache, for files that have been downloaded.
40   Name: FDO Cache
41   Location: /home/USER/.cache/thumbnails/
42   (Actual location may vary depending on value of environment variable
43   XDG_CACHE_HOME)
44
45For the fdo cache specs, see:
46http://specifications.freedesktop.org/thumbnail-spec/thumbnail-spec-latest.html
47"""
48
49__author__ = 'Damon Lynch'
50__copyright__ = "Copyright 2015-2018, Damon Lynch"
51
import hashlib
import logging
import os
import shutil
import sqlite3
import sys
import time
from collections import namedtuple
from typing import Optional, Tuple, Union
from urllib.parse import quote
from urllib.request import pathname2url

from PyQt5.QtCore import QSize
from PyQt5.QtGui import QImage

from raphodo.storage import get_program_cache_directory, get_fdo_cache_thumb_base_directory
from raphodo.utilities import GenerateRandomFileName, format_size_for_user
from raphodo.constants import ThumbnailCacheDiskStatus
from raphodo.rpdsql import CacheSQL
70
71
# Result of Cache.get_thumbnail(): the lookup status, the thumbnail image
# (or None) and the path of the cached file (or None).
GetThumbnail = namedtuple(
    'GetThumbnail',
    ['disk_status', 'thumbnail', 'path'],
)

# Result of ThumbnailCacheSql.get_thumbnail_path(): the lookup status, the
# path of the cached file (or None), the metadata time stored with the
# thumbnail, and whether the thumbnail's orientation is unknown.
GetThumbnailPath = namedtuple(
    'GetThumbnailPath',
    ['disk_status', 'path', 'mdatatime', 'orientation_unknown'],
)
74
class MD5Name:
    """
    Generate the URI for a file and the MD5-based file name under which its
    thumbnail is stored in a cache.
    """

    def __init__(self) -> None:
        # Encoding used to turn the URI into bytes before hashing
        self.fs_encoding = sys.getfilesystemencoding()

    def get_uri(self, full_file_name: str, camera_model: Optional[str]=None) -> str:
        """
        Build the URI that identifies the file.

        :param full_file_name: path and file name of the file
        :param camera_model: if file is on a camera, the model of the
         camera
        :return: uri: 'file://' followed by the percent-quoted absolute
         path, or for a camera 'gphoto2://' followed by the percent-quoted
         camera model and path
        """
        if camera_model is None:
            prefix = 'file://'
            path = os.path.abspath(full_file_name)
        else:
            # This is not a system standard: I'm using this for my own
            # purposes (the port is not included, because it could easily vary)
            prefix = 'gphoto2://'
            path = '{}/{}'.format(camera_model, full_file_name)

        # Quote the path directly rather than via urllib.request.pathname2url():
        # from Python 3.14 pathname2url() adds an empty URL authority ('//') in
        # front of paths that start with a slash, which would turn the URI into
        # 'file://///path' and change every MD5 name derived from it.
        return '{}{}'.format(prefix, quote(path))

    def md5_hash_name(self, full_file_name: str, camera_model: Optional[str]=None,
                      extension: str='png') -> Tuple[str, str]:
        """
        Generate MD5 hash for the file name.

        The hash is calculated from the file's URI, encoded using the
        file system encoding.

        :param full_file_name: path and file name of the file
        :param camera_model: if file is on a camera, the model of the
         camera
        :param extension: the extension to use in the file name
        :return: hash name and uri that was used to generate the hash
        """
        uri = self.get_uri(full_file_name, camera_model)
        digest = hashlib.md5(uri.encode(self.fs_encoding)).hexdigest()
        return '{md5}.{extension}'.format(md5=digest, extension=extension), uri
115
116
class Cache:
    """
    Base class with which to write and read cache thumbnails.

    Creates the cache directory (and the failure directory, if one is
    given) when it doesn't exist. ``self.valid`` records whether the
    directories are usable; when it is False the save / get / delete
    methods do nothing.
    """

    def __init__(self, cache_dir: str, failure_dir: Optional[str]) -> None:
        """
        Create cache if it doesn't exist; checks validity.

        :param cache_dir: full path of the directory into which
         thumbnails will be saved / read.
        :param failure_dir: full path of the directory into which
         failed thumbnails will be saved / read (thumbnails that could
         not be generated). None if failures are not recorded.
        """

        assert sys.platform.startswith(('linux', 'freebsd'))
        self.cache_dir = cache_dir
        self.failure_dir = failure_dir
        assert self.cache_dir

        # Hashing names needs no directory, so it is available even when
        # the cache turns out to be invalid (get_thumbnail_md5_name() does
        # not check validity).
        self.md5 = MD5Name()

        self.valid = self._create_directory(self.cache_dir, "Freedesktop.org thumbnail")

        if self.valid:
            self.random_filename = GenerateRandomFileName()
            if self.failure_dir is not None:
                self.valid = self._create_directory(self.failure_dir, "thumbnails failure")

        if not self.valid:
            self.random_filename = self.fs_encoding = None

    def _create_directory(self, directory: str, description: str) -> bool:
        """
        Ensure the directory exists, replacing a non-directory that is in
        the way.

        :param directory: full path of the directory to create
        :param description: human readable name of the cache, used in log
         messages
        :return: True if the directory exists when the method returns,
         False if it could not be created
        """
        try:
            if os.path.isdir(directory):
                return True
            # lexists() also catches a dangling symlink occupying the path
            if os.path.lexists(directory):
                os.remove(directory)
                logging.warning("Removed file %s", directory)
            os.makedirs(directory, 0o700)
            logging.debug("Created %s cache at %s", description, directory)
        except OSError:
            logging.error("Failed to create %s cache at %s", description, directory)
            return False
        return True

    def save_thumbnail(self, full_file_name: str,
                       size: int,
                       modification_time: Union[float, int],
                       generation_failed: bool,
                       thumbnail: 'QImage',
                       camera_model: Optional[str]=None,
                       free_desktop_org: bool=True) -> Optional[str]:
        """
        Save a thumbnail in the thumbnail cache.

        :param full_file_name: full path of the file (including file
         name). If the path contains symbolic links, two thumbnails will be
         saved: the canonical path (without symlinks), and the path as
         passed.
        :param size: size of the file in bytes
        :param modification_time: file modification time, to be turned
         into a float if it's not already
        :param generation_failed: True if the thumbnail is meant to
         signify the application failed to generate the thumbnail. If
         so, a 1x1 PNG is saved in the failure directory. Nothing is
         saved if the cache has no failure directory.
        :param thumbnail: the thumbnail to be saved. Will not be
         resized. Will be ignored if generation_failed is True. Its
         Thumb::URI, Thumb::MTime and Thumb::Size text keys are set.
        :param camera_model: optional camera model. If the thumbnail is
         not from a camera, then should be None.
        :param free_desktop_org: if True, then image will be converted
         to 8bit mode if necessary
        :return the md5_name of the saved file, else None if operation
        failed
        """

        if not self.valid:
            return None

        if generation_failed and self.failure_dir is None:
            # This cache does not record failures, so there is nowhere to
            # put the failure marker
            return None

        # Save to both the real path and the path passed, which may include
        # symbolic links
        full_file_name_real_path = os.path.realpath(full_file_name)
        if full_file_name_real_path != full_file_name:
            self.save_thumbnail(full_file_name_real_path, size, modification_time,
                                generation_failed, thumbnail, camera_model, free_desktop_org)

        md5_name, uri = self.md5.md5_hash_name(full_file_name, camera_model)
        if generation_failed:
            thumbnail = QImage(QSize(1, 1), QImage.Format_Indexed8)
            save_dir = self.failure_dir
        else:
            save_dir = self.cache_dir
        path = os.path.join(save_dir, md5_name)

        # These keys are read back by _get_thumbnail() to validate the
        # cached thumbnail against the file
        thumbnail.setText('Thumb::URI', uri)
        thumbnail.setText('Thumb::MTime', str(float(modification_time)))
        thumbnail.setText('Thumb::Size', str(size))

        if free_desktop_org and not generation_failed:
            if thumbnail.depth() != 8:
                thumbnail = thumbnail.convertToFormat(QImage.Format_Indexed8)

        # Write to a temporary name first and rename, so a partially written
        # file never appears under the final name
        temp_path = os.path.join(save_dir, self.random_filename.name(extension='png'))
        if not thumbnail.save(temp_path):
            return None

        try:
            os.rename(temp_path, path)
            os.chmod(path, 0o600)
        except OSError as e:
            logging.error("Failed to save thumbnail %s: %s", path, e)
            try:
                os.remove(temp_path)
            except OSError:
                # Best effort only: the temporary file is already gone if
                # the rename succeeded
                pass
            return None

        if generation_failed:
            logging.debug("Wrote {}x{} thumbnail {} for {}".format(
                thumbnail.width(), thumbnail.height(), path, uri))
        return md5_name

    def _get_thumbnail(self, path: str, modification_time: float,
                       size: int) -> Optional['QImage']:
        """
        Load the thumbnail at path if it matches the file's details.

        :param path: full path of the cached thumbnail
        :param modification_time: expected file modification time
        :param size: expected file size in bytes
        :return: the image if it loads and its Thumb::MTime and
         Thumb::Size text keys equal the expected values, else None
        """
        if os.path.exists(path):
            png = QImage(path)
            if not png.isNull():
                try:
                    mtime = float(png.text('Thumb::MTime'))
                    thumb_size = int(png.text('Thumb::Size'))
                except ValueError:
                    return None
                if mtime == float(modification_time) and thumb_size == size:
                    return png
        return None

    def get_thumbnail_md5_name(self, full_file_name: str,
                               camera_model: Optional[str] = None) -> str:
        """
        Returns the md5 name for the photo or video. Does not check if the file exists
        on the file system in the cache.

        :param full_file_name: full path of the file (including file
        name). Will be turned into an absolute path if it is a file
        system path
        :param camera_model: optional camera model. If the thumbnail is
         not from a camera, then should be None.
        :return: the md5 name
        """

        return self.md5.md5_hash_name(full_file_name=full_file_name, camera_model=camera_model)[0]

    def get_thumbnail(self, full_file_name: str, modification_time, size: int,
                      camera_model: Optional[str]=None) -> 'GetThumbnail':
        """
        Attempt to retrieve a thumbnail from the thumbnail cache.

        :param full_file_name: full path of the file (including file
        name). Will be turned into an absolute path if it is a file
        system path
        :param size: size of the file in bytes
        :param modification_time: file modification time, to be turned
         into a float if it's not already
        :param camera_model: optional camera model. If the thumbnail is
         not from a camera, then should be None.
        :return a GetThumbnail tuple of (1) ThumbnailCacheDiskStatus,
         to indicate whether the thumbnail was found, a failure, or
         missing (2) the thumbnail as QImage, if found (or None), and
         (3) the path (including the md5 name), else None,
        """

        if not self.valid:
            return GetThumbnail(ThumbnailCacheDiskStatus.not_found, None, None)
        md5_name, uri = self.md5.md5_hash_name(full_file_name=full_file_name,
                                               camera_model=camera_model)
        path = os.path.join(self.cache_dir, md5_name)
        png = self._get_thumbnail(path, modification_time, size)
        if png is not None:
            return GetThumbnail(ThumbnailCacheDiskStatus.found, png, path)
        if self.failure_dir is not None:
            # A matching marker in the failure directory means thumbnail
            # generation was already attempted for this file and failed
            path = os.path.join(self.failure_dir, md5_name)
            png = self._get_thumbnail(path, modification_time, size)
            if png is not None:
                return GetThumbnail(ThumbnailCacheDiskStatus.failure, None, None)
        return GetThumbnail(ThumbnailCacheDiskStatus.not_found, None, None)

    def modify_existing_thumbnail_and_save_copy(self,
                              existing_cache_thumbnail: str,
                              full_file_name: str, modification_time,
                              size: int,
                              error_on_missing_thumbnail: bool) -> Optional[str]:
        """
        Load a thumbnail already in this cache and save it again under the
        name derived from full_file_name, with the details passed.

        :param existing_cache_thumbnail: the md5 name of the cache thumbnail,
         without the path to the cache
        :param full_file_name: full path of the file (including file
        name). Will be turned into an absolute path if need be
        :param size: size of the file in bytes
        :param modification_time: file modification time, to be turned
         into a float if it's not already
        :param error_on_missing_thumbnail: if True, issue error if thumbnail is
         not located (useful when dealing with FDO 128 cache, but not helpful
         with FDO 256 cache as not all RAW files have thumbnails large enough)
        :return: the md5 name of the saved file, else None if operation
        failed
        """

        if not self.valid:
            return None

        existing_cache_thumbnail_full_path = os.path.join(self.cache_dir, existing_cache_thumbnail)
        if not os.path.isfile(existing_cache_thumbnail_full_path):
            if error_on_missing_thumbnail:
                logging.error("No FDO thumbnail to copy for %s", full_file_name)
            return None

        thumbnail = QImage(existing_cache_thumbnail_full_path)
        if thumbnail.isNull():
            return None
        return self.save_thumbnail(full_file_name=full_file_name,
               size=size, modification_time=modification_time,
               generation_failed=False, thumbnail=thumbnail,
               camera_model=None, free_desktop_org=False)

    def delete_thumbnail(self, full_file_name: str, camera_model: Optional[str]=None) -> None:
        """
        Delete the thumbnail associated with the file if it exists.

        The cache directory is checked first; the failure directory (if
        there is one) is only checked when no thumbnail was found in the
        cache directory.

        :param full_file_name: full path of the file (including file name)
        :param camera_model: optional camera model. If the thumbnail is
         not from a camera, then should be None.
        """
        if not self.valid:
            return None
        md5_name, uri = self.md5.md5_hash_name(full_file_name, camera_model)

        directories = [self.cache_dir]
        if self.failure_dir is not None:
            directories.append(self.failure_dir)

        for directory in directories:
            path = os.path.join(directory, md5_name)
            if os.path.isfile(path):
                os.remove(path)
                break
345
346
class FdoCacheNormal(Cache):
    """
    Freedesktop.org thumbnail cache for thumbnails <= 128x128

    Uses the 'normal' subdirectory of the FDO thumbnail base directory and
    has no failure directory.
    """
    def __init__(self):
        super().__init__(
            cache_dir=os.path.join(get_fdo_cache_thumb_base_directory(), 'normal'),
            failure_dir=None,
        )
356
357
class FdoCacheLarge(Cache):
    """
    Freedesktop.org thumbnail cache for thumbnails > 128x128 & <= 256x256

    Uses the 'large' subdirectory of the FDO thumbnail base directory and
    has no failure directory.
    """
    def __init__(self):
        super().__init__(
            cache_dir=os.path.join(get_fdo_cache_thumb_base_directory(), 'large'),
            failure_dir=None,
        )
367
368
class ThumbnailCacheSql:
    """
    Rapid Photo Downloader's own thumbnail cache.

    Thumbnails are stored as JPEG files in the 'thumbnails' subdirectory of
    the program cache directory, and are tracked in a database accessed
    through CacheSQL. ``self.valid`` is False when the cache directory is
    unavailable, in which case the methods do nothing.
    """

    not_found = GetThumbnailPath(ThumbnailCacheDiskStatus.not_found, None, None, None)

    def __init__(self, create_table_if_not_exists: bool) -> None:
        """
        Locate (creating if necessary) the cache directory and open the
        thumbnail database.

        :param create_table_if_not_exists: passed on to CacheSQL
        """
        # Define every attribute up front so they exist whichever way
        # initialization ends
        self.random_filename = None
        self.fs_encoding = None
        self.md5 = None
        self.thumb_db = None

        self.cache_dir = get_program_cache_directory(create_if_not_exist=True)
        self.valid = self.cache_dir is not None
        if not self.valid:
            return

        self.cache_dir = os.path.join(self.cache_dir, 'thumbnails')
        try:
            if not os.path.exists(self.cache_dir):
                os.makedirs(self.cache_dir, 0o700)
                logging.debug("Created thumbnails cache %s", self.cache_dir)
            elif not os.path.isdir(self.cache_dir):
                # A file is in the way: replace it with the directory
                os.remove(self.cache_dir)
                logging.warning("Removed file %s", self.cache_dir)
                os.makedirs(self.cache_dir, 0o700)
                logging.debug("Created thumbnails cache %s", self.cache_dir)
        except OSError:
            logging.error(
                "Failed to create Rapid Photo Downloader Thumbnail Cache at %s", self.cache_dir
            )
            self.valid = False
            self.cache_dir = None
            return

        self.random_filename = GenerateRandomFileName()
        self.md5 = MD5Name()
        self.thumb_db = CacheSQL(self.cache_dir, create_table_if_not_exists)

    def save_thumbnail(self, full_file_name: str, size: int,
                       mtime: float,
                       mdatatime: float,
                       generation_failed: bool,
                       orientation_unknown: bool,
                       thumbnail: Optional[QImage],
                       camera_model: Optional[str]=None) -> Optional[str]:
        """
        Save in the thumbnail cache using jpeg 75% compression.

        :param full_file_name: full path of the file (including file
        name). Will be turned into an absolute path if it is a file
        system path
        :param size: size of the file in bytes
        :param mtime: file modification time
        :param mdatatime: file time recorded in metadata
        :param generation_failed: True if the thumbnail is meant to
         signify the application failed to generate the thumbnail. If
         so, the failure is recorded in the database and no file is
         written.
        :param orientation_unknown: whether the orientation of the
         thumbnail is unknown; recorded in the database
        :param thumbnail: the thumbnail to be saved. Will not be
         resized. Will be ignored if generation_failed is True.
        :param camera_model: optional camera model. If the thumbnail is
         not from a camera, then should be None.
        :return the path of the saved file, else None if operation
        failed or generation_failed is True
        """

        if not self.valid:
            return None

        md5_name, uri = self.md5.md5_hash_name(full_file_name=full_file_name,
                                               camera_model=camera_model, extension='jpg')

        if generation_failed:
            logging.debug("Marking thumbnail for %s as 'generation failed'", uri)
        elif thumbnail is None:
            # Check before touching the database so no row is added for a
            # thumbnail that can never be written
            logging.error("No thumbnail supplied for %s: nothing to save", uri)
            return None
        else:
            logging.debug("Saving thumbnail for %s in RPD thumbnail cache", uri)

        try:
            self.thumb_db.add_thumbnail(uri=uri, size=size, mtime=mtime,
                                    mdatatime=mdatatime,
                                    md5_name=md5_name, orientation_unknown=orientation_unknown,
                                    failure=generation_failed)
        except sqlite3.OperationalError as e:
            logging.error("Database error adding thumbnail for %s: %s. Will not retry.", uri, e)
            return None

        if generation_failed:
            return None

        md5_full_name = os.path.join(self.cache_dir, md5_name)

        # Write under a temporary name and rename, so a partially written
        # file never appears under the final name
        temp_path = os.path.join(self.cache_dir, self.random_filename.name(extension='jpg'))

        if not thumbnail.save(temp_path, format='jpg', quality=75):
            return None

        try:
            os.rename(temp_path, md5_full_name)
            os.chmod(md5_full_name, 0o600)
        except OSError:
            try:
                os.remove(temp_path)
            except OSError:
                # Best effort only: the temporary file is already gone if
                # the rename succeeded
                pass
            return None

        return md5_full_name

    def get_thumbnail_path(self, full_file_name: str, mtime, size: int,
                           camera_model: Optional[str]=None) -> GetThumbnailPath:
        """
        Attempt to get a thumbnail's path from the thumbnail cache.

        :param full_file_name: full path of the file (including file
        name). Will be turned into an absolute path if it is a file
        system path
        :param size: size of the file in bytes
        :param mtime: file modification time
        :param camera_model: optional camera model. If the thumbnail is
         not from a camera, then should be None.
        :return a GetThumbnailPath tuple of (1) ThumbnailCacheDiskStatus,
         to indicate whether the thumbnail was found, a failure, or
         missing, (2) the path (including the md5 name), else None,
         (3) the file's metadata time, and (4) a bool indicating whether
         the orientation of the thumbnail is unknown
        """

        if not self.valid:
            return self.not_found

        uri = self.md5.get_uri(full_file_name, camera_model)
        in_cache = self.thumb_db.have_thumbnail(uri, size, mtime)

        if in_cache is None:
            return self.not_found

        if in_cache.failure:
            return GetThumbnailPath(ThumbnailCacheDiskStatus.failure, None,
                                    in_cache.mdatatime, None)

        path = os.path.join(self.cache_dir, in_cache.md5_name)
        if not os.path.exists(path):
            # The database refers to a file that is no longer on disk:
            # drop the stale row
            self.thumb_db.delete_thumbnails([in_cache.md5_name])
            return self.not_found

        return GetThumbnailPath(ThumbnailCacheDiskStatus.found, path,
                                in_cache.mdatatime, in_cache.orientation_unknown)

    def cleanup_cache(self, days: int=30) -> None:
        """
        Remove all files in the cache directory that have not been accessed
        for x days, and remove their entries from the database

        :param days: how many days since last access before a file is
         removed
        """
        if not self.valid:
            return

        time_period = 60 * 60 * 24 * days
        cutoff = time.time() - time_period
        deleted_thumbnails = []
        for name in os.listdir(self.cache_dir):
            thumbnail = os.path.join(self.cache_dir, name)
            if os.path.isfile(thumbnail) and os.path.getatime(thumbnail) < cutoff:
                os.remove(thumbnail)
                deleted_thumbnails.append(name)

        if deleted_thumbnails:
            if self.thumb_db.cache_exists():
                self.thumb_db.delete_thumbnails(deleted_thumbnails)
            logging.debug(
                'Deleted {} thumbnail files that had not been accessed for {} or more days'.format(
                    len(deleted_thumbnails), days
                )
            )

    def purge_cache(self) -> None:
        """
        Delete the entire cache of all contents and remove the
        directory
        """
        if self.valid:
            if self.cache_dir is not None and os.path.isdir(self.cache_dir):
                # Delete the sqlite3 database too
                shutil.rmtree(self.cache_dir)

    def no_thumbnails(self) -> int:
        """
        :return: how many thumbnails there are in the thumbnail database
        """

        if not self.valid:
            return 0
        return self.thumb_db.no_thumbnails()

    def cache_size(self) -> int:
        """
        :return: the total size in bytes of the files directly inside the
         cache directory
        """

        if not self.valid:
            return 0
        # Build full paths instead of changing the working directory, which
        # is process-wide state
        paths = (os.path.join(self.cache_dir, name) for name in os.listdir(self.cache_dir))
        return sum(os.path.getsize(path) for path in paths if os.path.isfile(path))

    def db_size(self) -> int:
        """
        :return: the size in bytes of the sql database file
        """

        if not self.valid:
            return 0
        return os.path.getsize(self.thumb_db.db)

    def optimize(self) -> Tuple[int, int, int]:
        """
        Check for any thumbnails in the db that are not in the file system
        Check for any thumbnails exist on the file system that are not in the db
        Vacuum the db

        :return db rows removed, file system photos removed, db size reduction in bytes
         (all zero if the cache is not valid)
        """

        if not self.valid:
            return 0, 0, 0

        rows = {row[0] for row in self.thumb_db.md5_names()}

        # Full paths are used throughout rather than changing the working
        # directory, which is process-wide state
        to_delete_from_db = {
            md5 for md5 in rows if not os.path.exists(os.path.join(self.cache_dir, md5))
        }
        if to_delete_from_db:
            self.thumb_db.delete_thumbnails(list(to_delete_from_db))

        # Everything in the cache directory other than the name reported by
        # db_fs_name() is a candidate thumbnail
        md5s = set(os.listdir(self.cache_dir)) - {self.thumb_db.db_fs_name()}
        to_delete_from_fs = md5s - rows
        for md5 in to_delete_from_fs:
            os.remove(os.path.join(self.cache_dir, md5))

        size = self.db_size()
        self.thumb_db.vacuum()

        return len(to_delete_from_db), len(to_delete_from_fs), size - self.db_size()
607
608
if __name__ == '__main__':
    # Running the module directly optimizes the thumbnail cache: see
    # ThumbnailCacheSql.optimize()
    db = ThumbnailCacheSql(create_table_if_not_exists=True)
    if db.valid:
        db.optimize()
    else:
        # Without a usable cache directory there is no database to optimize
        logging.error("Thumbnail cache is not available: nothing to optimize")