#!/usr/bin/env python3

# Copyright (C) 2015-2018 Damon Lynch <damonlynch@gmail.com>

# This file is part of Rapid Photo Downloader.
#
# Rapid Photo Downloader is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Rapid Photo Downloader is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Rapid Photo Downloader. If not,
# see <http://www.gnu.org/licenses/>.

"""
Rapid Photo Downloader deals with three types of cache:

1. An image cache whose sole purpose is to store thumbnails of scanned files
   that have not necessarily been downloaded, but may have. This is only used
   by Rapid Photo Downloader. It's needed because it's important to save
   thumbnails that are not degraded by image resizing.
   Name: Thumbnail Cache
   Location: /home/USER/.cache/rapid-photo-downloader/thumbnails/
   (Actual location may vary depending on value of environment variable
   XDG_CACHE_HOME)

2. A cache of actual full files downloaded from a camera, which are then used
   to extract the thumbnail from. Since these same files could be downloaded,
   it makes sense to keep them cached until the program exits.
   Name: Download Cache
   Location: temporary subfolder in user specified download folder

3. The freedesktop.org thumbnail cache, for files that have been downloaded.
   Name: FDO Cache
   Location: /home/USER/.cache/thumbnails/
   (Actual location may vary depending on value of environment variable
   XDG_CACHE_HOME)

For the fdo cache specs, see:
http://specifications.freedesktop.org/thumbnail-spec/thumbnail-spec-latest.html
"""

__author__ = 'Damon Lynch'
__copyright__ = "Copyright 2015-2018, Damon Lynch"

import os
import sys
import logging
import hashlib
from urllib.request import pathname2url
import time
import shutil
from collections import namedtuple
from typing import Optional, Tuple, Union
import sqlite3

from PyQt5.QtCore import QSize
from PyQt5.QtGui import QImage

from raphodo.storage import get_program_cache_directory, get_fdo_cache_thumb_base_directory
from raphodo.utilities import GenerateRandomFileName, format_size_for_user
from raphodo.constants import ThumbnailCacheDiskStatus
from raphodo.rpdsql import CacheSQL


GetThumbnail = namedtuple('GetThumbnail', 'disk_status, thumbnail, path')
GetThumbnailPath = namedtuple(
    'GetThumbnailPath', 'disk_status, path, mdatatime, orientation_unknown'
)


class MD5Name:
    """Generate MD5 hashes for file names."""

    def __init__(self) -> None:
        self.fs_encoding = sys.getfilesystemencoding()

    def get_uri(self, full_file_name: str, camera_model: Optional[str]=None) -> str:
        """
        Build the URI that identifies a file in the caches.

        :param full_file_name: path and file name of the file
        :param camera_model: if file is on a camera, the model of the
         camera
        :return: uri
        """
        if camera_model is None:
            prefix = 'file://'
            path = os.path.abspath(full_file_name)
        else:
            # This is not a system standard: I'm using this for my own
            # purposes (the port is not included, because it could easily vary)
            prefix = 'gphoto2://'
            path = '{}/{}'.format(camera_model, full_file_name)

        return '{}{}'.format(prefix, pathname2url(path))

    def md5_hash_name(self, full_file_name: str, camera_model: Optional[str]=None,
                      extension: Optional[str]='png') -> Tuple[str, str]:
        """
        Generate MD5 hash for the file name.

        Uses file system encoding.

        :param full_file_name: path and file name of the file
        :param camera_model: if file is on a camera, the model of the
         camera
        :param extension: the extension to use in the file name
        :return: hash name and uri that was used to generate the hash
        """
        uri = self.get_uri(full_file_name, camera_model)
        return ('{md5}.{extension}'.format(
            md5=hashlib.md5(uri.encode(self.fs_encoding)).hexdigest(),
            extension=extension), uri)


class Cache:
    """
    Base class with which to write and read cache thumbnails.
    Create cache if it doesn't exist; checks validity.
    """

    def __init__(self, cache_dir: str, failure_dir: Optional[str]) -> None:
        """
        Create cache if it doesn't exist; checks validity.

        :param cache_dir: full path of the directory into which
         thumbnails will be saved / read.
        :param failure_dir: full path of the directory into which
         failed thumbnails will be saved / read (thumbnails that could
         not be generated)
        """

        assert sys.platform.startswith('linux') or sys.platform.startswith('freebsd')
        self.cache_dir = cache_dir
        self.failure_dir = failure_dir
        assert self.cache_dir

        # Hashing a name needs no directory, so the hasher is always available.
        # get_thumbnail_md5_name() does not check self.valid and would
        # otherwise fail with AttributeError on an invalid cache.
        self.md5 = MD5Name()

        self.valid = self._create_directory(self.cache_dir, "Freedesktop.org thumbnail")

        if self.valid:
            self.random_filename = GenerateRandomFileName()
            if self.failure_dir is not None:
                self.valid = self._create_directory(self.failure_dir, "thumbnails failure")

        if not self.valid:
            self.random_filename = self.fs_encoding = None

    def _create_directory(self, dir: str, descrtiption: str) -> bool:
        """
        Ensure a cache directory exists, replacing a non-directory of the
        same name if one is in the way.

        :param dir: full path of the directory to create
        :param descrtiption: human readable cache name, used only in log
         messages
        :return: True if the directory exists when the call finishes, False
         if an OSError prevented creating it
        """
        try:
            if not os.path.exists(dir):
                os.makedirs(dir, 0o700)
                logging.debug("Created %s cache at %s", descrtiption, dir)
            elif not os.path.isdir(dir):
                # Something that is not a directory occupies the path
                os.remove(dir)
                logging.warning("Removed file %s", dir)
                os.makedirs(dir, 0o700)
                logging.debug("Created %s cache at %s", descrtiption, dir)
        except OSError:
            logging.error("Failed to create %s cache at %s", descrtiption, dir)
            return False
        return True

    def save_thumbnail(self, full_file_name: str,
                       size: int,
                       modification_time: Union[float, int],
                       generation_failed: bool,
                       thumbnail: QImage,
                       camera_model: Optional[str]=None,
                       free_desktop_org: bool=True) -> Optional[str]:
        """
        Save a thumbnail in the thumbnail cache.

        :param full_file_name: full path of the file (including file
         name). If the path contains symbolic links, two thumbnails will be
         saved: the canonical path (without symlinks), and the path as
         passed.
        :param size: size of the file in bytes
        :param modification_time: file modification time, to be turned
         into a float if it's not already
        :param generation_failed: True if the thumbnail is meant to
         signify the application failed to generate the thumbnail. If
         so, it will be saved as an empty PNG in the failure directory.
         If the cache has no failure directory, nothing is saved.
        :param thumbnail: the thumbnail to be saved. Will not be
         resized. Will be ignored if generation_failed is True.
        :param camera_model: optional camera model. If the thumbnail is
         not from a camera, then should be None.
        :param free_desktop_org: if True, then image will be converted
         to 8bit mode if necessary
        :return the md5_name of the saved file, else None if operation
         failed
        """

        if not self.valid:
            return None

        if generation_failed and self.failure_dir is None:
            # There is nowhere to record the failure (e.g. the FDO caches are
            # created without a failure directory)
            return None

        # Save to both the real path and the path passed, which may include
        # symbolic links
        full_file_name_real_path = os.path.realpath(full_file_name)
        if full_file_name_real_path != full_file_name:
            self.save_thumbnail(full_file_name_real_path, size, modification_time,
                                generation_failed, thumbnail, camera_model, free_desktop_org)

        md5_name, uri = self.md5.md5_hash_name(full_file_name, camera_model)
        if generation_failed:
            thumbnail = QImage(QSize(1, 1), QImage.Format_Indexed8)
            save_dir = self.failure_dir
        else:
            save_dir = self.cache_dir
        path = os.path.join(save_dir, md5_name)

        thumbnail.setText('Thumb::URI', uri)
        thumbnail.setText('Thumb::MTime', str(float(modification_time)))
        thumbnail.setText('Thumb::Size', str(size))

        if free_desktop_org and not generation_failed:
            if thumbnail.depth() != 8:
                thumbnail = thumbnail.convertToFormat(QImage.Format_Indexed8)

        # Write under a random name first, then rename into place, so readers
        # never see a partially written thumbnail under its final name
        temp_path = os.path.join(save_dir, self.random_filename.name(extension='png'))
        if not thumbnail.save(temp_path):
            return None

        try:
            os.rename(temp_path, path)
            os.chmod(path, 0o600)
        except OSError as e:
            logging.error("Failed to save thumbnail %s for %s: %s", path, uri, e)
            return None

        if generation_failed:
            logging.debug("Wrote {}x{} thumbnail {} for {}".format(
                thumbnail.width(), thumbnail.height(), path, uri))
        return md5_name

    def _get_thumbnail(self, path: str, modification_time: float,
                       size: int) -> Optional[QImage]:
        """
        Load a cached thumbnail if it is still current for the file.

        :param path: full path of the cached PNG
        :param modification_time: modification time the source file has now
        :param size: size in bytes the source file has now
        :return: the image if it loads and its embedded Thumb::MTime and
         Thumb::Size values match the arguments, else None
        """
        if os.path.exists(path):
            png = QImage(path)
            if not png.isNull():
                try:
                    mtime = float(png.text('Thumb::MTime'))
                    thumb_size = int(png.text('Thumb::Size'))
                except ValueError:
                    # Embedded text is missing or not numeric
                    return None
                if mtime == float(modification_time) and thumb_size == size:
                    return png
        return None

    def get_thumbnail_md5_name(self, full_file_name: str,
                               camera_model: Optional[str] = None) -> str:
        """
        Returns the md5 name for the photo or video. Does not check if the file exists
        on the file system in the cache.

        :param full_file_name: full path of the file (including file
         name). Will be turned into an absolute path if it is a file
         system path
        :param camera_model: optional camera model. If the thumbnail is
         not from a camera, then should be None.
        :return: the md5 name
        """

        return self.md5.md5_hash_name(full_file_name=full_file_name, camera_model=camera_model)[0]

    def get_thumbnail(self, full_file_name: str, modification_time, size: int,
                      camera_model: Optional[str]=None) -> GetThumbnail:
        """
        Attempt to retrieve a thumbnail from the thumbnail cache.

        :param full_file_name: full path of the file (including file
         name). Will be turned into an absolute path if it is a file
         system path
        :param size: size of the file in bytes
        :param modification_time: file modification time, to be turned
         into a float if it's not already
        :param camera_model: optional camera model. If the thumbnail is
         not from a camera, then should be None.
        :return a GetThumbnail tuple of (1) ThumbnailCacheDiskStatus,
         to indicate whether the thumbnail was found, a failure, or
         missing (2) the thumbnail as QImage, if found (or None), and
         (3) the path (including the md5 name), else None,
        """

        if not self.valid:
            return GetThumbnail(ThumbnailCacheDiskStatus.not_found, None, None)
        md5_name, uri = self.md5.md5_hash_name(full_file_name=full_file_name,
                                               camera_model=camera_model)
        path = os.path.join(self.cache_dir, md5_name)
        png = self._get_thumbnail(path, modification_time, size)
        if png is not None:
            return GetThumbnail(ThumbnailCacheDiskStatus.found, png, path)
        if self.failure_dir is not None:
            path = os.path.join(self.failure_dir, md5_name)
            png = self._get_thumbnail(path, modification_time, size)
            if png is not None:
                return GetThumbnail(ThumbnailCacheDiskStatus.failure, None, None)
        return GetThumbnail(ThumbnailCacheDiskStatus.not_found, None, None)

    def modify_existing_thumbnail_and_save_copy(self,
                                                existing_cache_thumbnail: str,
                                                full_file_name: str, modification_time,
                                                size: int,
                                                error_on_missing_thumbnail: bool
                                                ) -> Optional[str]:
        """
        Re-save an existing cache thumbnail under the name, size and
        modification time of another file.

        :param existing_cache_thumbnail: the md5 name of the cache thumbnail,
         without the path to the cache
        :param full_file_name: full path of the file (including file
         name). Will be turned into an absolute path if need be
        :param size: size of the file in bytes
        :param modification_time: file modification time, to be turned
         into a float if it's not already
        :param error_on_missing_thumbnail: if True, issue error if thumbnail is
         not located (useful when dealing with FDO 128 cache, but not helpful
         with FDO 256 cache as not all RAW files have thumbnails large enough)
        :return: the md5 name of the saved file (as returned by
         save_thumbnail), else None if operation failed
        """

        existing_cache_thumbnail_full_path = os.path.join(self.cache_dir, existing_cache_thumbnail)
        if not os.path.isfile(existing_cache_thumbnail_full_path):
            if error_on_missing_thumbnail:
                logging.error("No FDO thumbnail to copy for %s", full_file_name)
            return None
        thumbnail = QImage(existing_cache_thumbnail_full_path)
        if thumbnail.isNull():
            return None
        return self.save_thumbnail(full_file_name=full_file_name,
                                   size=size, modification_time=modification_time,
                                   generation_failed=False, thumbnail=thumbnail,
                                   camera_model=None, free_desktop_org=False)

    def delete_thumbnail(self, full_file_name: str, camera_model: Optional[str]=None) -> None:
        """
        Delete the thumbnail associated with the file if it exists

        The thumbnail directory is checked first; the failure directory (if
        the cache has one) is only checked when no thumbnail was found there.

        :param full_file_name: full path of the file (including file name)
        :param camera_model: optional camera model. If the thumbnail is
         not from a camera, then should be None.
        """
        if not self.valid:
            return None
        md5_name, uri = self.md5.md5_hash_name(full_file_name, camera_model)
        path = os.path.join(self.cache_dir, md5_name)
        if os.path.isfile(path):
            os.remove(path)
        elif self.failure_dir is not None:
            path = os.path.join(self.failure_dir, md5_name)
            if os.path.isfile(path):
                os.remove(path)


class FdoCacheNormal(Cache):
    """
    Freedesktop.org thumbnail cache for thumbnails <= 128x128
    """

    def __init__(self):
        path = get_fdo_cache_thumb_base_directory()
        cache_dir = os.path.join(path, 'normal')
        failure_dir = None
        super().__init__(cache_dir, failure_dir)


class FdoCacheLarge(Cache):
    """
    Freedesktop.org thumbnail cache for thumbnails > 128x128 & <= 256x256
    """

    def __init__(self):
        path = get_fdo_cache_thumb_base_directory()
        cache_dir = os.path.join(path, 'large')
        failure_dir = None
        super().__init__(cache_dir, failure_dir)


class ThumbnailCacheSql:
    """
    Rapid Photo Downloader's own thumbnail cache: JPEG files in a
    'thumbnails' directory, indexed by a CacheSQL database.
    """

    not_found = GetThumbnailPath(ThumbnailCacheDiskStatus.not_found, None, None, None)

    def __init__(self, create_table_if_not_exists: bool) -> None:
        """
        Locate (creating if need be) the thumbnail directory and open its
        database. On failure self.valid is False and the other methods
        return their "nothing" values.

        :param create_table_if_not_exists: passed through to CacheSQL
        """
        self.cache_dir = get_program_cache_directory(create_if_not_exist=True)
        self.valid = self.cache_dir is not None
        if not self.valid:
            return

        assert self.cache_dir is not None
        self.cache_dir = os.path.join(self.cache_dir, 'thumbnails')
        try:
            if not os.path.exists(self.cache_dir):
                os.makedirs(self.cache_dir, 0o700)
                logging.debug("Created thumbnails cache %s", self.cache_dir)
            elif not os.path.isdir(self.cache_dir):
                # Something that is not a directory occupies the path
                os.remove(self.cache_dir)
                logging.warning("Removed file %s", self.cache_dir)
                os.makedirs(self.cache_dir, 0o700)
                logging.debug("Created thumbnails cache %s", self.cache_dir)
        except OSError:
            logging.error(
                "Failed to create Rapid Photo Downloader Thumbnail Cache at %s", self.cache_dir
            )
            self.valid = False
            self.cache_dir = None
            self.random_filename = None
            self.fs_encoding = None
        else:
            self.random_filename = GenerateRandomFileName()
            self.md5 = MD5Name()
            self.thumb_db = CacheSQL(self.cache_dir, create_table_if_not_exists)

    def save_thumbnail(self, full_file_name: str, size: int,
                       mtime: float,
                       mdatatime: float,
                       generation_failed: bool,
                       orientation_unknown: bool,
                       thumbnail: Optional[QImage],
                       camera_model: Optional[str]=None) -> Optional[str]:
        """
        Save in the thumbnail cache using jpeg 75% compression.

        :param full_file_name: full path of the file (including file
         name). Will be turned into an absolute path if it is a file
         system path
        :param size: size of the file in bytes
        :param mtime: file modification time
        :param mdatatime: file time recorded in metadata
        :param generation_failed: True if the thumbnail is meant to
         signify the application failed to generate the thumbnail. If
         so, only a database row flagged as a failure is recorded; no
         image file is written.
        :param orientation_unknown: whether the orientation of the
         thumbnail is unknown; stored in the database row
        :param thumbnail: the thumbnail to be saved. Will not be
         resized. Will be ignored if generation_failed is True.
        :param camera_model: optional camera model. If the thumbnail is
         not from a camera, then should be None.
        :return the path of the saved file, else None if operation
         failed or generation_failed is True
        """

        if not self.valid:
            return None

        md5_name, uri = self.md5.md5_hash_name(full_file_name=full_file_name,
                                               camera_model=camera_model, extension='jpg')

        if generation_failed:
            logging.debug("Marking thumbnail for %s as 'generation failed'", uri)
        else:
            logging.debug("Saving thumbnail for %s in RPD thumbnail cache", uri)

        try:
            self.thumb_db.add_thumbnail(uri=uri, size=size, mtime=mtime,
                                        mdatatime=mdatatime,
                                        md5_name=md5_name, orientation_unknown=orientation_unknown,
                                        failure=generation_failed)
        except sqlite3.OperationalError as e:
            logging.error("Database error adding thumbnail for %s: %s. Will not retry.", uri, e)
            return None

        if generation_failed:
            return None

        md5_full_name = os.path.join(self.cache_dir, md5_name)

        # Write under a random name first, then rename into place
        temp_path = os.path.join(self.cache_dir, self.random_filename.name(extension='jpg'))

        if thumbnail.save(temp_path, format='jpg', quality=75):
            try:
                os.rename(temp_path, md5_full_name)
                os.chmod(md5_full_name, 0o600)
            except OSError:
                return None

            return md5_full_name
        return None

    def get_thumbnail_path(self, full_file_name: str, mtime, size: int,
                           camera_model: Optional[str]=None) -> GetThumbnailPath:
        """
        Attempt to get a thumbnail's path from the thumbnail cache.

        :param full_file_name: full path of the file (including file
         name). Will be turned into an absolute path if it is a file
         system path
        :param size: size of the file in bytes
        :param mtime: file modification time, to be turned
         into a float if it's not already
        :param camera_model: optional camera model. If the thumbnail is
         not from a camera, then should be None.
        :return a GetThumbnailPath tuple of (1) ThumbnailCacheDiskStatus,
         to indicate whether the thumbnail was found, a failure, or
         missing, (2) the path (including the md5 name), else None,
         (3) the file's metadata time, and (4) a bool indicating whether
         the orientation of the thumbnail is unknown
        """

        if not self.valid:
            return self.not_found

        uri = self.md5.get_uri(full_file_name, camera_model)
        in_cache = self.thumb_db.have_thumbnail(uri, size, mtime)

        if in_cache is None:
            return self.not_found

        if in_cache.failure:
            return GetThumbnailPath(ThumbnailCacheDiskStatus.failure, None,
                                    in_cache.mdatatime, None)

        path = os.path.join(self.cache_dir, in_cache.md5_name)
        if not os.path.exists(path):
            # The database knows the thumbnail but the file is gone: drop the
            # stale row
            self.thumb_db.delete_thumbnails([in_cache.md5_name])
            return self.not_found

        return GetThumbnailPath(ThumbnailCacheDiskStatus.found, path,
                                in_cache.mdatatime, in_cache.orientation_unknown)

    def cleanup_cache(self, days: int=30) -> None:
        """
        Remove all thumbnails that have not been accessed for x days

        :param days: files whose access time is at least this many days in
         the past are removed
        """
        if not self.valid:
            return

        time_period = 60 * 60 * 24 * days
        now = time.time()
        deleted_thumbnails = []
        # NOTE(review): every regular file in the directory is considered,
        # with no exclusion for the database file (optimize() does exclude
        # it) - confirm that is intended
        for name in os.listdir(self.cache_dir):
            thumbnail = os.path.join(self.cache_dir, name)
            if (os.path.isfile(thumbnail) and
                    os.path.getatime(thumbnail) < now - time_period):
                os.remove(thumbnail)
                deleted_thumbnails.append(name)

        if deleted_thumbnails:
            if self.thumb_db.cache_exists():
                self.thumb_db.delete_thumbnails(deleted_thumbnails)
            logging.debug(
                'Deleted {} thumbnail files that had not been accessed for {} or more days'.format(
                    len(deleted_thumbnails), days
                )
            )

    def purge_cache(self) -> None:
        """
        Delete the entire cache of all contents and remove the
        directory
        """
        if self.valid:
            if self.cache_dir is not None and os.path.isdir(self.cache_dir):
                # Delete the sqlite3 database too
                shutil.rmtree(self.cache_dir)

    def no_thumbnails(self) -> int:
        """
        :return: how many thumbnails there are in the thumbnail database
        """

        if not self.valid:
            return 0
        return self.thumb_db.no_thumbnails()

    def cache_size(self) -> int:
        """
        :return: the size of the entire cache (include the database) in bytes
        """

        if not self.valid:
            return 0
        # Full paths are used rather than os.chdir(), so the process working
        # directory is never altered
        cache_dir = self.cache_dir
        paths = (os.path.join(cache_dir, name) for name in os.listdir(cache_dir))
        return sum(os.path.getsize(path) for path in paths if os.path.isfile(path))

    def db_size(self) -> int:
        """
        :return: the size in bytes of the sql database file
        """

        if not self.valid:
            return 0
        return os.path.getsize(self.thumb_db.db)

    def optimize(self) -> Tuple[int, int, int]:
        """
        Check for any thumbnails in the db that are not in the file system
        Check for any thumbnails exist on the file system that are not in the db
        Vacuum the db

        :return db rows removed, file system photos removed, db size reduction in bytes
         (all zero if the cache is not valid)
        """

        if not self.valid:
            return 0, 0, 0

        # Full paths are used rather than os.chdir(), so the process working
        # directory is never altered
        cache_dir = self.cache_dir
        rows = {row[0] for row in self.thumb_db.md5_names()}

        to_delete_from_db = {
            md5 for md5 in rows if not os.path.exists(os.path.join(cache_dir, md5))
        }
        if to_delete_from_db:
            self.thumb_db.delete_thumbnails(list(to_delete_from_db))

        # Everything in the directory except the database itself
        md5s = set(os.listdir(cache_dir)) - {self.thumb_db.db_fs_name()}
        to_delete_from_fs = md5s - rows
        for md5 in to_delete_from_fs:
            os.remove(os.path.join(cache_dir, md5))

        size = self.db_size()
        self.thumb_db.vacuum()

        return len(to_delete_from_db), len(to_delete_from_fs), size - self.db_size()


if __name__ == '__main__':
    db = ThumbnailCacheSql(create_table_if_not_exists=True)
    db.optimize()