#!/usr/bin/env python3

# Copyright (C) 2011-2020 Damon Lynch <damonlynch@gmail.com>

# This file is part of Rapid Photo Downloader.
#
# Rapid Photo Downloader is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Rapid Photo Downloader is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Rapid Photo Downloader.  If not,
# see <http://www.gnu.org/licenses/>.

"""
Worker process to get thumbnails from Thumbnail or FDO cache, or
read thumbnail / file from the device being downloaded from.

For each device, there is one of these workers.

Sends thumbnail processing tasks to load balancer, which will in turn
send it to extractors.

By default, will set extractors to get the file's metadata time if
the metadata time is not already found in the rpd_file.
"""

__author__ = 'Damon Lynch'
__copyright__ = "Copyright 2011-2020, Damon Lynch"

try:
    # dict() raises if __builtins__ is not a mapping; in that case assume
    # that line_profiler has not injected its 'profile' decorator
    using_injected = 'profile' in dict(__builtins__)
except Exception:
    using_injected = False

if not using_injected:
    # use of line_profiler not detected: supply a do-nothing stand-in
    def profile(func):
        def inner(*args, **kwargs):
            return func(*args, **kwargs)

        return inner

import os
import sys
import logging
import pickle
from collections import deque
from operator import attrgetter
from typing import Optional, Tuple, Set

import zmq
from PyQt5.QtGui import QImage
from PyQt5.QtCore import QSize
import psutil
import gphoto2 as gp

try:
    import rawkit

    have_rawkit = True
except ImportError:
    have_rawkit = False

from raphodo.rpdfile import RPDFile
from raphodo.interprocess import (
    WorkerInPublishPullPipeline, GenerateThumbnailsArguments, GenerateThumbnailsResults,
    ThumbnailExtractorArgument
)
from raphodo.constants import (
    FileType, ThumbnailSize, ThumbnailCacheStatus, ThumbnailCacheDiskStatus, ExtractionTask,
    ExtractionProcessing, orientation_offset, thumbnail_offset, ThumbnailCacheOrigin,
    datetime_offset, datetime_offset_exiftool, thumbnail_offset_exiftool
)
from raphodo.camera import (
    Camera, CameraProblemEx, gphoto2_python_logging
)
from raphodo.cache import ThumbnailCacheSql, FdoCacheLarge
from raphodo.utilities import (GenerateRandomFileName, create_temp_dir, CacheDirs)
from raphodo.preferences import Preferences
from raphodo.rescan import RescanCamera
from raphodo.fileformats import use_exiftool_on_photo
from raphodo.heif import have_heif_module


def cache_dir_name(device_name: str) -> str:
    """Generate a directory name prefix for a temporary file cache"""
    return 'rpd-cache-{}-'.format(device_name[:10].replace(' ', '_'))


def split_list(alist: list, wanted_parts=2):
    """
    Split list into smaller parts
    http://stackoverflow.com/questions/752308/split-list-into-smaller-lists
    :param alist: the list
    :param wanted_parts: how many lists it should be split into
    :return: the split lists
    """
    length = len(alist)
    return [alist[i * length // wanted_parts: (i + 1) * length // wanted_parts]
            for i in range(wanted_parts)]


def split_indexes(length: int):
    """
    For the length of a list, return a list of indexes into it such
    that the indexes start with the middle item, then the middle item
    of the remaining two parts of the list, and so forth.

    Perhaps this algorithm could be optimized, as I did it myself. But
    hey it works and for now that's the main thing.

    :param length: the length of the list i.e. the number of indexes
     to be created
    :return: the list of indexes
    """
    indexes = []
    # Breadth-first queue of index ranges still to be split
    pending = deque([list(range(length))])
    while pending:
        lower, upper = split_list(pending.popleft())
        if upper:
            # The first item of the upper half is the middle of the range
            indexes.append(upper[0])
            upper = upper[1:]
        if lower:
            pending.append(lower)
        if upper:
            pending.append(upper)
    return indexes


def get_temporal_gaps_and_sequences(rpd_files, temporal_span):
    """
    For a sorted list of rpd_files, identify those rpd_files which are
    more than the temporal span away from each other, and those which are
    less than the temporal span from each other.

    Does not analyze clusters.

    For instance, you have 1000 photos from a day's photography. You
    sort them into a list ordered by time, earliest to latest. You then
    get all the photos that were taken more than an hour after the
    previous photo, and those that were taken within an hour of the
    previous photo.

    :param rpd_files: the sorted list of rpd_files, earliest first
    :param temporal_span: the time span that triggers a gap
    :return: the rpd_files that signify gaps, and all the rest of the
     rpd_files (which are in sequence). None if rpd_files is empty.
    """
    if rpd_files:
        prev = rpd_files[0]
        # The first file always counts as a gap
        gaps = [prev]
        sequences = []
        for rpd_file in rpd_files[1:]:
            if rpd_file.modification_time - prev.modification_time > temporal_span:
                gaps.append(rpd_file)
            else:
                sequences.append(rpd_file)
            prev = rpd_file
        return (gaps, sequences)
    return None


class GetThumbnailFromCache:
    """
    Try to get thumbnail from Rapid Photo Downloader's thumbnail cache
    or from the FreeDesktop.org cache.
    """

    def __init__(self, use_thumbnail_cache: bool) -> None:

        if use_thumbnail_cache:
            self.thumbnail_cache = ThumbnailCacheSql(create_table_if_not_exists=False)
        else:
            self.thumbnail_cache = None

        # Access large size Freedesktop.org thumbnail cache
        self.fdo_cache_large = FdoCacheLarge()

        self.thumbnail_size_needed = QSize(ThumbnailSize.width, ThumbnailSize.height)

    def image_large_enough(self, size: QSize) -> bool:
        """
        Check if at least one dimension of the image is equal to or
        bigger than the corresponding dimension of the thumbnail size.
        """
        return (size.width() >= self.thumbnail_size_needed.width() or
                size.height() >= self.thumbnail_size_needed.height())

    def get_from_cache(self, rpd_file: RPDFile,
                       use_thumbnail_cache: bool = True
                       ) -> Tuple[ExtractionTask, bytes, str, ThumbnailCacheOrigin]:
        """
        Attempt to get a thumbnail for the file from the Rapid Photo Downloader thumbnail cache
        or from the FreeDesktop.org 256x256 thumbnail cache.

        Updates the thumbnail status attributes of the rpd_file as a side effect.

        :param rpd_file: the file whose thumbnail is sought
        :param use_thumbnail_cache: whether to use the Rapid Photo Downloader
         thumbnail cache (it is consulted only if it was also enabled when this
         object was created)
        :return: the extraction task (undetermined if nothing was found), the
         thumbnail bytes (None unless read from the thumbnail cache), the full
         file name of the FDO thumbnail to work on (empty unless found in the FDO
         cache), and the cache the thumbnail came from (None if not found)
        """

        task = ExtractionTask.undetermined
        thumbnail_bytes = None
        full_file_name_to_work_on = ''
        origin = None  # type: Optional[ThumbnailCacheOrigin]

        # Attempt to get thumbnail from Thumbnail Cache
        # (see cache.py for definitions of various caches)
        if self.thumbnail_cache is not None and use_thumbnail_cache:
            get_thumbnail = self.thumbnail_cache.get_thumbnail_path(
                full_file_name=rpd_file.full_file_name,
                mtime=rpd_file.modification_time,
                size=rpd_file.size,
                camera_model=rpd_file.camera_model)
            rpd_file.thumbnail_cache_status = get_thumbnail.disk_status
            if get_thumbnail.disk_status != ThumbnailCacheDiskStatus.not_found:
                origin = ThumbnailCacheOrigin.thumbnail_cache
                # The cache has the final word, whether success or a recorded failure
                task = ExtractionTask.bypass
                if get_thumbnail.disk_status == ThumbnailCacheDiskStatus.failure:
                    rpd_file.thumbnail_status = ThumbnailCacheStatus.generation_failed
                    rpd_file.thumbnail_cache_status = ThumbnailCacheDiskStatus.failure
                elif get_thumbnail.disk_status == ThumbnailCacheDiskStatus.found:
                    rpd_file.thumbnail_cache_status = ThumbnailCacheDiskStatus.found
                    if get_thumbnail.orientation_unknown:
                        rpd_file.thumbnail_status = ThumbnailCacheStatus.orientation_unknown
                    else:
                        rpd_file.thumbnail_status = ThumbnailCacheStatus.ready
                    with open(get_thumbnail.path, 'rb') as thumbnail:
                        thumbnail_bytes = thumbnail.read()

        # Attempt to get thumbnail from large FDO Cache if not found in Thumbnail Cache
        # and it's not being downloaded directly from a camera (if it's from a camera, it's
        # not going to be in the FDO cache)

        if task == ExtractionTask.undetermined and not rpd_file.from_camera:
            get_thumbnail = self.fdo_cache_large.get_thumbnail(
                full_file_name=rpd_file.full_file_name,
                modification_time=rpd_file.modification_time,
                size=rpd_file.size,
                camera_model=rpd_file.camera_model)
            if get_thumbnail.disk_status == ThumbnailCacheDiskStatus.found:
                rpd_file.fdo_thumbnail_256_name = get_thumbnail.path
                thumb = get_thumbnail.thumbnail  # type: QImage
                if thumb is not None:
                    if self.image_large_enough(thumb.size()):
                        task = ExtractionTask.load_file_directly
                        full_file_name_to_work_on = get_thumbnail.path
                        origin = ThumbnailCacheOrigin.fdo_cache
                        rpd_file.thumbnail_status = ThumbnailCacheStatus.fdo_256_ready

        return task, thumbnail_bytes, full_file_name_to_work_on, origin


# How much of the file should be read in from local disk and thus cached
# by the kernel
cached_read = dict(
    cr2=260 * 1024,
    dng=504 * 1024,
    nef=400 * 1024
)


def preprocess_thumbnail_from_disk(rpd_file: RPDFile,
                                   processing: Set[ExtractionProcessing]) -> ExtractionTask:
    """
    Determine how to get a thumbnail from a photo or video that is not on a camera
    (although it may have directly come from there during the download process)

    Does not return the name of the file to be worked on -- that's the responsibility
    of the method calling it.

    :param rpd_file: details about file from which to get thumbnail from
    :param processing: set that holds processing tasks for the extractors to perform;
     it is added to in place
    :return: extraction task required
    """

    if rpd_file.file_type == FileType.photo:
        if rpd_file.is_heif():
            if have_heif_module:
                bytes_to_read = rpd_file.size
                if rpd_file.mdatatime:
                    task = ExtractionTask.load_heif_directly
                else:
                    task = ExtractionTask.load_heif_and_exif_directly
                processing.add(ExtractionProcessing.resize)
                # For now, do not orient, as it seems pyheif or libheif does that automatically
                # processing.add(ExtractionProcessing.orient)
            else:
                # We have no way to convert the file
                task = ExtractionTask.bypass
                bytes_to_read = 0
        elif rpd_file.is_tiff():
            available = psutil.virtual_memory().available
            if rpd_file.size <= available:
                bytes_to_read = rpd_file.size
                if rpd_file.mdatatime:
                    task = ExtractionTask.load_file_directly
                else:
                    task = ExtractionTask.load_file_and_exif_directly
                processing.add(ExtractionProcessing.resize)
            else:
                # Don't try to extract a thumbnail from
                # a file that is larger than available
                # memory
                task = ExtractionTask.bypass
                bytes_to_read = 0
        else:
            if rpd_file.is_jpeg() and rpd_file.from_camera and rpd_file.is_mtp_device:
                # jpeg photos from smartphones don't have embedded thumbnails
                task = ExtractionTask.load_file_and_exif_directly
                processing.add(ExtractionProcessing.resize)
            else:
                task = ExtractionTask.load_from_exif
            processing.add(ExtractionProcessing.orient)
            bytes_to_read = cached_read.get(rpd_file.extension, 400 * 1024)

        if bytes_to_read:
            if not rpd_file.download_full_file_name:
                # This read is merely a best-effort attempt to warm the disk cache, so
                # a failure is logged (naming the file actually opened) and not raised
                try:
                    with open(rpd_file.full_file_name, 'rb') as photo:
                        # Bring the file into the operating system's disk cache
                        photo.read(bytes_to_read)
                except FileNotFoundError:
                    logging.error("The file %s does not exist", rpd_file.full_file_name)
                except OSError as e:
                    logging.error("Unable to read %s: %s", rpd_file.full_file_name, e)
    else:
        # video
        if rpd_file.thm_full_name is not None:
            if not rpd_file.mdatatime:
                task = ExtractionTask.load_file_directly_metadata_from_secondary
                # It's the responsibility of the calling code to assign the
                # secondary_full_file_name
            else:
                task = ExtractionTask.load_file_directly
            processing.add(ExtractionProcessing.strip_bars_video)
            processing.add(ExtractionProcessing.add_film_strip)
        else:
            if rpd_file.mdatatime:
                task = ExtractionTask.extract_from_file
            else:
                task = ExtractionTask.extract_from_file_and_load_metadata

    return task


class GenerateThumbnails(WorkerInPublishPullPipeline):
    """
    Worker that determines, file by file, how each thumbnail should be
    obtained, and either reports the result directly or hands the task
    on to the thumbnail extractors.
    """

    def __init__(self) -> None:
        self.random_file_name = GenerateRandomFileName()
        # Give these attributes a value before the base class is initialized, so
        # that code reading them never encounters a missing attribute
        self.camera = None
        self.photo_cache_dir = None
        self.video_cache_dir = None
        super().__init__('Thumbnails')

    def cache_full_size_file_from_camera(self, rpd_file: RPDFile) -> bool:
        """
        Get the file from the camera chunk by chunk and cache it.

        On success, the name of the cached file is recorded in
        rpd_file.cache_full_file_name.

        :return: True if operation succeeded, False otherwise
        """
        if rpd_file.file_type == FileType.photo:
            cache_dir = self.photo_cache_dir
        else:
            cache_dir = self.video_cache_dir
        cache_full_file_name = os.path.join(
            cache_dir, self.random_file_name.name(extension=rpd_file.extension)
        )
        try:
            self.camera.save_file_by_chunks(
                dir_name=rpd_file.path,
                file_name=rpd_file.name,
                size=rpd_file.size,
                dest_full_filename=cache_full_file_name,
                progress_callback=None,
                check_for_command=self.check_for_controller_directive,
                return_file_bytes=False
            )
        except CameraProblemEx:
            # TODO report error
            return False
        else:
            rpd_file.cache_full_file_name = cache_full_file_name
            return True

    def cache_file_chunk_from_camera(self, rpd_file: RPDFile, offset: int) -> bool:
        """
        Save the first part of the file on the camera into the cache directory.

        On success, the name of the saved chunk is recorded in
        rpd_file.temp_cache_full_file_chunk.

        :param rpd_file: photo or video on the camera
        :param offset: how many bytes to get; capped at the file's size
        :return: True if operation succeeded, False otherwise
        """
        if rpd_file.file_type == FileType.photo:
            cache_dir = self.photo_cache_dir
        else:
            cache_dir = self.video_cache_dir
        cache_full_file_name = os.path.join(
            cache_dir, self.random_file_name.name(extension=rpd_file.extension)
        )
        try:
            self.camera.save_file_chunk(
                dir_name=rpd_file.path,
                file_name=rpd_file.name,
                chunk_size_in_bytes=min(offset, rpd_file.size),
                dest_full_filename=cache_full_file_name
            )
            rpd_file.temp_cache_full_file_chunk = cache_full_file_name
            return True
        except CameraProblemEx:
            # TODO problem reporting
            return False

    def extract_photo_video_from_camera(self,
                                        rpd_file: RPDFile,
                                        entire_file_required: bool,
                                        full_file_name_to_work_on: str,
                                        using_exiftool: bool) -> Tuple[ExtractionTask, str, bool]:
        """
        Extract part of a photo of video to be able to get the orientation
        and date time metadata, if and only if we know how much of the file
        is needed to get the thumbnail.

        Otherwise, download the entire photo or video from the camera to be able
        to generate the thumbnail and cache it.

        :param rpd_file: photo or video
        :param entire_file_required: whether we already know (from scanning) that
         the entire file is required
        :param full_file_name_to_work_on: file name and path of the photo or video
        :param using_exiftool: if all the metadata extraction is done using ExifTool
        :return: extraction task, full file name, and whether the full file name
         refers to a temporary file that should be deleted
        """

        task = ExtractionTask.undetermined
        file_to_work_on_is_temporary = False

        if rpd_file.is_mtp_device and rpd_file.file_type == FileType.video:
            entire_file_required = True

        if not entire_file_required:
            # For many photos videos, extract a small part of the file and use
            # that to get the metadata
            if using_exiftool:
                offset = thumbnail_offset_exiftool.get(rpd_file.extension)
            else:
                offset = thumbnail_offset.get(rpd_file.extension)
            if offset:
                if using_exiftool:
                    datetime_needed = datetime_offset_exiftool.get(rpd_file.extension)
                else:
                    datetime_needed = datetime_offset.get(rpd_file.extension)
                # There may be no date time offset recorded for this extension, in
                # which case max() would raise TypeError: keep the thumbnail offset
                if datetime_needed:
                    offset = max(offset, datetime_needed)

            if offset and self.cache_file_chunk_from_camera(rpd_file, offset):
                if rpd_file.file_type == FileType.photo:
                    task = ExtractionTask.load_from_bytes_metadata_from_temp_extract
                else:
                    task = ExtractionTask.extract_from_file_and_load_metadata
                file_to_work_on_is_temporary = True
                full_file_name_to_work_on = rpd_file.temp_cache_full_file_chunk
        if task == ExtractionTask.undetermined:
            if self.cache_full_size_file_from_camera(rpd_file):
                task = ExtractionTask.extract_from_file_and_load_metadata
                full_file_name_to_work_on = rpd_file.cache_full_file_name
            else:
                # Failed to generate thumbnail
                task = ExtractionTask.bypass

        return task, full_file_name_to_work_on, file_to_work_on_is_temporary

    def do_work(self) -> None:
        """
        Generate the thumbnails, logging any unexpected exception rather than
        allowing it to propagate.
        """
        try:
            self.generate_thumbnails()
        except SystemExit:
            # Re-raise unchanged, so that the original exit status is preserved.
            # Passing the exception itself to sys.exit() would turn a clean exit
            # into exit status 1.
            raise
        except Exception:
            if hasattr(self, 'device_name'):
                logging.error("Exception generating thumbnails for %s", self.device_name)
            else:
                logging.error("Exception generating thumbnails")
            logging.exception("Traceback:")

    def _remove_cache_dir_if_empty(self, cache_dir: Optional[str]) -> None:
        """Delete the temporary cache directory if it was created and holds no files."""
        if cache_dir is None:
            return
        try:
            if not os.listdir(cache_dir):
                os.rmdir(cache_dir)
        except OSError as e:
            logging.warning("Unable to remove temporary cache directory %s: %s", cache_dir, e)

    def generate_thumbnails(self) -> None:
        """
        Work through the files described in the pickled arguments found in
        self.content, in prioritized order. For each file: look in the caches,
        and failing that determine how its thumbnail should be extracted. Files
        that need no extraction are reported to the sink; all others are sent on
        for extraction via the frontend socket.
        """
        self.camera = None
        self.photo_cache_dir = self.video_cache_dir = None
        # NOTE(review): the content is unpickled as is -- presumably it only ever
        # originates from the program's own processes; confirm
        arguments = pickle.loads(self.content)  # type: GenerateThumbnailsArguments
        self.device_name = arguments.name
        logging.info("Generating %s thumbnails for %s", len(arguments.rpd_files), arguments.name)
        if arguments.log_gphoto2:
            self.gphoto2_logging = gphoto2_python_logging()

        self.frontend = self.context.socket(zmq.PUSH)
        self.frontend.connect("tcp://localhost:{}".format(arguments.frontend_port))

        self.prefs = Preferences()

        # Whether we must use ExifTool to read photo metadata
        force_exiftool = self.prefs.force_exiftool

        # If the entire photo or video is required to extract the thumbnail, which is determined
        # when extracting sample metadata from a photo or video during the device scan
        entire_photo_required = arguments.entire_photo_required
        entire_video_required = arguments.entire_video_required

        # Access and generate Rapid Photo Downloader thumbnail cache
        use_thumbnail_cache = self.prefs.use_thumbnail_cache

        thumbnail_caches = GetThumbnailFromCache(use_thumbnail_cache=use_thumbnail_cache)

        cache_file_from_camera = force_exiftool

        rpd_files = arguments.rpd_files

        # with open('tests/thumbnail_data_medium_no_tiff', 'wb') as f:
        #     pickle.dump(rpd_files, f)

        # Must sort files by modification time prior to temporal analysis needed to figure out
        # which thumbnails to prioritize
        rpd_files = sorted(rpd_files, key=attrgetter('modification_time'))

        time_span = arguments.proximity_seconds

        rpd_files2 = []

        if rpd_files:
            gaps, sequences = get_temporal_gaps_and_sequences(rpd_files, time_span)

            # Files marking a temporal gap come first, followed by the remaining
            # files in middle-out order
            rpd_files2.extend(gaps)

            indexes = split_indexes(len(sequences))
            rpd_files2.extend([sequences[idx] for idx in indexes])

            assert len(rpd_files) == len(rpd_files2)
            rpd_files = rpd_files2

        if arguments.camera is not None:
            self.camera = Camera(
                model=arguments.camera, port=arguments.port,
                specific_folders=self.prefs.folders_to_scan
            )

            if not self.camera.camera_initialized:
                # There is nothing to do here: exit!
                logging.debug(
                    "Prematurely exiting thumbnail generation due to lack of access to camera %s",
                    arguments.camera
                )
                self.content = pickle.dumps(
                    GenerateThumbnailsResults(
                        scan_id=arguments.scan_id,
                        camera_removed=True,
                    ), pickle.HIGHEST_PROTOCOL
                )
                self.send_message_to_sink()
                self.disconnect_logging()
                self.send_finished_command()
                sys.exit(0)

            if not cache_file_from_camera:
                for rpd_file in rpd_files:
                    if use_exiftool_on_photo(rpd_file.extension,
                                             preview_extraction_irrelevant=False):
                        cache_file_from_camera = True
                        break

            must_make_cache_dirs = (not self.camera.can_fetch_thumbnails or cache_file_from_camera)

            if must_make_cache_dirs or arguments.need_video_cache_dir \
                    or arguments.need_photo_cache_dir:
                # If downloading complete copy of the files to
                # generate previews, then may as well cache them to speed up
                # the download process
                self.photo_cache_dir = create_temp_dir(
                    folder=arguments.cache_dirs.photo_cache_dir,
                    prefix=cache_dir_name(self.device_name)
                )
                self.video_cache_dir = create_temp_dir(
                    folder=arguments.cache_dirs.video_cache_dir,
                    prefix=cache_dir_name(self.device_name)
                )
                cache_dirs = CacheDirs(self.photo_cache_dir, self.video_cache_dir)
                self.content = pickle.dumps(
                    GenerateThumbnailsResults(
                        scan_id=arguments.scan_id,
                        cache_dirs=cache_dirs
                    ), pickle.HIGHEST_PROTOCOL
                )
                self.send_message_to_sink()

        from_thumb_cache = 0
        from_fdo_cache = 0

        if self.camera:
            rescan = RescanCamera(camera=self.camera, prefs=self.prefs)
            rescan.rescan_camera(rpd_files)
            rpd_files = rescan.rpd_files
            if rescan.missing_rpd_files:
                logging.error(
                    "%s files could not be relocated on %s",
                    len(rescan.missing_rpd_files), self.camera.display_name
                )
                for rpd_file in rescan.missing_rpd_files:  # type: RPDFile
                    self.content = pickle.dumps(
                        GenerateThumbnailsResults(rpd_file=rpd_file, thumbnail_bytes=None),
                        pickle.HIGHEST_PROTOCOL
                    )
                    self.send_message_to_sink()

        for rpd_file in rpd_files:  # type: RPDFile
            # Check to see if the process has received a command
            self.check_for_controller_directive()

            exif_buffer = None
            file_to_work_on_is_temporary = False
            secondary_full_file_name = ''
            processing = set()  # type: Set[ExtractionProcessing]

            # Attempt to get thumbnail from Thumbnail Cache
            # (see cache.py for definitions of various caches)

            cache_search = thumbnail_caches.get_from_cache(rpd_file)
            task, thumbnail_bytes, full_file_name_to_work_on, origin = cache_search
            if task != ExtractionTask.undetermined:
                if origin == ThumbnailCacheOrigin.thumbnail_cache:
                    from_thumb_cache += 1
                else:
                    assert origin == ThumbnailCacheOrigin.fdo_cache
                    logging.debug(
                        "Thumbnail for %s found in large FDO cache", rpd_file.full_file_name
                    )
                    from_fdo_cache += 1
                    processing.add(ExtractionProcessing.resize)
                    if not rpd_file.mdatatime:
                        # Since we're extracting the thumbnail from the FDO cache,
                        # need to grab its metadata too.
                        # Reassign the task
                        task = ExtractionTask.load_file_directly_metadata_from_secondary
                        # It's not being downloaded from a camera, so nothing
                        # special to do except assign the name of the file from which
                        # to extract the metadata
                        secondary_full_file_name = rpd_file.full_file_name
                        logging.debug(
                            "Although thumbnail found in the cache, tasked to extract "
                            "file time recorded in metadata from %s", secondary_full_file_name
                        )
            if task == ExtractionTask.undetermined:
                # Thumbnail was not found in any cache: extract it
                if self.camera:  # type: Camera
                    if rpd_file.file_type == FileType.photo:
                        if rpd_file.is_heif():
                            # Load HEIF / HEIC using entire file.
                            # We are assuming that there is no tool to extract a
                            # preview image from an HEIF / HEIC, or the file simply
                            # does not have one to extract.
                            if self.cache_full_size_file_from_camera(rpd_file):
                                task = ExtractionTask.load_heif_and_exif_directly
                                processing.add(ExtractionProcessing.resize)
                                full_file_name_to_work_on = rpd_file.cache_full_file_name
                                # For now, do not orient, as it seems pyheif or libheif does
                                # that automatically.
                                # processing.add(ExtractionProcessing.orient)
                            else:
                                # Failed to generate thumbnail. Without this, the task
                                # would remain undetermined and nothing at all would be
                                # reported for the file.
                                task = ExtractionTask.bypass

                        elif self.camera.can_fetch_thumbnails:
                            task = ExtractionTask.load_from_bytes
                            if rpd_file.is_jpeg_type():
                                # gPhoto2 knows how to get jpeg thumbnails
                                try:
                                    thumbnail_bytes = self.camera.get_thumbnail(
                                        rpd_file.path, rpd_file.name
                                    )
                                except CameraProblemEx:
                                    # TODO handle error?
                                    thumbnail_bytes = None
                            else:

                                if force_exiftool or use_exiftool_on_photo(
                                        rpd_file.extension, preview_extraction_irrelevant=False):
                                    task, full_file_name_to_work_on, \
                                        file_to_work_on_is_temporary = \
                                        self.extract_photo_video_from_camera(
                                            rpd_file, entire_photo_required,
                                            full_file_name_to_work_on,
                                            True
                                        )
                                    if task == \
                                            ExtractionTask.load_from_bytes_metadata_from_temp_extract:
                                        secondary_full_file_name = full_file_name_to_work_on
                                        file_to_work_on_is_temporary = False

                                else:
                                    # gPhoto2 does not know how to get RAW thumbnails, so we do that
                                    # part ourselves
                                    if rpd_file.extension == 'crw':
                                        # Could cache this file, since reading its entirety
                                        # But does anyone download a CRW file from the camera these
                                        # days?!
                                        bytes_to_read = rpd_file.size
                                    else:
                                        bytes_to_read = min(
                                            rpd_file.size, orientation_offset.get(
                                                rpd_file.extension, 500
                                            )
                                        )
                                    exif_buffer = self.camera.get_exif_extract(
                                        rpd_file.path, rpd_file.name, bytes_to_read
                                    )
                                try:
                                    thumbnail_bytes = self.camera.get_thumbnail(
                                        rpd_file.path, rpd_file.name
                                    )
                                except CameraProblemEx:
                                    # TODO report error
                                    thumbnail_bytes = None
                            processing.add(ExtractionProcessing.strip_bars_photo)
                            processing.add(ExtractionProcessing.orient)
                        else:
                            # Many (all?) jpegs from phones don't include jpeg previews,
                            # so need to render from the entire jpeg itself. Slow!

                            # If rawkit is not installed, then extract merely a part of
                            # phone's raw format, and try to extract the jpeg preview
                            # from it (which probably doesn't exist!). This is fast.
                            # If have rawkit, download and render an image from the
                            # RAW

                            if not rpd_file.is_jpeg() and not have_rawkit:
                                bytes_to_read = thumbnail_offset.get(rpd_file.extension)
                                if bytes_to_read:
                                    exif_buffer = self.camera.get_exif_extract(
                                        rpd_file.path, rpd_file.name, bytes_to_read
                                    )
                                    task = ExtractionTask.load_from_exif_buffer
                                    processing.add(ExtractionProcessing.orient)
                            # Fall back to the full file only if the exif buffer route
                            # above was not taken, so as to not discard its task
                            if task == ExtractionTask.undetermined:
                                if self.cache_full_size_file_from_camera(rpd_file):
                                    if rpd_file.is_jpeg():
                                        task = ExtractionTask.load_file_and_exif_directly
                                    else:
                                        task = ExtractionTask.load_from_exif
                                    processing.add(ExtractionProcessing.resize)
                                    processing.add(ExtractionProcessing.orient)
                                    full_file_name_to_work_on = rpd_file.cache_full_file_name
                                else:
                                    # Failed to generate thumbnail
                                    task = ExtractionTask.bypass
                    else:
                        # video from camera
                        if rpd_file.thm_full_name is not None:
                            # Fortunately, we have a special video thumbnail file
                            # Still need to get metadata time, however.

                            if entire_video_required:
                                offset = rpd_file.size
                            else:
                                offset = datetime_offset.get(rpd_file.extension)
                                # If there is no offset, there is no point trying to extract the
                                # metadata time from part of the video. It's not ideal,
                                # but if this is from a camera on which there were any other files
                                # we can assume we've got a somewhat accurate date time for it from
                                # the modification time.
                                # The only exception is if the video file is not that big, in which
                                # case it's worth reading in its entirety:
                                if offset is None and rpd_file.size < 4000000:
                                    offset = rpd_file.size

                            if rpd_file.mdatatime or not offset:
                                task = ExtractionTask.load_from_bytes
                            elif self.cache_file_chunk_from_camera(rpd_file, offset):
                                task = ExtractionTask.load_from_bytes_metadata_from_temp_extract
                                secondary_full_file_name = rpd_file.temp_cache_full_file_chunk
                            else:
                                # For some reason was unable to download part of the video file
                                task = ExtractionTask.load_from_bytes

                            try:
                                thumbnail_bytes = self.camera.get_THM_file(rpd_file.thm_full_name)
                            except CameraProblemEx:
                                # TODO report error
                                thumbnail_bytes = None
                            processing.add(ExtractionProcessing.strip_bars_video)
                            processing.add(ExtractionProcessing.add_film_strip)
                        else:
                            task, full_file_name_to_work_on, file_to_work_on_is_temporary = \
                                self.extract_photo_video_from_camera(
                                    rpd_file, entire_video_required,
                                    full_file_name_to_work_on,
                                    False
                                )
                else:
                    # File is not on a camera
                    task = preprocess_thumbnail_from_disk(rpd_file=rpd_file, processing=processing)
                    if task != ExtractionTask.bypass:
                        if rpd_file.thm_full_name is not None:
                            full_file_name_to_work_on = rpd_file.thm_full_name
                            if task == ExtractionTask.load_file_directly_metadata_from_secondary:
                                secondary_full_file_name = rpd_file.full_file_name
                        else:
                            full_file_name_to_work_on = rpd_file.full_file_name

            if task == ExtractionTask.bypass:
                # Nothing to extract: report directly to the sink
                self.content = pickle.dumps(
                    GenerateThumbnailsResults(rpd_file=rpd_file, thumbnail_bytes=thumbnail_bytes),
                    pickle.HIGHEST_PROTOCOL
                )
                self.send_message_to_sink()

            elif task != ExtractionTask.undetermined:
                # Send data to load balancer, which will send to one of its
                # workers

                self.content = pickle.dumps(
                    ThumbnailExtractorArgument(
                        rpd_file=rpd_file,
                        task=task,
                        processing=processing,
                        full_file_name_to_work_on=full_file_name_to_work_on,
                        secondary_full_file_name=secondary_full_file_name,
                        exif_buffer=exif_buffer,
                        thumbnail_bytes=thumbnail_bytes,
                        use_thumbnail_cache=use_thumbnail_cache,
                        file_to_work_on_is_temporary=file_to_work_on_is_temporary,
                        write_fdo_thumbnail=False,
                        send_thumb_to_main=True,
                        force_exiftool=force_exiftool
                    ),
                    pickle.HIGHEST_PROTOCOL)
                self.frontend.send_multipart([b'data', self.content])

        if arguments.camera:
            self.camera.free_camera()
            # Delete our temporary cache directories if they are empty.
            # The directories are recorded on the instance, hence it is the instance
            # attributes that must be examined.
            self._remove_cache_dir_if_empty(self.photo_cache_dir)
            self._remove_cache_dir_if_empty(self.video_cache_dir)

        logging.debug("Finished phase 1 of thumbnail generation for %s", self.device_name)
        if from_thumb_cache:
            logging.info(
                "{} of {} thumbnails for {} came from thumbnail cache".format(
                    from_thumb_cache, len(rpd_files), self.device_name
                )
            )
        if from_fdo_cache:
            logging.info(
                "{} of {} thumbnails of for {} came from Free Desktop cache".format(
                    from_fdo_cache, len(rpd_files), self.device_name
                )
            )

        self.disconnect_logging()
        self.send_finished_command()

    def cleanup_pre_stop(self):
        """Release the camera, if one is in use."""
        if self.camera is not None:
            self.camera.free_camera()


if __name__ == "__main__":
    generate_thumbnails = GenerateThumbnails()