1#!/usr/bin/env python3
2
3# Copyright (C) 2011-2020 Damon Lynch <damonlynch@gmail.com>
4
5# This file is part of Rapid Photo Downloader.
6#
7# Rapid Photo Downloader is free software: you can redistribute it and/or
8# modify it under the terms of the GNU General Public License as published by
9# the Free Software Foundation, either version 3 of the License, or
10# (at your option) any later version.
11#
12# Rapid Photo Downloader is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with Rapid Photo Downloader.  If not,
19# see <http://www.gnu.org/licenses/>.
20
21"""
22Worker process to get thumbnails from Thumbnail or FDO cache, or
23read thumbnail / file from the device being downloaded from.
24
25For each device, there is one of these workers.
26
27Sends thumbnail processing tasks to load balancer, which will in turn
28send it to extractors.
29
30By default, will set extractors to get the file's metadata time if
31the metadata time is not already found in the rpd_file.
32"""
33
34__author__ = 'Damon Lynch'
35__copyright__ = "Copyright 2011-2020, Damon Lynch"
36
import builtins
import functools

# kernprof (line_profiler) makes ``profile`` available by injecting it into the
# builtins module. Query that module directly: ``__builtins__`` is a dict in
# imported modules but a module object in ``__main__``, so converting it with
# ``dict()`` fails exactly when this file is run as a script.
using_injected = hasattr(builtins, 'profile')

if not using_injected:
    # use of line_profiler not detected
    def profile(func):
        """Stand-in for line_profiler's decorator: calls func unchanged."""

        @functools.wraps(func)
        def inner(*args, **kwargs):
            return func(*args, **kwargs)

        return inner
49
50import os
51import sys
52import logging
53import pickle
54from collections import deque
55from operator import attrgetter
56from typing import Optional, Tuple, Set
57
58import zmq
59from PyQt5.QtGui import QImage
60from PyQt5.QtCore import QSize
61import psutil
62import gphoto2 as gp
63
64try:
65    import rawkit
66
67    have_rawkit = True
68except ImportError:
69    have_rawkit = False
70
71from raphodo.rpdfile import RPDFile
72from raphodo.interprocess import (
73    WorkerInPublishPullPipeline, GenerateThumbnailsArguments, GenerateThumbnailsResults,
74    ThumbnailExtractorArgument
75)
76from raphodo.constants import (
77    FileType, ThumbnailSize, ThumbnailCacheStatus, ThumbnailCacheDiskStatus, ExtractionTask,
78    ExtractionProcessing, orientation_offset, thumbnail_offset, ThumbnailCacheOrigin,
79    datetime_offset, datetime_offset_exiftool, thumbnail_offset_exiftool
80)
81from raphodo.camera import (
82    Camera, CameraProblemEx, gphoto2_python_logging
83)
84from raphodo.cache import ThumbnailCacheSql, FdoCacheLarge
85from raphodo.utilities import (GenerateRandomFileName, create_temp_dir, CacheDirs)
86from raphodo.preferences import Preferences
87from raphodo.rescan import RescanCamera
88from raphodo.fileformats import use_exiftool_on_photo
89from raphodo.heif import have_heif_module
90
91
def cache_dir_name(device_name: str) -> str:
    """
    Build the prefix used when naming a device's temporary file cache directory

    :param device_name: name of the device; only its first ten characters
     are used, with any spaces converted to underscores
    :return: the directory name prefix
    """
    short_name = device_name[:10].replace(' ', '_')
    return 'rpd-cache-' + short_name + '-'
95
96
def split_list(alist: list, wanted_parts=2):
    """
    Divide a list into consecutive parts of near equal size

    http://stackoverflow.com/questions/752308/split-list-into-smaller-lists
    :param alist: the list
    :param wanted_parts: how many lists it should be split into
    :return: the split lists, in their original order
    """
    total = len(alist)
    parts = []
    for part_no in range(wanted_parts):
        # Integer division places any left over items in the later parts
        start = part_no * total // wanted_parts
        stop = (part_no + 1) * total // wanted_parts
        parts.append(alist[start:stop])
    return parts
108
109
def split_indexes(length: int):
    """
    For the length of a list, return a list of indexes into it such
    that the indexes start with the middle item, then the middle item
    of the remaining two parts of the list, and so forth.

    The list is halved breadth-first. Only the boundaries of each part
    are tracked, so no intermediate lists are created or copied.

    :param length: the length of the list i.e. the number of indexes
     to be created
    :return: the list of indexes
    """
    indexes = []
    # Each entry is a half-open range (start, stop) of indexes not yet emitted
    pending = deque([(0, length)])
    while pending:
        start, stop = pending.popleft()
        # First index of the upper half when the range is split in two
        middle = start + (stop - start) // 2
        if middle < stop:
            indexes.append(middle)
        # Queue the lower half, then what remains of the upper half
        if start < middle:
            pending.append((start, middle))
        if middle + 1 < stop:
            pending.append((middle + 1, stop))
    return indexes
136
137
def get_temporal_gaps_and_sequences(rpd_files, temporal_span):
    """
    Partition a time sorted list of rpd_files into those that follow
    the previous file by more than the temporal span, and those that
    do not.

    Does not analyze clusters.

    For instance, you have 1000 photos from a day's photography. You
    sort them into a list ordered by time, earliest to latest. You then
    get all the photos that were taken more than an hour after the
    previous photo, and those that were taken within an hour of the
    previous photo.

    The very first file is always treated as signifying a gap.

    :param rpd_files: the sorted list of rpd_files, earliest first
    :param temporal_span: the time span that triggers a gap
    :return: the rpd_files that signify gaps, and all the rest of the
    rpd_files (which are in sequence); None if there are no rpd_files
    """
    if not rpd_files:
        return None

    gaps = [rpd_files[0]]
    sequences = []
    # Compare each file with the one immediately before it
    for earlier, later in zip(rpd_files, rpd_files[1:]):
        elapsed = later.modification_time - earlier.modification_time
        destination = gaps if elapsed > temporal_span else sequences
        destination.append(later)
    return (gaps, sequences)
170
171
class GetThumbnailFromCache:
    """
    Look up a file's thumbnail in Rapid Photo Downloader's thumbnail cache
    and in the FreeDesktop.org large thumbnail cache.
    """

    def __init__(self, use_thumbnail_cache: bool) -> None:
        """
        :param use_thumbnail_cache: whether to open the Rapid Photo Downloader
         thumbnail cache
        """

        # The Rapid Photo Downloader thumbnail cache is optional
        self.thumbnail_cache = (
            ThumbnailCacheSql(create_table_if_not_exists=False)
            if use_thumbnail_cache else None
        )

        # The large size Freedesktop.org thumbnail cache is always accessed
        self.fdo_cache_large = FdoCacheLarge()

        self.thumbnail_size_needed = QSize(ThumbnailSize.width, ThumbnailSize.height)

    def image_large_enough(self, size: QSize) -> bool:
        """
        Check if the image's width or its height is at least that of the
        thumbnail size needed.
        """
        needed = self.thumbnail_size_needed
        if size.width() >= needed.width():
            return True
        return size.height() >= needed.height()

    def get_from_cache(self, rpd_file: RPDFile,
                       use_thumbnail_cache: bool = True
                       ) -> Tuple[ExtractionTask, bytes, str, ThumbnailCacheOrigin]:
        """
        Search for the file's thumbnail in the Rapid Photo Downloader thumbnail
        cache, and failing that in the FreeDesktop.org 256x256 thumbnail cache.

        The thumbnail status attributes of the rpd_file are updated to reflect
        what was found.

        :param rpd_file: the file whose thumbnail is sought
        :param use_thumbnail_cache: whether to consult the Rapid Photo Downloader
         thumbnail cache (it is consulted only if it was also opened when this
         class was initialized)
        :return: the extraction task (undetermined if nothing was found), the
         thumbnail's bytes (None unless read from the thumbnail cache), the name
         of the file to work on (empty unless found in the FDO cache), and the
         cache the thumbnail came from (None if it was in neither)
        """

        task = ExtractionTask.undetermined
        thumbnail_bytes = None
        full_file_name_to_work_on = ''
        origin = None  # type: Optional[ThumbnailCacheOrigin]

        # First choice: the Thumbnail Cache
        # (see cache.py for definitions of various caches)
        if self.thumbnail_cache is not None and use_thumbnail_cache:
            cached = self.thumbnail_cache.get_thumbnail_path(
                full_file_name=rpd_file.full_file_name,
                mtime=rpd_file.modification_time,
                size=rpd_file.size,
                camera_model=rpd_file.camera_model)
            disk_status = cached.disk_status
            rpd_file.thumbnail_cache_status = disk_status
            if disk_status != ThumbnailCacheDiskStatus.not_found:
                # The cache knows about this file: there is nothing to extract
                origin = ThumbnailCacheOrigin.thumbnail_cache
                task = ExtractionTask.bypass
                if disk_status == ThumbnailCacheDiskStatus.failure:
                    rpd_file.thumbnail_status = ThumbnailCacheStatus.generation_failed
                    rpd_file.thumbnail_cache_status = ThumbnailCacheDiskStatus.failure
                elif disk_status == ThumbnailCacheDiskStatus.found:
                    rpd_file.thumbnail_cache_status = ThumbnailCacheDiskStatus.found
                    rpd_file.thumbnail_status = (
                        ThumbnailCacheStatus.orientation_unknown
                        if cached.orientation_unknown
                        else ThumbnailCacheStatus.ready
                    )
                    with open(cached.path, 'rb') as thumbnail_file:
                        thumbnail_bytes = thumbnail_file.read()

        # Second choice: the large FDO Cache, but only if the file is not being
        # downloaded directly from a camera (if it's from a camera, it's not going
        # to be in the FDO cache)
        if task == ExtractionTask.undetermined and not rpd_file.from_camera:
            fdo_result = self.fdo_cache_large.get_thumbnail(
                full_file_name=rpd_file.full_file_name,
                modification_time=rpd_file.modification_time,
                size=rpd_file.size,
                camera_model=rpd_file.camera_model)
            if fdo_result.disk_status == ThumbnailCacheDiskStatus.found:
                rpd_file.fdo_thumbnail_256_name = fdo_result.path
                thumb = fdo_result.thumbnail  # type: QImage
                if thumb is not None and self.image_large_enough(thumb.size()):
                    task = ExtractionTask.load_file_directly
                    full_file_name_to_work_on = fdo_result.path
                    origin = ThumbnailCacheOrigin.fdo_cache
                    rpd_file.thumbnail_status = ThumbnailCacheStatus.fdo_256_ready

        return task, thumbnail_bytes, full_file_name_to_work_on, origin
257
258
# How much of a file should be read in from local disk, and thus cached
# by the kernel, keyed by file extension
cached_read = {
    'cr2': 260 * 1024,
    'dng': 504 * 1024,
    'nef': 400 * 1024,
}
266
267
def preprocess_thumbnail_from_disk(rpd_file: RPDFile,
                                   processing: Set[ExtractionProcessing]) -> ExtractionTask:
    """
    Determine how to get a thumbnail from a photo or video that is not on a camera
    (although it may have directly come from there during the download process)

    Does not return the name of the file to be worked on -- that's the responsibility
    of the method calling it.

    As a side effect, when a photo has no download_full_file_name set, the start of
    the file (or all of it for HEIF and TIFF files) is read so that it is brought
    into the operating system's disk cache.

    :param rpd_file: details about file from which to get thumbnail from
    :param processing: set that holds processing tasks for the extractors to perform;
     it is added to in place
    :return: extraction task required
    """

    if rpd_file.file_type == FileType.photo:
        if rpd_file.is_heif():
            if have_heif_module:
                bytes_to_read = rpd_file.size
                if rpd_file.mdatatime:
                    task = ExtractionTask.load_heif_directly
                else:
                    task = ExtractionTask.load_heif_and_exif_directly
                processing.add(ExtractionProcessing.resize)
                # For now, do not orient, as it seems pyheif or libheif does that automatically
                # processing.add(ExtractionProcessing.orient)
            else:
                # We have no way to convert the file
                task = ExtractionTask.bypass
                bytes_to_read = 0
        elif rpd_file.is_tiff():
            available = psutil.virtual_memory().available
            if rpd_file.size <= available:
                bytes_to_read = rpd_file.size
                if rpd_file.mdatatime:
                    task = ExtractionTask.load_file_directly
                else:
                    task = ExtractionTask.load_file_and_exif_directly
                processing.add(ExtractionProcessing.resize)
            else:
                # Don't try to extract a thumbnail from
                # a file that is larger than available
                # memory
                task = ExtractionTask.bypass
                bytes_to_read = 0
        else:
            if rpd_file.is_jpeg() and rpd_file.from_camera and rpd_file.is_mtp_device:
                # jpeg photos from smartphones don't have embedded thumbnails
                task = ExtractionTask.load_file_and_exif_directly
                processing.add(ExtractionProcessing.resize)
            else:
                task = ExtractionTask.load_from_exif
            processing.add(ExtractionProcessing.orient)
            bytes_to_read = cached_read.get(rpd_file.extension, 400 * 1024)

        if bytes_to_read:
            if not rpd_file.download_full_file_name:
                try:
                    with open(rpd_file.full_file_name, 'rb') as photo:
                        # Bring the file into the operating system's disk cache
                        photo.read(bytes_to_read)
                except FileNotFoundError:
                    # Report the file that was actually opened: in this branch
                    # download_full_file_name is never set
                    logging.error(
                        "The download file %s does not exist", rpd_file.full_file_name
                    )
    else:
        # video
        if rpd_file.thm_full_name is not None:
            if not rpd_file.mdatatime:
                task = ExtractionTask.load_file_directly_metadata_from_secondary
                # It's the responsibility of the calling code to assign the
                # secondary_full_file_name
            else:
                task = ExtractionTask.load_file_directly
            processing.add(ExtractionProcessing.strip_bars_video)
            processing.add(ExtractionProcessing.add_film_strip)
        else:
            if rpd_file.mdatatime:
                task = ExtractionTask.extract_from_file
            else:
                task = ExtractionTask.extract_from_file_and_load_metadata

    return task
350
351
352class GenerateThumbnails(WorkerInPublishPullPipeline):
353
    def __init__(self) -> None:
        # Generator of the random file names given to files cached from a camera
        self.random_file_name = GenerateRandomFileName()
        # NOTE(review): presumably the attribute above is assigned first because
        # the base class initializer starts processing work -- confirm against
        # WorkerInPublishPullPipeline
        super().__init__('Thumbnails')
357
358    def cache_full_size_file_from_camera(self, rpd_file: RPDFile) -> bool:
359        """
360        Get the file from the camera chunk by chunk and cache it.
361
362        :return: True if operation succeeded, False otherwise
363        """
364        if rpd_file.file_type == FileType.photo:
365            cache_dir = self.photo_cache_dir
366        else:
367            cache_dir = self.video_cache_dir
368        cache_full_file_name = os.path.join(
369            cache_dir, self.random_file_name.name(extension=rpd_file.extension)
370        )
371        try:
372            self.camera.save_file_by_chunks(
373                dir_name=rpd_file.path,
374                file_name=rpd_file.name,
375                size=rpd_file.size,
376                dest_full_filename=cache_full_file_name,
377                progress_callback=None,
378                check_for_command=self.check_for_controller_directive,
379                return_file_bytes=False
380            )
381        except CameraProblemEx as e:
382            # TODO report error
383            return False
384        else:
385            rpd_file.cache_full_file_name = cache_full_file_name
386            return True
387
388    def cache_file_chunk_from_camera(self, rpd_file: RPDFile, offset: int) -> bool:
389        if rpd_file.file_type == FileType.photo:
390            cache_dir = self.photo_cache_dir
391        else:
392            cache_dir = self.video_cache_dir
393        cache_full_file_name = os.path.join(
394            cache_dir, self.random_file_name.name(extension=rpd_file.extension)
395        )
396        try:
397            self.camera.save_file_chunk(
398                dir_name=rpd_file.path,
399                file_name=rpd_file.name,
400                chunk_size_in_bytes=min(offset, rpd_file.size),
401                dest_full_filename=cache_full_file_name
402            )
403            rpd_file.temp_cache_full_file_chunk = cache_full_file_name
404            return True
405        except CameraProblemEx as e:
406            # TODO problem reporting
407            return False
408
409    def extract_photo_video_from_camera(self,
410                                        rpd_file: RPDFile,
411                                        entire_file_required: bool,
412                                        full_file_name_to_work_on,
413                                        using_exiftool: bool) -> Tuple[ExtractionTask, str, bool]:
414        """
415        Extract part of a photo of video to be able to get the orientation
416        and date time metadata, if and only if we know how much of the file
417        is needed to get the thumbnail.
418
419        Otherwise, download the entire photo or video from the camera to be able
420        to generate the thumbnail and cache it.
421
422        :param rpd_file: photo or video
423        :param entire_file_required: whether we already know (from scanning) that
424         the entire file is required
425        :param full_file_name_to_work_on: file name and path of the photo or video
426        :param using_exiftool: if all the metadata extraction is done using ExifTool
427        :return: extraction task, full file name, and whether the full file name
428         refers to a temporary file that should be deleted
429        """
430
431        task = ExtractionTask.undetermined
432        file_to_work_on_is_temporary = False
433
434        if rpd_file.is_mtp_device and rpd_file.file_type == FileType.video:
435            entire_file_required = True
436
437        if not entire_file_required:
438            # For many photos videos, extract a small part of the file and use
439            # that to get the metadata
440            if using_exiftool:
441                offset = thumbnail_offset_exiftool.get(rpd_file.extension)
442            else:
443                offset = thumbnail_offset.get(rpd_file.extension)
444            if offset:
445                if using_exiftool:
446                    offset = max(offset, datetime_offset_exiftool.get(rpd_file.extension))
447                else:
448                    offset = max(offset, datetime_offset.get(rpd_file.extension))
449
450            if offset and self.cache_file_chunk_from_camera(rpd_file, offset):
451                if rpd_file.file_type == FileType.photo:
452                    task = ExtractionTask.load_from_bytes_metadata_from_temp_extract
453                else:
454                    task = ExtractionTask.extract_from_file_and_load_metadata
455                    file_to_work_on_is_temporary = True
456                full_file_name_to_work_on = rpd_file.temp_cache_full_file_chunk
457        if task == ExtractionTask.undetermined:
458            if self.cache_full_size_file_from_camera(rpd_file):
459                task = ExtractionTask.extract_from_file_and_load_metadata
460                full_file_name_to_work_on = rpd_file.cache_full_file_name
461            else:
462                # Failed to generate thumbnail
463                task = ExtractionTask.bypass
464
465        return task, full_file_name_to_work_on, file_to_work_on_is_temporary
466
467    def do_work(self) -> None:
468        try:
469            self.generate_thumbnails()
470        except SystemExit as e:
471            sys.exit(e)
472        except Exception:
473            if hasattr(self, 'device_name'):
474                logging.error("Exception generating thumbnails for %s", self.device_name)
475            else:
476                logging.error("Exception generating thumbnails")
477            logging.exception("Traceback:")
478
479    def generate_thumbnails(self) -> None:
480        self.camera = None
481        arguments = pickle.loads(self.content)  # type: GenerateThumbnailsArguments
482        self.device_name = arguments.name
483        logging.info("Generating %s thumbnails for %s", len(arguments.rpd_files), arguments.name)
484        if arguments.log_gphoto2:
485            self.gphoto2_logging = gphoto2_python_logging()
486
487        self.frontend = self.context.socket(zmq.PUSH)
488        self.frontend.connect("tcp://localhost:{}".format(arguments.frontend_port))
489
490        self.prefs = Preferences()
491
492        # Whether we must use ExifTool to read photo metadata
493        force_exiftool = self.prefs.force_exiftool
494
495        # If the entire photo or video is required to extract the thumbnail, which is determined
496        # when extracting sample metadata from a photo or video during the device scan
497        entire_photo_required = arguments.entire_photo_required
498        entire_video_required = arguments.entire_video_required
499
500        # Access and generate Rapid Photo Downloader thumbnail cache
501        use_thumbnail_cache = self.prefs.use_thumbnail_cache
502
503        thumbnail_caches = GetThumbnailFromCache(use_thumbnail_cache=use_thumbnail_cache)
504
505        photo_cache_dir = video_cache_dir = None
506        cache_file_from_camera = force_exiftool
507
508        rpd_files = arguments.rpd_files
509
510        # with open('tests/thumbnail_data_medium_no_tiff', 'wb') as f:
511        #     pickle.dump(rpd_files, f)
512
513        # Must sort files by modification time prior to temporal analysis needed to figure out
514        # which thumbnails to prioritize
515        rpd_files = sorted(rpd_files, key=attrgetter('modification_time'))
516
517        time_span = arguments.proximity_seconds
518
519        rpd_files2 = []
520
521        if rpd_files:
522            gaps, sequences = get_temporal_gaps_and_sequences(rpd_files, time_span)
523
524            rpd_files2.extend(gaps)
525
526            indexes = split_indexes(len(sequences))
527            rpd_files2.extend([sequences[idx] for idx in indexes])
528
529        assert len(rpd_files) == len(rpd_files2)
530        rpd_files = rpd_files2
531
532        if arguments.camera is not None:
533            self.camera = Camera(
534                model=arguments.camera, port=arguments.port,
535                specific_folders=self.prefs.folders_to_scan
536            )
537
538            if not self.camera.camera_initialized:
539                # There is nothing to do here: exit!
540                logging.debug(
541                    "Prematurely exiting thumbnail generation due to lack of access to camera %s",
542                    arguments.camera
543                )
544                self.content = pickle.dumps(
545                    GenerateThumbnailsResults(
546                        scan_id=arguments.scan_id,
547                        camera_removed=True,
548                    ), pickle.HIGHEST_PROTOCOL
549                )
550                self.send_message_to_sink()
551                self.disconnect_logging()
552                self.send_finished_command()
553                sys.exit(0)
554
555            if not cache_file_from_camera:
556                for rpd_file in rpd_files:
557                    if use_exiftool_on_photo(rpd_file.extension,
558                                             preview_extraction_irrelevant=False):
559                        cache_file_from_camera = True
560                        break
561
562            must_make_cache_dirs = (not self.camera.can_fetch_thumbnails or cache_file_from_camera)
563
564            if must_make_cache_dirs or arguments.need_video_cache_dir \
565                    or arguments.need_photo_cache_dir:
566                # If downloading complete copy of the files to
567                # generate previews, then may as well cache them to speed up
568                # the download process
569                self.photo_cache_dir = create_temp_dir(
570                    folder=arguments.cache_dirs.photo_cache_dir,
571                    prefix=cache_dir_name(self.device_name)
572                )
573                self.video_cache_dir = create_temp_dir(
574                    folder=arguments.cache_dirs.video_cache_dir,
575                    prefix=cache_dir_name(self.device_name)
576                )
577                cache_dirs = CacheDirs(self.photo_cache_dir, self.video_cache_dir)
578                self.content = pickle.dumps(
579                    GenerateThumbnailsResults(
580                        scan_id=arguments.scan_id,
581                        cache_dirs=cache_dirs
582                    ), pickle.HIGHEST_PROTOCOL
583                )
584                self.send_message_to_sink()
585
586        from_thumb_cache = 0
587        from_fdo_cache = 0
588
589        if self.camera:
590            rescan = RescanCamera(camera=self.camera, prefs=self.prefs)
591            rescan.rescan_camera(rpd_files)
592            rpd_files = rescan.rpd_files
593            if rescan.missing_rpd_files:
594                logging.error(
595                    "%s files could not be relocated on %s",
596                    len(rescan.missing_rpd_files), self.camera.display_name
597                )
598                for rpd_file in rescan.missing_rpd_files:  # type: RPDFile
599                    self.content = pickle.dumps(
600                        GenerateThumbnailsResults(rpd_file=rpd_file, thumbnail_bytes=None),
601                        pickle.HIGHEST_PROTOCOL
602                    )
603                    self.send_message_to_sink()
604
605        for rpd_file in rpd_files:  # type: RPDFile
606            # Check to see if the process has received a command
607            self.check_for_controller_directive()
608
609            exif_buffer = None
610            file_to_work_on_is_temporary = False
611            secondary_full_file_name = ''
612            processing = set()  # type: Set[ExtractionProcessing]
613
614            # Attempt to get thumbnail from Thumbnail Cache
615            # (see cache.py for definitions of various caches)
616
617            cache_search = thumbnail_caches.get_from_cache(rpd_file)
618            task, thumbnail_bytes, full_file_name_to_work_on, origin = cache_search
619            if task != ExtractionTask.undetermined:
620                if origin == ThumbnailCacheOrigin.thumbnail_cache:
621                    from_thumb_cache += 1
622                else:
623                    assert origin == ThumbnailCacheOrigin.fdo_cache
624                    logging.debug(
625                        "Thumbnail for %s found in large FDO cache", rpd_file.full_file_name
626                    )
627                    from_fdo_cache += 1
628                    processing.add(ExtractionProcessing.resize)
629                    if not rpd_file.mdatatime:
630                        # Since we're extracting the thumbnail from the FDO cache,
631                        # need to grab its metadata too.
632                        # Reassign the task
633                        task = ExtractionTask.load_file_directly_metadata_from_secondary
634                        # It's not being downloaded from a camera, so nothing
635                        # special to do except assign the name of the file from which
636                        # to extract the metadata
637                        secondary_full_file_name = rpd_file.full_file_name
638                        logging.debug(
639                            "Although thumbnail found in the cache, tasked to extract "
640                            "file time recorded in metadata from %s", secondary_full_file_name
641                        )
642            if task == ExtractionTask.undetermined:
643                # Thumbnail was not found in any cache: extract it
644                if self.camera:  # type: Camera
645                    if rpd_file.file_type == FileType.photo:
646                        if rpd_file.is_heif():
647                            # Load HEIF / HEIC using entire file.
648                            # We are assuming that there is no tool to extract a
649                            # preview image from an HEIF / HEIC, or the file simply
650                            # does not have one to extract.
651                            if self.cache_full_size_file_from_camera(rpd_file):
652                                task = ExtractionTask.load_heif_and_exif_directly
653                                processing.add(ExtractionProcessing.resize)
654                                full_file_name_to_work_on = rpd_file.cache_full_file_name
655                                # For now, do not orient, as it seems pyheif or libheif does
656                                # that automatically.
657                                # processing.add(ExtractionProcessing.orient)
658
659                        elif self.camera.can_fetch_thumbnails:
660                            task = ExtractionTask.load_from_bytes
661                            if rpd_file.is_jpeg_type():
662                                # gPhoto2 knows how to get jpeg thumbnails
663                                try:
664                                    thumbnail_bytes = self.camera.get_thumbnail(
665                                        rpd_file.path, rpd_file.name
666                                    )
667                                except CameraProblemEx as e:
668                                    # TODO handle error?
669                                    thumbnail_bytes = None
670                            else:
671
672                                if force_exiftool or use_exiftool_on_photo(rpd_file.extension,
673                                                           preview_extraction_irrelevant=False):
674                                    task, full_file_name_to_work_on, \
675                                        file_to_work_on_is_temporary =\
676                                        self.extract_photo_video_from_camera(
677                                            rpd_file, entire_photo_required,
678                                            full_file_name_to_work_on,
679                                            True
680                                        )
681                                    if task == ExtractionTask.load_from_bytes_metadata_from_temp_extract:
682                                        secondary_full_file_name = full_file_name_to_work_on
683                                        file_to_work_on_is_temporary = False
684
685                                else:
686                                    # gPhoto2 does not know how to get RAW thumbnails, so we do that
687                                    # part ourselves
688                                    if rpd_file.extension == 'crw':
689                                        # Could cache this file, since reading its entirety
690                                        # But does anyone download a CRW file from the camera these
691                                        # days?!
692                                        bytes_to_read = rpd_file.size
693                                    else:
694                                        bytes_to_read = min(
695                                            rpd_file.size, orientation_offset.get(
696                                                rpd_file.extension, 500
697                                            )
698                                        )
699                                    exif_buffer = self.camera.get_exif_extract(
700                                        rpd_file.path, rpd_file.name, bytes_to_read
701                                    )
702                                try:
703                                    thumbnail_bytes = self.camera.get_thumbnail(
704                                        rpd_file.path, rpd_file.name
705                                    )
706                                except CameraProblemEx as e:
707                                    # TODO report error
708                                    thumbnail_bytes = None
709                            processing.add(ExtractionProcessing.strip_bars_photo)
710                            processing.add(ExtractionProcessing.orient)
711                        else:
712                            # Many (all?) jpegs from phones don't include jpeg previews,
713                            # so need to render from the entire jpeg itself. Slow!
714
715                            # If rawkit is not installed, then extract merely a part of
716                            # phone's raw format, and try to extract the jpeg preview
717                            # from it (which probably doesn't exist!). This is fast.
718                            # If have rawkit, download and render an image from the
719                            # RAW
720
721                            if not rpd_file.is_jpeg() and not have_rawkit:
722                                bytes_to_read = thumbnail_offset.get(rpd_file.extension)
723                                if bytes_to_read:
724                                    exif_buffer = self.camera.get_exif_extract(
725                                        rpd_file.path, rpd_file.name, bytes_to_read
726                                    )
727                                    task = ExtractionTask.load_from_exif_buffer
728                                    processing.add(ExtractionProcessing.orient)
729                            if (task == ExtractionTask.undetermined and
730                                    self.cache_full_size_file_from_camera(rpd_file)):
731                                if rpd_file.is_jpeg():
732                                    task = ExtractionTask.load_file_and_exif_directly
733                                    processing.add(ExtractionProcessing.resize)
734                                    processing.add(ExtractionProcessing.orient)
735                                else:
736                                    task = ExtractionTask.load_from_exif
737                                    processing.add(ExtractionProcessing.resize)
738                                    processing.add(ExtractionProcessing.orient)
739                                full_file_name_to_work_on = rpd_file.cache_full_file_name
740                            else:
741                                # Failed to generate thumbnail
742                                task = ExtractionTask.bypass
743                    else:
744                        # video from camera
745                        if rpd_file.thm_full_name is not None:
746                            # Fortunately, we have a special video thumbnail file
747                            # Still need to get metadata time, however.
748
749                            if entire_video_required:
750                                offset = rpd_file.size
751                            else:
752                                offset = datetime_offset.get(rpd_file.extension)
753                                # If there is no offset, there is no point trying to extract the
754                                # metadata time from part of the video. It's not ideal,
755                                # but if this is from a camera on which there were any other files
756                                # we can assume we've got a somewhat accurate date time for it from
757                                # the modification time.
758                                # The only exception is if the video file is not that big, in which
759                                # case it's worth reading in its entirety:
760                                if offset is None and rpd_file.size < 4000000:
761                                    offset = rpd_file.size
762
763                            if rpd_file.mdatatime or not offset:
764                                task = ExtractionTask.load_from_bytes
765                            elif self.cache_file_chunk_from_camera(rpd_file, offset):
766                                task = ExtractionTask.load_from_bytes_metadata_from_temp_extract
767                                secondary_full_file_name = rpd_file.temp_cache_full_file_chunk
768                            else:
769                                # For some reason was unable to download part of the video file
770                                task = ExtractionTask.load_from_bytes
771
772                            try:
773                                thumbnail_bytes = self.camera.get_THM_file(rpd_file.thm_full_name)
774                            except CameraProblemEx as e:
775                                # TODO report error
776                                thumbnail_bytes = None
777                            processing.add(ExtractionProcessing.strip_bars_video)
778                            processing.add(ExtractionProcessing.add_film_strip)
779                        else:
780                            task, full_file_name_to_work_on, file_to_work_on_is_temporary = \
781                                self.extract_photo_video_from_camera(
782                                    rpd_file, entire_video_required,
783                                    full_file_name_to_work_on,
784                                    False
785                                )
786                else:
787                    # File is not on a camera
788                    task = preprocess_thumbnail_from_disk(rpd_file=rpd_file, processing=processing)
789                    if task != ExtractionTask.bypass:
790                        if rpd_file.thm_full_name is not None:
791                            full_file_name_to_work_on = rpd_file.thm_full_name
792                            if task == ExtractionTask.load_file_directly_metadata_from_secondary:
793                                secondary_full_file_name = rpd_file.full_file_name
794                        else:
795                            full_file_name_to_work_on = rpd_file.full_file_name
796
797            if task == ExtractionTask.bypass:
798                self.content = pickle.dumps(
799                    GenerateThumbnailsResults(rpd_file=rpd_file, thumbnail_bytes=thumbnail_bytes),
800                    pickle.HIGHEST_PROTOCOL
801                )
802                self.send_message_to_sink()
803
804            elif task != ExtractionTask.undetermined:
805                # Send data to load balancer, which will send to one of its
806                # workers
807
808                self.content = pickle.dumps(
809                    ThumbnailExtractorArgument(
810                        rpd_file=rpd_file,
811                        task=task,
812                        processing=processing,
813                        full_file_name_to_work_on=full_file_name_to_work_on,
814                        secondary_full_file_name=secondary_full_file_name,
815                        exif_buffer=exif_buffer,
816                        thumbnail_bytes=thumbnail_bytes,
817                        use_thumbnail_cache=use_thumbnail_cache,
818                        file_to_work_on_is_temporary=file_to_work_on_is_temporary,
819                        write_fdo_thumbnail=False,
820                        send_thumb_to_main=True,
821                        force_exiftool=force_exiftool
822                    ),
823                    pickle.HIGHEST_PROTOCOL)
824                self.frontend.send_multipart([b'data', self.content])
825
826        if arguments.camera:
827            self.camera.free_camera()
828            # Delete our temporary cache directories if they are empty
829            if photo_cache_dir is not None:
830                if not os.listdir(self.photo_cache_dir):
831                    os.rmdir(self.photo_cache_dir)
832            if video_cache_dir is not None:
833                if not os.listdir(self.video_cache_dir):
834                    os.rmdir(self.video_cache_dir)
835
836        logging.debug("Finished phase 1 of thumbnail generation for %s", self.device_name)
837        if from_thumb_cache:
838            logging.info(
839                "{} of {} thumbnails for {} came from thumbnail cache".format(
840                    from_thumb_cache, len(rpd_files), self.device_name
841                )
842            )
843        if from_fdo_cache:
844            logging.info(
845                "{} of {} thumbnails of for {} came from Free Desktop cache".format(
846                    from_fdo_cache, len(rpd_files), self.device_name
847                )
848            )
849
850        self.disconnect_logging()
851        self.send_finished_command()
852
853    def cleanup_pre_stop(self):
854        if self.camera is not None:
855            self.camera.free_camera()
856
857
if __name__ == "__main__":
    # Script entry point: when this module is executed directly (i.e. as a
    # worker process), the only action taken is to construct GenerateThumbnails.
    # NOTE(review): presumably the constructor itself enters the worker's
    # processing loop -- confirm in the class / its base class.
    generate_thumbnails = GenerateThumbnails()
860