1#!/usr/bin/env python3
2
3# Copyright (C) 2007-2020 Damon Lynch <damonlynch@gmail.com>
4
5# This file is part of Rapid Photo Downloader.
6#
7# Rapid Photo Downloader is free software: you can redistribute it and/or
8# modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# Rapid Photo Downloader is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with Rapid Photo Downloader.  If not,
20# see <http://www.gnu.org/licenses/>.
21
22"""
23Read photo and video metadata using ExifTool daemon process.
24"""
25
26__author__ = 'Damon Lynch'
27__copyright__ = "Copyright 2007-2020, Damon Lynch"
28
29import datetime
30import re
31import logging
32from typing import Optional, Union, Any, Tuple, List
33from collections import OrderedDict
34
35import raphodo.exiftool as exiftool
36from raphodo.utilities import flexible_date_time_parser
37from raphodo.constants import FileType
38import raphodo.programversions as programversions
39import raphodo.fileformats as fileformats
40
41
# ExifTool tag names of the preview images that can be embedded in a file,
# keyed by the integer index the rest of this module uses to refer to them.
# MetadataExiftool.__init__ converts this into an OrderedDict sorted by index.
_index_preview = dict(
    enumerate(
        ('PreviewImage', 'OtherImage', 'JpgFromRaw', 'PreviewTIFF', 'ThumbnailTIFF')
    )
)
50
51
class MetadataExiftool:
    """
    Read photo and video metadata using exiftool daemon process.

    Metadata is requested from ExifTool lazily, the first time an accessor
    needs it, and is then cached on the instance.
    """

    # The project types in the annotations below are quoted so that they are
    # not evaluated when the method is defined.
    def __init__(self, full_file_name: Optional[str],
                 et_process: "exiftool.ExifTool",
                 file_type: "Optional[FileType]" = None) -> None:
        """
        Get photo and video metadata using Exiftool

        :param full_file_name: the file from which to get metadata
        :param et_process: instance of ExifTool class, which allows
         calling ExifTool without it exiting with each call
        :param file_type: photo or video. If not specified, will be determined
         using file extension
        """

        super().__init__()

        self.full_file_name = full_file_name
        if full_file_name is not None:
            self.ext = fileformats.extract_extension(full_file_name)
        else:
            self.ext = None

        # Cache of the values returned by et_process.get_metadata()
        self.metadata = dict()
        # Cache of the values returned by et_process.execute_json_no_formatting()
        self.metadata_string_format = dict()
        self.et_process = et_process

        if file_type is None and full_file_name is not None:
            file_type = fileformats.file_type_from_splitext(file_name=full_file_name)
        assert file_type is not None
        self.file_type = file_type

        # All the names of the preview images we know about (there may be more, perhaps)
        # Synchronize with preview_smallest and preview256 dicts below.
        # The keys are unique, so sorting the items orders them by key.
        self.index_preview = OrderedDict(sorted(_index_preview.items()))

        # If extension is not in dict preview_smallest, that means the file
        # format always contains a "ThumbnailImage"
        self.preview_smallest = dict(
            crw=(2, ),
            dng=(4, 3, 0),
            fff=(3, ),
            iiq=(4, ),
            mrw=(0, ),
            nef=(4, 3),
            raw=(2, ),
        )
        # '3fr' is not a valid keyword argument name, so it is added separately
        self.preview_smallest['3fr'] = 3, 4

        # Format might have a thumbnail, but might not
        self.may_have_thumbnail = ('crw', 'mrw', 'orf', 'raw', 'x3f')

        # Preview images that are at least 256 pixels big, according to self.index_preview
        self.preview256 = dict(
            arw=(0, ),
            cr2=(0, ),
            cr3=(0, ),
            crw=(2, ),
            dng=(0, 3),
            fff=(3, ),
            iiq=(4, ),
            mrw=(0, ),
            nef=(0, 4, 2, 3),  # along with DNG quite possibly the most inconsistent format
            nrw=(0, 1),
            orf=(0, ),
            pef=(0, ),
            raf=(0, ),
            raw=(2, ),
            rw2=(2, ),
            sr2=(0, ),
            srw=(0, ),
            x3f=(0, 2),
        )
        self.preview256['3fr'] = 3, 4

        self.ignore_tiff_preview_256 = ('cr2', )

    def _load_metadata(self) -> bool:
        """
        Make sure self.metadata has been requested from ExifTool.

        :return: False if ExifTool raised ValueError while reading the
         metadata, else True
        """

        if not self.metadata:
            try:
                self.metadata = self.et_process.get_metadata(self.full_file_name)
            except ValueError:
                return False
        return True

    def _get(self, key: str, missing: Any) -> Any:
        """
        Return the metadata value stored under key.

        :param key: ExifTool tag name
        :param missing: value to return if the tag is not available
        :return: the tag's value, else missing
        """

        if key in ("VideoStreamType", "FileNumber", "ExposureTime"):
            # special cases: want ExifTool's string formatting
            # i.e. no -n tag
            if not self.metadata_string_format:
                try:
                    self.metadata_string_format = \
                        self.et_process.execute_json_no_formatting(self.full_file_name)
                except ValueError:
                    return missing
            try:
                return self.metadata_string_format[0][key]
            except (LookupError, TypeError):
                # No result at all, or the tag is not in the result
                return missing

        if not self._load_metadata():
            return missing

        return self.metadata.get(key, missing)

    def _get_str(self, key: str, missing: Any) -> Any:
        """
        Return the metadata value stored under key converted to a string.

        :param key: ExifTool tag name
        :param missing: value to return if the tag is not available
        :return: str() of the tag's value, else missing
        """

        value = self._get(key, None)
        if value is None:
            return missing
        return str(value)

    def date_time(self, missing: Optional[str] = '',
                  ignore_file_modify_date: bool = False) -> Union[datetime.datetime, Any]:
        """
        Tries to get value from key "DateTimeOriginal"
        If that fails, tries "CreateDate", and then finally
        FileModifyDate

        :param missing: value to return if no date time could be determined
        :param ignore_file_modify_date: if True, don't return the file
         modification date
        :return: python datetime format the date and time the video or photo was
         recorded, else missing
        """

        d = self._get('DateTimeOriginal', None)
        if d is None:
            d = self._get('CreateDate', None)
        if d is None and not ignore_file_modify_date:
            d = self._get('FileModifyDate', None)
        if d is None:
            return missing

        # str() guards against a value that was not decoded as a string
        d = str(d).strip()
        try:
            dt, fs = flexible_date_time_parser(d)
            logging.debug(
                "Extracted %s time %s using ExifTool", self.file_type.name, dt.strftime(fs)
            )
        except AssertionError:
            logging.warning(
                "Error extracting date time metadata '%s' for %s %s",
                d, self.file_type.name, self.full_file_name
            )
            return missing
        except (ValueError, OverflowError):
            logging.warning(
                "Error parsing date time metadata '%s' for %s %s",
                d, self.file_type.name, self.full_file_name
            )
            return missing
        except Exception:
            logging.error(
                "Unknown error parsing date time metadata '%s' for %s %s",
                d, self.file_type.name, self.full_file_name
            )
            return missing

        return dt

    def timestamp(self, missing='') -> Union[float, Any]:
        """
        Photo and Video

        :param missing: value to return if there is no usable date time
        :return: a float value representing the time stamp, if it exists
        """

        dt = self.date_time(missing=None)
        if dt is None:
            return missing
        try:
            return float(dt.timestamp())
        except Exception:
            # Best effort: any failure to convert means there is no time stamp
            return missing

    def file_number(self, missing='') -> Union[str, Any]:
        """
        Photo and video

        :return: a string value representing the File number, if it exists
        """

        return self._get_str("FileNumber", missing)

    def width(self, missing='') -> Union[str, Any]:
        """
        :return: the ImageWidth value as a string, else missing
        """

        return self._get_str('ImageWidth', missing)

    def height(self, missing='') -> Union[str, Any]:
        """
        :return: the ImageHeight value as a string, else missing
        """

        return self._get_str('ImageHeight', missing)

    def length(self, missing='') -> Union[str, Any]:
        """
        return the duration (length) of the video, rounded to the nearest second, in string format
        """

        v = self._get("Duration", None)
        if v is None:
            return missing
        try:
            return "%0.f" % float(v)
        except (TypeError, ValueError, OverflowError):
            return missing

    def frames_per_second(self, missing='') -> Union[str, Any]:
        """
        :return: the FrameRate value (or, failing that, the VideoFrameRate
         value) formatted as a string with no decimal places, else missing
        """

        v = self._get("FrameRate", None)
        if v is None:
            v = self._get("VideoFrameRate", None)
        if v is None:
            return missing
        try:
            # float() makes this accept the same inputs as length()
            return '%.0f' % float(v)
        except (TypeError, ValueError, OverflowError):
            return missing

    def codec(self, missing='') -> Union[str, Any]:
        """
        :return: the VideoStreamType value (or, failing that, the VideoCodec
         value), else missing
        """

        v = self._get("VideoStreamType", None)
        if v is None:
            v = self._get("VideoCodec", None)
        if v is None:
            return missing
        return v

    def fourcc(self, missing='') -> Union[str, Any]:
        """
        :return: the CompressorID value, else missing
        """

        return self._get("CompressorID", missing)

    def rotation(self, missing=0) -> Union[int, Any]:
        """
        :return: the Rotation value exactly as stored in the metadata, else
         missing
        """

        v = self._get("Rotation", None)
        if v is None:
            return missing
        return v

    def aperture(self, missing='') -> Union[str, Any]:
        """
        Returns in string format the floating point value of the image's
        aperture.

        Returns missing if the metadata value is not present.
        """

        try:
            v = float(self._get('FNumber', None))
        except (ValueError, TypeError):  # TypeError catches None
            return missing
        return "{:.1f}".format(v)

    def iso(self, missing='') -> Union[str, Any]:
        """
        Returns in string format the integer value of the image's ISO.

        Returns missing if the metadata value is not present.
        """

        v = self._get('ISO', None)
        # A falsy value such as 0 is treated the same as a missing value
        if v:
            return str(v)
        return missing

    def _exposure_time_rational(self, missing=None) -> Tuple[Any, Any]:
        """
        Split exposure time value into fraction for further processing

        :param missing: value to use for both parts if there is no
         exposure time
        :return: tuple of exposure time e.g. '1', '320' (for 1/320 sec)
          or '2.5', 1 (for 2.5 secs)
        """

        v = self._get('ExposureTime', None)
        if v is None:
            return missing, missing
        v = str(v)

        # ExifTool returns two distinct types values e.g.:
        # '1/125' fraction (string)
        # '2.5' floating point

        # fractional format
        if v.find('/') > 0:
            return tuple(v.split('/')[:2])

        # already in floating point format
        return v, 1

    def exposure_time(self, alternativeFormat=False, missing='') -> Union[str, Any]:
        """
        Returns in string format the exposure time of the image.

        Returns missing if the metadata value is not present.

        alternativeFormat is useful if the value is going to be  used in a
        purpose where / is an invalid character, e.g. file system names.

        alternativeFormat is False:
        For exposures less than one second, the result is formatted as a
        fraction e.g. 1/125
        For exposures greater than or equal to one second, the value is
        formatted as an integer e.g. 30

        alternativeFormat is True:
        For exposures less than one second, the result is formatted as an
        integer e.g. 125
        For exposures less than one second but more than or equal to
        one tenth of a second, the result is formatted as an integer
        e.g. 3 representing 3/10 of a second
        For exposures greater than or equal to one second, the value is
        formatted as an integer with a trailing s e.g. 30s

        :raises AssertionError: if a decimal value has a denominator other
         than 1
        :raises ValueError: if the parts of the value are not integers
        """

        e0, e1 = self._exposure_time_rational()
        if e0 is None or e1 is None:
            return missing

        if str(e0).find('.') > 0:
            # Decimal value: only valid when it is not part of a fraction.
            # Checked explicitly because assert statements are skipped when
            # python runs with -O.
            if e1 != 1:
                logging.error('{}: {}, {}'.format(self.full_file_name, e0, e1))
                raise AssertionError(
                    'Unexpected exposure time {}/{}'.format(e0, e1)
                )
            e0 = float(e0)
        else:
            try:
                e0 = int(e0)
                e1 = int(e1)
            except ValueError as e:
                logging.exception('{}: {}, {}'.format(self.full_file_name, e0, e1))
                raise ValueError(
                    'Unexpected exposure time {}/{}'.format(e0, e1)
                ) from e

        if e1 > e0:
            # Less than one second.
            # NOTE(review): a decimal value such as '0.5' reaches this branch
            # and is rendered as '0.5/1' (or '0.5' in the alternative
            # format) - confirm what the desired output is.
            if not alternativeFormat:
                return "%s/%s" % (e0, e1)
            if e0 == 1:
                return str(e1)
            return str(e0)

        if e0 > e1:
            # More than one second
            seconds = float(e0) / e1
            if alternativeFormat:
                return "%.0fs" % seconds
            return "%.0f" % seconds

        # Exactly one second: the trailing s belongs only to the alternative
        # format
        if alternativeFormat:
            return "1s"
        return "1"

    def focal_length(self, missing='') -> Union[str, Any]:
        """
        :return: the FocalLength value as a string, else missing
        """

        return self._get_str('FocalLength', missing)

    def camera_make(self, missing='') -> Union[str, Any]:
        """
        :return: the Make value as a string, else missing
        """

        return self._get_str('Make', missing)

    def camera_model(self, missing='') -> Union[str, Any]:
        """
        :return: the Model value as a string, else missing
        """

        return self._get_str('Model', missing)

    def short_camera_model(self, includeCharacters='', missing='') -> Union[str, Any]:
        r"""
        Returns in shortened string format the camera model used to record
        the image.

        Returns missing if the metadata value is not present.

        The short format is determined by the first occurrence of a digit in
        the camera model, including all alphaNumeric characters before and
        after that digit up till a non-alphanumeric character, but with these
        interventions:

        1. Canon "Mark" designations are shortened prior to conversion.
        2. Names like "Canon EOS DIGITAL REBEL XSi" do not have a number and
           must be treated differently (see below)

        Examples:
        Canon EOS 300D DIGITAL -> 300D
        Canon EOS 5D -> 5D
        Canon EOS 5D Mark II -> 5DMkII
        NIKON D2X -> D2X
        NIKON D70 -> D70
        X100,D540Z,C310Z -> X100
        Canon EOS DIGITAL REBEL XSi -> XSi
        Canon EOS Digital Rebel XS -> XS
        Canon EOS Digital Rebel XTi -> XTi
        Canon EOS Kiss Digital X -> Digital
        Canon EOS Digital Rebel XT -> XT
        EOS Kiss Digital -> Digital
        Canon Digital IXUS Wireless -> Wireless
        Canon Digital IXUS i zoom -> zoom
        Canon EOS Kiss Digital N -> N
        Canon Digital IXUS IIs -> IIs
        IXY Digital L -> L
        Digital IXUS i -> i
        IXY Digital -> Digital
        Digital IXUS -> IXUS

        The optional includeCharacters allows additional characters to appear
        before and after the digits.
        Note: special includeCharacters MUST be escaped as per syntax of a
        regular expressions (see documentation for module re)

        Examples:

        includeCharacters = '':
        DSC-P92 -> P92
        includeCharacters = '\-':
        DSC-P92 -> DSC-P92

        If a digit is not found in the camera model, the last word is returned.

        Note: assume exif values are in ENGLISH, regardless of current platform
        """

        m = self.camera_model().replace(' Mark ', 'Mk')
        if not m:
            return missing

        s = r"(?:[^a-zA-Z0-9%s]?)(?P<model>[a-zA-Z0-9%s]*\d+[" \
            r"a-zA-Z0-9%s]*)" \
            % (includeCharacters, includeCharacters, includeCharacters)
        r = re.search(s, m)
        if r:
            return r.group("model")

        # No digit in the model name: use its last word
        return m.strip().rpartition(' ')[2]

    def camera_serial(self, missing='') -> Union[str, Any]:
        """
        :return: the SerialNumber value as a string, else missing
        """

        return self._get_str('SerialNumber', missing)

    def shutter_count(self, missing='') -> Union[str, Any]:
        """
        :return: the ShutterCount value (or, failing that, the ImageNumber
         value) as a string, else missing
        """

        v = self._get('ShutterCount', None)
        if v is None:
            v = self._get('ImageNumber', None)
        if v is None:
            return missing
        return str(v)

    def owner_name(self, missing='') -> Union[str, Any]:
        """
        :return: the OwnerName value as a string, else missing
        """

        # distinct from CopyrightOwnerName
        return self._get_str('OwnerName', missing)

    def copyright(self, missing='') -> Union[str, Any]:
        """
        :return: the Copyright value as a string, else missing
        """

        return self._get_str('Copyright', missing)

    def artist(self, missing='') -> Union[str, Any]:
        """
        :return: the Artist value as a string, else missing
        """

        return self._get_str('Artist', missing)

    def sub_seconds(self, missing='00') -> Union[str, Any]:
        """
        :return: the SubSecTime value as a string, else missing
        """

        return self._get_str('SubSecTime', missing)

    def orientation(self, missing='') -> Union[str, Any]:
        """
        :return: the Orientation value as a string, else missing
        """

        return self._get_str('Orientation', missing)

    def _get_binary(self, key: str) -> Optional[bytes]:
        """
        Ask ExifTool for the binary value of a tag.

        :param key: ExifTool tag name, without the leading hyphen
        :return: whatever et_process.execute_binary() returns for the tag
        """

        return self.et_process.execute_binary("-{}".format(key), self.full_file_name)

    def get_small_thumbnail(self) -> Optional[bytes]:
        """
        Get the small thumbnail image (if it exists)

        :return: thumbnail image in raw bytes
        """

        return self._get_binary("ThumbnailImage")

    def get_indexed_preview(self, preview_number: int = 0,
                            force: bool = False) -> Optional[bytes]:
        """
        Extract preview image from the metadata
        If initial preview number does not work, tries others

        :param preview_number: which preview to get, a key of
         self.index_preview
        :param force: if True, get only that preview. Otherwise, take a flexible approach
         where every preview image is tried, in order found in index_preview
        :return: preview image in raw bytes, if found, else None
        :raises KeyError: if preview_number is not in self.index_preview
        """

        key = self.index_preview[preview_number]
        b = self._get_binary(key)
        if b:
            return b
        if force:
            return None

        logging.debug(
            "Attempt to extract %s using ExifTool from %s failed. Trying flexible approach.",
            key, self.full_file_name
        )

        # The remaining previews are filtered against the metadata, so it must
        # be loaded first; if it cannot be read, no other preview is tried.
        self._load_metadata()

        valid_untried_indexes = [
            index for index, name in self.index_preview.items()
            if index != preview_number and name in self.metadata
        ]
        if not valid_untried_indexes:
            logging.debug(
                "No other preview image indexes remain to be tried on %s", self.full_file_name
            )

        for index in valid_untried_indexes:
            key = self.index_preview[index]
            logging.debug("Attempting %s on %s...", key, self.full_file_name)
            b = self._get_binary(key)
            if b:
                logging.debug("...attempt successful from %s", self.full_file_name)
                return b
            logging.debug("...attempt failed on %s", self.full_file_name)

        logging.warning("ExifTool could not extract a preview image from %s", self.full_file_name)
        return None

    def get_small_thumbnail_or_first_indexed_preview(self) -> Optional[bytes]:
        """
        First attempt to get the small thumbnail image. If it does not exist,
        extract the smallest preview image from the metadata

        :return: thumbnail / preview image in raw bytes, if found, else None
        """

        # Look for "ThumbnailImage" if the file format supports it
        if self.ext not in self.preview_smallest or self.ext in self.may_have_thumbnail:
            thumbnail = self.get_small_thumbnail()
            # Truthiness, not "is not None": an empty result is not a thumbnail
            if thumbnail:
                return thumbnail

        # Otherwise look for the smallest preview image for this format
        for index in self.preview_smallest.get(self.ext, ()):
            thumbnail = self.get_indexed_preview(preview_number=index, force=True)
            if thumbnail:
                return thumbnail

        # If that fails, take a flexible approach
        return self.get_indexed_preview(force=False)

    def get_preview_256(self) -> Optional[bytes]:
        """
        :return: if possible, return a preview image that is preferably larger than 256 pixels,
         else the smallest preview if it exists
        """

        # Try the previews listed for this format in self.preview256
        for index in self.preview256.get(self.ext, ()):
            thumbnail = self.get_indexed_preview(preview_number=index, force=True)
            if thumbnail:
                return thumbnail

        # If that fails, take a flexible approach
        return self.get_indexed_preview(force=False)

    def preview_names(self) -> Optional[List[str]]:
        """
        Names of preview image located in the file, excluding the tag ThumbnailImage

        :return: None if unsuccessful, else names of preview images
        """

        if not self._load_metadata():
            return None

        return [v for v in self.index_preview.values() if v in self.metadata]
656
657
if __name__ == '__main__':
    # Command line diagnostic: print the metadata this module can read from
    # the photo or video given as the only argument.
    import sys

    # Validate the command line before starting the ExifTool daemon
    if len(sys.argv) != 2:
        print('Usage: ' + sys.argv[0] + ' path/to/video_or_photo/containing/metadata')
        sys.exit(1)

    file = sys.argv[1]

    print("ExifTool", programversions.exiftool_version_info())
    file_type = fileformats.file_type_from_splitext(file_name=file)
    if file_type is None:
        print("Unsupported file type")
        sys.exit(1)

    with exiftool.ExifTool() as et_process:
        m = MetadataExiftool(file, et_process, file_type)
        print(m.date_time())
        print("f" + m.aperture('missing '))
        print("ISO " + m.iso('missing '))
        print(m.exposure_time(missing='missing ') + " sec")
        print(m.exposure_time(alternativeFormat=True, missing='missing '))
        print(m.focal_length('missing ') + "mm")
        print(m.camera_make())
        print(m.camera_model())
        print('Serial number:', m.camera_serial(missing='missing'))
        print('Shutter count:', m.shutter_count())
        print('Owner name:', m.owner_name())
        print('Copyright:', m.copyright())
        print('Artist', m.artist())
        print('Subseconds:', m.sub_seconds())
        print('Orientation:', m.orientation())
        print('Preview names (excluding Thumbnail): ', m.preview_names())

        preview = m.get_small_thumbnail_or_first_indexed_preview()
        if preview:
            print('Thumbnail or smallest preview size: {} bytes'.format(len(preview)))
        else:
            print('No thumbnail or preview detected')

        thumb = m.get_small_thumbnail()
        if thumb:
            print('Thumbnail size: {} bytes'.format(len(thumb)))
        else:
            print('No thumbnail detected')

        previews = et_process.execute(file.encode(), b'-preview:all')
        print("ExifTool raw output:")
        if previews:
            print(previews.decode())
        else:
            print('No previews detected')

        # Video accessors, disabled by default:
        # print("%sx%s" % (m.width(), m.height()))
        # print("Length:", m.length())
        # print("FPS: ", m.frames_per_second())
        # print("Codec:", m.codec())