#!/usr/bin/env python3

# Copyright (C) 2007-2020 Damon Lynch <damonlynch@gmail.com>

# This file is part of Rapid Photo Downloader.
#
# Rapid Photo Downloader is free software: you can redistribute it and/or
# modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Rapid Photo Downloader is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Rapid Photo Downloader. If not,
# see <http://www.gnu.org/licenses/>.

"""
Read photo and video metadata using ExifTool daemon process.
"""

__author__ = 'Damon Lynch'
__copyright__ = "Copyright 2007-2020, Damon Lynch"

import datetime
import re
import logging
from typing import Optional, Union, Any, Tuple, List
from collections import OrderedDict

import raphodo.exiftool as exiftool
from raphodo.utilities import flexible_date_time_parser
from raphodo.constants import FileType
import raphodo.programversions as programversions
import raphodo.fileformats as fileformats


# Turned into an OrderedDict below
_index_preview = {
    0: 'PreviewImage',
    1: 'OtherImage',
    2: 'JpgFromRaw',
    3: 'PreviewTIFF',
    4: 'ThumbnailTIFF'
}


class MetadataExiftool():
    """
    Read photo and video metadata using exiftool daemon process.
    """

    def __init__(self, full_file_name: str,
                 et_process: exiftool.ExifTool,
                 file_type: Optional[FileType]=None) -> None:
        """
        Get photo and video metadata using Exiftool

        :param full_file_name: the file from which to get metadata
        :param et_process: instance of ExifTool class, which allows
         calling ExifTool without it exiting with each call
        :param file_type: photo or video. If not specified, will be determined
         using file extension
        """

        super().__init__()

        self.full_file_name = full_file_name
        if full_file_name is not None:
            self.ext = fileformats.extract_extension(full_file_name)
        else:
            self.ext = None
        # Metadata fetched lazily on first access; see _load_metadata() and _get()
        self.metadata = dict()
        self.metadata_string_format = dict()
        self.et_process = et_process
        if file_type is None and full_file_name is not None:
            file_type = fileformats.file_type_from_splitext(file_name=full_file_name)
        assert file_type is not None
        self.file_type = file_type

        # All the names of the preview images we know about (there may be more, perhaps)
        # Synchronize with preview_smallest and preview256 dicts below
        self.index_preview = OrderedDict(sorted(_index_preview.items(), key=lambda t: t[0]))

        # If extension is not in dict preview_smallest, that means the file
        # format always contains a "ThumbnailImage"
        self.preview_smallest = dict(
            crw=(2, ),
            dng=(4, 3, 0),
            fff=(3, ),
            iiq=(4, ),
            mrw=(0, ),
            nef=(4, 3),
            raw=(2, ),
        )
        # '3fr' is not a valid keyword argument name, so it is added separately
        self.preview_smallest['3fr'] = 3, 4

        # Format might have a thumbnail, but might not
        self.may_have_thumbnail = ('crw', 'mrw', 'orf', 'raw', 'x3f')

        # Preview images that are at least 256 pixels big, according to self.index_preview
        self.preview256 = dict(
            arw=(0, ),
            cr2=(0, ),
            cr3=(0, ),
            crw=(2, ),
            dng=(0, 3),
            fff=(3, ),
            iiq=(4, ),
            mrw=(0, ),
            nef=(0, 4, 2, 3),  # along with DNG quite possibly the most inconsistent format
            nrw=(0, 1),
            orf=(0, ),
            pef=(0, ),
            raf=(0, ),
            raw=(2, ),
            rw2=(2, ),
            sr2=(0, ),
            srw=(0, ),
            x3f=(0, 2),
        )
        self.preview256['3fr'] = 3, 4

        self.ignore_tiff_preview_256 = ('cr2', )

    def _load_metadata(self) -> bool:
        """
        Fetch the file's metadata from ExifTool if it has not been fetched yet.

        :return: False if ExifTool raised ValueError while reading the
         metadata, else True
        """

        if not self.metadata:
            try:
                self.metadata = self.et_process.get_metadata(self.full_file_name)
            except ValueError:
                return False
        return True

    def _get(self, key, missing):
        """
        Look up a single metadata value, fetching metadata on first use.

        :param key: the ExifTool tag name
        :param missing: value to return if the tag cannot be read
        :return: the tag's value, else missing
        """

        if key in ("VideoStreamType", "FileNumber", "ExposureTime"):
            # special cases: want ExifTool's string formatting
            # i.e. no -n tag
            if not self.metadata_string_format:
                try:
                    self.metadata_string_format = \
                        self.et_process.execute_json_no_formatting(self.full_file_name)
                except ValueError:
                    return missing
            try:
                return self.metadata_string_format[0][key]
            except (KeyError, IndexError, TypeError):
                # Tag absent, empty result list, or unexpected result type
                return missing

        if not self._load_metadata():
            return missing

        return self.metadata.get(key, missing)

    def _get_str(self, key: str, missing) -> Union[str, Any]:
        """
        Look up a metadata value and convert it to a string.

        :param key: the ExifTool tag name
        :param missing: value to return if the tag is not present
        :return: str() of the tag's value, else missing
        """

        v = self._get(key, None)
        if v is None:
            return missing
        return str(v)

    def date_time(self, missing: Optional[str]='',
                  ignore_file_modify_date: bool = False) -> Union[datetime.datetime, Any]:
        """
        Tries to get value from key "DateTimeOriginal"
        If that fails, tries "CreateDate", and then finally
        FileModifyDate

        :param missing: value to return if the date time is absent or
         cannot be parsed
        :param ignore_file_modify_date: if True, don't return the file
         modification date
        :return python datetime format the date and time the video or photo was
         recorded, else missing
        """

        d = self._get('DateTimeOriginal', None)
        if d is None:
            d = self._get('CreateDate', None)
        if d is None and not ignore_file_modify_date:
            d = self._get('FileModifyDate', None)
        if d is None:
            return missing

        d = d.strip()
        try:
            dt, fs = flexible_date_time_parser(d)
            logging.debug(
                "Extracted %s time %s using ExifTool", self.file_type.name, dt.strftime(fs)
            )

        except AssertionError:
            logging.warning(
                "Error extracting date time metadata '%s' for %s %s",
                d, self.file_type.name, self.full_file_name
            )
            return missing

        except (ValueError, OverflowError):
            logging.warning(
                "Error parsing date time metadata '%s' for %s %s",
                d, self.file_type.name, self.full_file_name
            )
            return missing
        except Exception:
            # Deliberate best-effort: a bad date must never abort processing
            logging.error(
                "Unknown error parsing date time metadata '%s' for %s %s",
                d, self.file_type.name, self.full_file_name
            )
            return missing

        return dt

    def timestamp(self, missing='') -> Union[float, Any]:
        """
        Photo and Video
        :return: a float value representing the time stamp, if it exists
        """

        dt = self.date_time(missing=None)
        if dt is None:
            return missing
        try:
            return float(dt.timestamp())
        except (AttributeError, ValueError, OverflowError, OSError):
            # Date out of the platform's supported range, or not a datetime
            return missing

    def file_number(self, missing='') -> Union[str, Any]:
        """
        Photo and video
        :return: a string value representing the File number, if it exists
        """

        return self._get_str("FileNumber", missing)

    def width(self, missing='') -> Union[str, Any]:
        """
        :return: the image width as a string, else missing
        """
        return self._get_str('ImageWidth', missing)

    def height(self, missing='') -> Union[str, Any]:
        """
        :return: the image height as a string, else missing
        """
        return self._get_str('ImageHeight', missing)

    def length(self, missing='') -> Union[str, Any]:
        """
        return the duration (length) of the video, rounded to the nearest second, in string format
        """
        v = self._get("Duration", None)
        if v is None:
            return missing
        try:
            return "%0.f" % float(v)
        except (TypeError, ValueError):
            return missing

    def frames_per_second(self, missing='') -> Union[str, Any]:
        """
        :return: the video frame rate rounded to a whole number, in string
         format, else missing
        """
        v = self._get("FrameRate", None)
        if v is None:
            v = self._get("VideoFrameRate", None)

        if v is None:
            return missing
        try:
            # Convert first, as length() does, so numeric strings are handled too
            return '%.0f' % float(v)
        except (TypeError, ValueError):
            return missing

    def codec(self, missing='') -> Union[str, Any]:
        """
        :return: value of "VideoStreamType", falling back to "VideoCodec",
         else missing
        """
        v = self._get("VideoStreamType", None)
        if v is None:
            v = self._get("VideoCodec", None)
        if v is not None:
            return v
        return missing

    def fourcc(self, missing='') -> Union[str, Any]:
        """
        :return: value of "CompressorID", else missing
        """
        return self._get("CompressorID", missing)

    def rotation(self, missing=0) -> Union[int, Any]:
        """
        :return: value of "Rotation" as reported by ExifTool, else missing
        """
        v = self._get("Rotation", None)
        if v is not None:
            return v
        return missing

    def aperture(self, missing='') -> Union[str, Any]:
        """
        Returns in string format the floating point value of the image's
        aperture.

        Returns missing if the metadata value is not present.
        """
        v = self._get('FNumber', None)
        try:
            v = float(v)
        except (ValueError, TypeError):  # TypeError catches None
            return missing

        return "{:.1f}".format(v)

    def iso(self, missing='') -> Union[str, Any]:
        """
        Returns in string format the integer value of the image's ISO.

        Returns missing if the metadata value is not present.
        """
        v = self._get('ISO', None)
        if v:
            return str(v)
        return missing

    def _exposure_time_rational(self, missing=None) -> Tuple[Any, Any]:
        """
        Split exposure time value into fraction for further processing
        :param missing: value to use for both parts if the exposure time is
         not present
        :return: tuple of exposure time e.g. '1', '320' (for 1/320 sec)
         or '2.5', 1 (for 2.5 secs)
        """

        v = self._get('ExposureTime', None)
        if v is None:
            return missing, missing
        v = str(v)

        # ExifTool returns two distinct types values e.g.:
        # '1/125' fraction (string)
        # '2.5' floating point

        # fractional format
        if v.find('/') > 0:
            return tuple(v.split('/')[:2])

        # already in floating point format
        return v, 1

    def exposure_time(self, alternativeFormat=False, missing='') -> Union[str, Any]:
        """
        Returns in string format the exposure time of the image.

        Returns missing if the metadata value is not present.

        alternativeFormat is useful if the value is going to be used in a
        purpose where / is an invalid character, e.g. file system names.

        alternativeFormat is False:
        For exposures less than one second, the result is formatted as a
        fraction e.g. 1/125
        For exposures greater than or equal to one second, the value is
        formatted as an integer e.g. 30

        alternativeFormat is True:
        For exposures less than one second, the result is formatted as an
        integer e.g. 125
        For exposures less than one second but more than or equal to
        one tenth of a second, the result is formatted as an integer
        e.g. 3 representing 3/10 of a second
        For exposures greater than or equal to one second, the value is
        formatted as an integer with a trailing s e.g. 30s

        :raises AssertionError: if a floating point exposure value is paired
         with a denominator other than 1
        :raises ValueError: if the exposure time parts are not numeric
        """

        e0, e1 = self._exposure_time_rational()

        if e0 is None or e1 is None:
            return missing

        if str(e0).find('.') > 0:
            # Explicit check rather than an assert statement, which would be
            # stripped when Python runs with -O
            if e1 != 1:
                logging.error('{}: {}, {}'.format(self.full_file_name, e0, e1))
                raise AssertionError(
                    '{}: {}, {}'.format(self.full_file_name, e0, e1)
                )
            e0 = float(e0)
        else:
            try:
                e0 = int(e0)
                e1 = int(e1)
            except ValueError as e:
                logging.exception('{}: {}, {}'.format(self.full_file_name, e0, e1))
                raise ValueError from e

        if e1 > e0:
            # less than one second
            if not alternativeFormat:
                return "%s/%s" % (e0, e1)
            if e0 == 1:
                return str(e1)
            return str(e0)

        if e0 > e1:
            # more than one second
            seconds = float(e0) / e1
            if alternativeFormat:
                return "%.0fs" % seconds
            return "%.0f" % seconds

        # exactly one second: the trailing s belongs only to the alternative format
        return "1s" if alternativeFormat else "1"

    def focal_length(self, missing='') -> Union[str, Any]:
        """
        :return: value of "FocalLength" as a string, else missing
        """
        return self._get_str('FocalLength', missing)

    def camera_make(self, missing='') -> Union[str, Any]:
        """
        :return: value of "Make" as a string, else missing
        """
        return self._get_str('Make', missing)

    def camera_model(self, missing='') -> Union[str, Any]:
        """
        :return: value of "Model" as a string, else missing
        """
        return self._get_str('Model', missing)

    def short_camera_model(self, includeCharacters='', missing=''):
        r"""
        Returns in shortened string format the camera model used to record
        the image.

        Returns missing if the metadata value is not present.

        The short format is determined by the first occurrence of a digit in
        the camera model, including all alphaNumeric characters before and
        after that digit up till a non-alphanumeric character, but with these
        interventions:

        1. Canon "Mark" designations are shortened prior to conversion.
        2. Names like "Canon EOS DIGITAL REBEL XSi" do not have a number and
           must be treated differently (see below)

        Examples:
        Canon EOS 300D DIGITAL -> 300D
        Canon EOS 5D -> 5D
        Canon EOS 5D Mark II -> 5DMkII
        NIKON D2X -> D2X
        NIKON D70 -> D70
        X100,D540Z,C310Z -> X100
        Canon EOS DIGITAL REBEL XSi -> XSi
        Canon EOS Digital Rebel XS -> XS
        Canon EOS Digital Rebel XTi -> XTi
        Canon EOS Kiss Digital X -> Digital
        Canon EOS Digital Rebel XT -> XT
        EOS Kiss Digital -> Digital
        Canon Digital IXUS Wireless -> Wireless
        Canon Digital IXUS i zoom -> zoom
        Canon EOS Kiss Digital N -> N
        Canon Digital IXUS IIs -> IIs
        IXY Digital L -> L
        Digital IXUS i -> i
        IXY Digital -> Digital
        Digital IXUS -> IXUS

        The optional includeCharacters allows additional characters to appear
        before and after the digits.
        Note: special includeCharacters MUST be escaped as per syntax of a
        regular expressions (see documentation for module re)

        Examples:

        includeCharacters = '':
        DSC-P92 -> P92
        includeCharacters = '\-':
        DSC-P92 -> DSC-P92

        If a digit is not found in the camera model, the last word is returned.

        Note: assume exif values are in ENGLISH, regardless of current platform
        """
        m = self.camera_model()
        m = m.replace(' Mark ', 'Mk')
        if not m:
            return missing

        s = r"(?:[^a-zA-Z0-9%s]?)(?P<model>[a-zA-Z0-9%s]*\d+[" \
            r"a-zA-Z0-9%s]*)" \
            % (includeCharacters, includeCharacters, includeCharacters)
        r = re.search(s, m)
        if r:
            return r.group("model")

        # No digit in the model name: fall back to its last word
        return m.strip().rpartition(' ')[2]

    def camera_serial(self, missing='') -> Union[str, Any]:
        """
        :return: value of "SerialNumber" as a string, else missing
        """
        return self._get_str('SerialNumber', missing)

    def shutter_count(self, missing='') -> Union[str, Any]:
        """
        :return: value of "ShutterCount", falling back to "ImageNumber", as a
         string, else missing
        """
        v = self._get('ShutterCount', None)
        if v is None:
            v = self._get('ImageNumber', None)

        if v is not None:
            return str(v)
        return missing

    def owner_name(self, missing='') -> Union[str, Any]:
        """
        :return: value of "OwnerName" as a string, else missing
        """

        # distinct from CopyrightOwnerName
        return self._get_str('OwnerName', missing)

    def copyright(self, missing='') -> Union[str, Any]:
        """
        :return: value of "Copyright" as a string, else missing
        """
        return self._get_str('Copyright', missing)

    def artist(self, missing=''):
        """
        :return: value of "Artist" as a string, else missing
        """
        return self._get_str('Artist', missing)

    def sub_seconds(self, missing='00') -> Union[str, Any]:
        """
        :return: value of "SubSecTime" as a string, else missing
        """
        return self._get_str('SubSecTime', missing)

    def orientation(self, missing='') -> Union[str, Any]:
        """
        :return: value of "Orientation" as a string, else missing
        """
        return self._get_str('Orientation', missing)

    def _get_binary(self, key: str) -> Optional[bytes]:
        """
        Ask ExifTool for the binary content of the tag named key.
        """
        return self.et_process.execute_binary("-{}".format(key), self.full_file_name)

    def get_small_thumbnail(self) -> Optional[bytes]:
        """
        Get the small thumbnail image (if it exists)
        :return: thumbnail image in raw bytes
        """

        return self._get_binary("ThumbnailImage")

    def get_indexed_preview(self, preview_number: int=0, force: bool=False) -> Optional[bytes]:
        """
        Extract preview image from the metadata
        If initial preview number does not work, tries others

        :param preview_number: which preview to get
        :param force: if True, get only that preview. Otherwise, take a flexible approach
         where every preview image is tried, in order found in index_preview
        :return: preview image in raw bytes, if found, else None
        """

        key = self.index_preview[preview_number]
        b = self._get_binary(key)
        if b:
            return b
        if force:
            return None

        logging.debug(
            "Attempt to extract %s using ExifTool from %s failed. Trying flexible approach.",
            key, self.full_file_name
        )

        # The candidates below are filtered against self.metadata, so make sure
        # it has been fetched; otherwise no alternative would ever be tried.
        # If loading fails, self.metadata stays empty and no candidates remain.
        self._load_metadata()

        valid_untried_indexes = [
            index for index in self.index_preview
            if index != preview_number and self.index_preview[index] in self.metadata
        ]
        if valid_untried_indexes:
            for index in valid_untried_indexes:
                key = self.index_preview[index]
                logging.debug("Attempting %s on %s...", key, self.full_file_name)
                b = self._get_binary(key)
                if b:
                    logging.debug("...attempt successful from %s", self.full_file_name)
                    return b
                logging.debug("...attempt failed on %s", self.full_file_name)
        else:
            logging.debug(
                "No other preview image indexes remain to be tried on %s", self.full_file_name
            )

        logging.warning("ExifTool could not extract a preview image from %s", self.full_file_name)
        return None

    def get_small_thumbnail_or_first_indexed_preview(self) -> Optional[bytes]:
        """
        First attempt to get the small thumbnail image. If it does not exist,
        extract the smallest preview image from the metadata

        :return: thumbnail / preview image in raw bytes, if found, else None
        """

        # Look for "ThumbnailImage" if the file format supports it
        if self.ext not in self.preview_smallest or self.ext in self.may_have_thumbnail:
            thumbnail = self.get_small_thumbnail()
            # Truthiness test, as in get_indexed_preview(): an empty result is
            # treated as "no thumbnail" so the previews below are still tried
            if thumbnail:
                return thumbnail

        # Otherwise look for the smallest preview image for this format
        if self.ext in self.preview_smallest:
            for index in self.preview_smallest[self.ext]:
                thumbnail = self.get_indexed_preview(preview_number=index, force=True)
                if thumbnail:
                    return thumbnail

        # If that fails, take a flexible approach
        return self.get_indexed_preview(force=False)

    def get_preview_256(self) -> Optional[bytes]:
        """
        :return: if possible, return a preview image that is preferably larger than 256 pixels,
         else the smallest preview if it exists
        """

        # look for the previews listed for this format in preview256
        if self.ext in self.preview256:
            for index in self.preview256[self.ext]:
                thumbnail = self.get_indexed_preview(preview_number=index, force=True)
                if thumbnail:
                    return thumbnail

        # If that fails, take a flexible approach
        return self.get_indexed_preview(force=False)

    def preview_names(self) -> Optional[List[str]]:
        """
        Names of preview image located in the file, excluding the tag ThumbnailImage

        :return None if unsuccessful, else names of preview images
        """

        if not self._load_metadata():
            return None

        return [v for v in self.index_preview.values() if v in self.metadata]


if __name__ == '__main__':
    import sys

    with exiftool.ExifTool() as et_process:
        if (len(sys.argv) != 2):
            print('Usage: ' + sys.argv[0] + ' path/to/video_or_photo/containing/metadata')
        else:
            file = sys.argv[1]

            print("ExifTool", programversions.exiftool_version_info())
            file_type = fileformats.file_type_from_splitext(file_name=file)
            if file_type is None:
                print("Unsupported file type")
                sys.exit(1)
            m = MetadataExiftool(file, et_process, file_type)
            print(m.date_time())
            print("f" + m.aperture('missing '))
            print("ISO " + m.iso('missing '))
            print(m.exposure_time(missing='missing ') + " sec")
            print(m.exposure_time(alternativeFormat=True, missing='missing '))
            print(m.focal_length('missing ') + "mm")
            print(m.camera_make())
            print(m.camera_model())
            print('Serial number:', m.camera_serial(missing='missing'))
            print('Shutter count:', m.shutter_count())
            print('Owner name:', m.owner_name())
            print('Copyright:', m.copyright())
            print('Artist', m.artist())
            print('Subseconds:', m.sub_seconds())
            print('Orientation:', m.orientation())
            print('Preview names (excluding Thumbnail): ', m.preview_names())
            preview = m.get_small_thumbnail_or_first_indexed_preview()

            thumb = m.get_small_thumbnail()
            if thumb:
                print('Thumbnail size: {} bytes'.format(len(thumb)))
            else:
                print('No thumbnail detected')

            previews = et_process.execute(file.encode(), b'-preview:all')
            print("ExifTool raw output:")
            if previews:
                print(previews.decode())
            else:
                print('No previews detected')

            # print("%sx%s" % (m.width(), m.height()))
            # print("Length:", m.length())
            # print("FPS: ", m.frames_per_second())
            # print("Codec:", m.codec())