# -*- coding: utf-8 -*-
# Copyright (C) 2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

"""Read and write MPEG-4 audio files with iTunes metadata.

This module will read MPEG-4 audio information and metadata,
as found in Apple's MP4 (aka M4A, M4B, M4P) files.

There is no official specification for this format. The source code
for TagLib, FAAD, and various MPEG specifications at

* http://developer.apple.com/documentation/QuickTime/QTFF/
* http://www.geocities.com/xhelmboyx/quicktime/formats/mp4-layout.txt
* http://standards.iso.org/ittf/PubliclyAvailableStandards/\
c041828_ISO_IEC_14496-12_2005(E).zip
* http://wiki.multimedia.cx/index.php?title=Apple_QuickTime

were all consulted.
"""

import struct
import sys

from mutagen import FileType, Tags, StreamInfo, PaddingInfo
from mutagen._constants import GENRES
from mutagen._util import cdata, insert_bytes, DictProxy, MutagenError, \
    hashable, enum, get_size, resize_bytes, loadfile, convert_error
from mutagen._compat import (reraise, PY2, string_types, text_type, chr_,
                             iteritems, PY3, cBytesIO, izip, xrange)
from ._atom import Atoms, Atom, AtomError
from ._util import parse_full_atom
from ._as_entry import AudioSampleEntry, ASEntryError


class error(MutagenError):
    """Base class for all errors raised by this module."""
    pass


class MP4MetadataError(error):
    """Raised when the tag (``ilst``) data is missing or cannot be parsed."""
    pass


class MP4StreamInfoError(error):
    """Raised when the stream information cannot be read."""
    pass


class MP4NoTrackError(MP4StreamInfoError):
    """Raised when no audio (``soun``) track is found."""
    pass


class MP4MetadataValueError(ValueError, MP4MetadataError):
    """Raised when a tag value cannot be parsed or rendered."""
    pass


__all__ = ['MP4', 'Open', 'delete', 'MP4Cover', 'MP4FreeForm', 'AtomDataType']


@enum
class AtomDataType(object):
    """Enum for ``dataformat`` attribute of MP4FreeForm.

    .. versionadded:: 1.25
    """

    IMPLICIT = 0
    """for use with tags for which no type needs to be indicated because
       only one type is allowed"""

    UTF8 = 1
    """without any count or null terminator"""

    UTF16 = 2
    """also known as UTF-16BE"""

    SJIS = 3
    """deprecated unless it is needed for special Japanese characters"""

    HTML = 6
    """the HTML file header specifies which HTML version"""

    XML = 7
    """the XML header must identify the DTD or schemas"""

    UUID = 8
    """also known as GUID; stored as 16 bytes in binary (valid as an ID)"""

    ISRC = 9
    """stored as UTF-8 text (valid as an ID)"""

    MI3P = 10
    """stored as UTF-8 text (valid as an ID)"""

    GIF = 12
    """(deprecated) a GIF image"""

    JPEG = 13
    """a JPEG image"""

    PNG = 14
    """PNG image"""

    URL = 15
    """absolute, in UTF-8 characters"""

    DURATION = 16
    """in milliseconds, 32-bit integer"""

    DATETIME = 17
    """in UTC, counting seconds since midnight, January 1, 1904;
       32 or 64-bits"""

    GENRES = 18
    """a list of enumerated values"""

    INTEGER = 21
    """a signed big-endian integer with length one of { 1,2,3,4,8 } bytes"""

    RIAA_PA = 24
    """RIAA parental advisory; { -1=no, 1=yes, 0=unspecified },
       8-bit integer"""

    UPC = 25
    """Universal Product Code, in text UTF-8 format (valid as an ID)"""

    BMP = 27
    """Windows bitmap image"""


@hashable
class MP4Cover(bytes):
    """A cover artwork.

    Attributes:
        imageformat (`AtomDataType`): format of the image
            (either FORMAT_JPEG or FORMAT_PNG)
    """

    FORMAT_JPEG = AtomDataType.JPEG
    FORMAT_PNG = AtomDataType.PNG

    def __new__(cls, data, *args, **kwargs):
        # Only the raw bytes take part in construction; the remaining
        # arguments are consumed by __init__.
        return bytes.__new__(cls, data)

    def __init__(self, data, imageformat=FORMAT_JPEG):
        self.imageformat = imageformat

    __hash__ = bytes.__hash__

    def __eq__(self, other):
        # Against anything that is not an MP4Cover only the bytes are
        # compared; between two covers the image format has to match too.
        if not isinstance(other, MP4Cover):
            return bytes(self) == other

        return (bytes(self) == bytes(other) and
                self.imageformat == other.imageformat)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "%s(%r, %r)" % (
            type(self).__name__, bytes(self),
            AtomDataType(self.imageformat))


@hashable
class MP4FreeForm(bytes):
    """A freeform value.

    Attributes:
        dataformat (`AtomDataType`): format of the data (see AtomDataType)
        version (`int`): version byte stored alongside the data format
    """

    FORMAT_DATA = AtomDataType.IMPLICIT  # deprecated
    FORMAT_TEXT = AtomDataType.UTF8  # deprecated

    def __new__(cls, data, *args, **kwargs):
        # Only the raw bytes take part in construction; the remaining
        # arguments are consumed by __init__.
        return bytes.__new__(cls, data)

    def __init__(self, data, dataformat=AtomDataType.UTF8, version=0):
        self.dataformat = dataformat
        self.version = version

    __hash__ = bytes.__hash__

    def __eq__(self, other):
        # Against anything that is not an MP4FreeForm only the bytes are
        # compared; otherwise data format and version have to match too.
        if not isinstance(other, MP4FreeForm):
            return bytes(self) == other

        return (bytes(self) == bytes(other) and
                self.dataformat == other.dataformat and
                self.version == other.version)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "%s(%r, %r)" % (
            type(self).__name__, bytes(self),
            AtomDataType(self.dataformat))


def _name2key(name):
    """Convert an atom name (bytes) to a tag key (native str)."""

    if PY2:
        return name
    return name.decode("latin-1")


def _key2name(key):
    """Convert a tag key (native str) to an atom name (bytes)."""

    if PY2:
        return key
    return key.encode("latin-1")


def _find_padding(atom_path):
    """Return the "free" atom directly adjacent to ilst, or None.

    `atom_path` has to end with the meta atom followed by the ilst atom.
    The atom preceding ilst is preferred over the one following it.
    """

    # Check for padding "free" atom
    # XXX: we only use them if they are adjacent to ilst, and only one.
    # and there also is a top level free atom which we could use maybe..?

    meta, ilst = atom_path[-2:]
    assert meta.name == b"meta" and ilst.name == b"ilst"
    index = meta.children.index(ilst)

    # Only look backwards if there is a previous sibling. Indexing with -1
    # would wrap around to the *last* child, which is not adjacent to ilst.
    if index > 0:
        prev = meta.children[index - 1]
        if prev.name == b"free":
            return prev

    try:
        next_ = meta.children[index + 1]
    except IndexError:
        return None
    if next_.name == b"free":
        return next_
    return None


# iTunes always writes the tags in order of "relevance", try
# to copy it as closely as possible. Maps the first four characters of a
# key to its rank; built once instead of on every sort key computation.
_ITEM_ORDER_KEYS = ["\xa9nam", "\xa9ART", "\xa9wrt", "\xa9alb",
                    "\xa9gen", "gnre", "trkn", "disk",
                    "\xa9day", "cpil", "pgap", "pcst", "tmpo",
                    "\xa9too", "----", "covr", "\xa9lyr"]
_ITEM_ORDER = dict(izip(_ITEM_ORDER_KEYS, xrange(len(_ITEM_ORDER_KEYS))))


def _item_sort_key(key, value):
    """Return a sort key which orders tags the way iTunes writes them."""

    last = len(_ITEM_ORDER)
    # If there's no key-based way to distinguish, order by length.
    # If there's still no way, go by string comparison on the
    # values, so we at least have something deterministic.
    return (_ITEM_ORDER.get(key[:4], last), len(repr(value)), repr(value))


class MP4Tags(DictProxy, Tags):
    r"""MP4Tags()

    Dictionary containing Apple iTunes metadata list key/values.

    Keys are four byte identifiers, except for freeform ('----')
    keys. Values are usually unicode strings, but some atoms have a
    special structure:

    Text values (multiple values per key are supported):

    * '\\xa9nam' -- track title
    * '\\xa9alb' -- album
    * '\\xa9ART' -- artist
    * 'aART' -- album artist
    * '\\xa9wrt' -- composer
    * '\\xa9day' -- year
    * '\\xa9cmt' -- comment
    * 'desc' -- description (usually used in podcasts)
    * 'purd' -- purchase date
    * '\\xa9grp' -- grouping
    * '\\xa9gen' -- genre
    * '\\xa9lyr' -- lyrics
    * 'purl' -- podcast URL
    * 'egid' -- podcast episode GUID
    * 'catg' -- podcast category
    * 'keyw' -- podcast keywords
    * '\\xa9too' -- encoded by
    * 'cprt' -- copyright
    * 'soal' -- album sort order
    * 'soaa' -- album artist sort order
    * 'soar' -- artist sort order
    * 'sonm' -- title sort order
    * 'soco' -- composer sort order
    * 'sosn' -- show sort order
    * 'tvsh' -- show name
    * '\\xa9wrk' -- work
    * '\\xa9mvn' -- movement

    Boolean values:

    * 'cpil' -- part of a compilation
    * 'pgap' -- part of a gapless album
    * 'pcst' -- podcast (iTunes reads this only on import)

    Tuples of ints (multiple values per key are supported):

    * 'trkn' -- track number, total tracks
    * 'disk' -- disc number, total discs

    Integer values:

    * 'tmpo' -- tempo/BPM
    * '\\xa9mvc' -- Movement Count
    * '\\xa9mvi' -- Movement Index
    * 'shwm' -- work/movement
    * 'stik' -- Media Kind
    * 'rtng' -- Content Rating
    * 'tves' -- TV Episode
    * 'tvsn' -- TV Season
    * 'plID', 'cnID', 'geID', 'atID', 'sfID', 'cmID', 'akID' -- Various iTunes
      Internal IDs

    Others:

    * 'covr' -- cover artwork, list of MP4Cover objects (which are
      tagged strs)
    * 'gnre' -- ID3v1 genre. Not supported, use '\\xa9gen' instead.

    The freeform '----' frames use a key in the format '----:mean:name'
    where 'mean' is usually 'com.apple.iTunes' and 'name' is a unique
    identifier for this frame. The value is a str, but is probably
    text that can be decoded as UTF-8. Multiple values per key are
    supported.

    MP4 tag data cannot exist outside of the structure of an MP4 file,
    so this class should not be manually instantiated.

    Unknown non-text tags and tags that failed to parse will be written
    back as is.
    """

    def __init__(self, *args, **kwargs):
        # key -> list of raw atom payloads which could not be parsed; they
        # get written back unchanged by save().
        self._failed_atoms = {}
        super(MP4Tags, self).__init__()
        if args or kwargs:
            self.load(*args, **kwargs)

    def load(self, atoms, fileobj):
        """Parse all children of moov.udta.meta.ilst into this mapping.

        Raises:
            MP4MetadataError: if there is no ilst atom or an atom could
                not be read completely.
        """

        try:
            path = atoms.path(b"moov", b"udta", b"meta", b"ilst")
        except KeyError as key:
            raise MP4MetadataError(key)

        free = _find_padding(path)
        self._padding = free.datalength if free is not None else 0

        ilst = path[-1]
        for atom in ilst.children:
            ok, data = atom.read(fileobj)
            if not ok:
                raise MP4MetadataError("Not enough data")

            try:
                if atom.name in self.__atoms:
                    info = self.__atoms[atom.name]
                    info[0](self, atom, data)
                else:
                    # unknown atom, try as text
                    self.__parse_text(atom, data, implicit=False)
            except MP4MetadataError:
                # parsing failed, save them so we can write them back
                key = _name2key(atom.name)
                self._failed_atoms.setdefault(key, []).append(data)

    def __setitem__(self, key, value):
        if not isinstance(key, str):
            raise TypeError("key has to be str")
        # Render once up front so invalid values fail on assignment and
        # not only when saving.
        self._render(key, value)
        super(MP4Tags, self).__setitem__(key, value)

    @classmethod
    def _can_load(cls, atoms):
        """Return whether `atoms` contains an ilst atom to load from."""

        return b"moov.udta.meta.ilst" in atoms

    def _render(self, key, value):
        """Return the rendered atom for `key` / `value` as bytes.

        Keys without a registered renderer are rendered as text.
        """

        atom_name = _key2name(key)[:4]
        if atom_name in self.__atoms:
            render_func = self.__atoms[atom_name][1]
            render_args = self.__atoms[atom_name][2:]
        else:
            render_func = type(self).__render_text
            render_args = []

        return render_func(self, key, value, *render_args)

    @convert_error(IOError, error)
    @loadfile(writable=True)
    def save(self, filething=None, padding=None):
        """Render all tags and write them to the file.

        Raises:
            MP4MetadataValueError: if a value can not be rendered.
            error: if the atoms of the file can not be parsed.
        """

        values = []
        items = sorted(self.items(), key=lambda kv: _item_sort_key(*kv))
        for key, value in items:
            try:
                values.append(self._render(key, value))
            except (TypeError, ValueError) as s:
                reraise(MP4MetadataValueError, s, sys.exc_info()[2])

        for key, failed in iteritems(self._failed_atoms):
            # don't write atoms back if we have added a new one with
            # the same name, this excludes freeform which can have
            # multiple atoms with the same key (most parsers seem to be able
            # to handle that)
            if key in self:
                assert _key2name(key) != b"----"
                continue
            for data in failed:
                values.append(Atom.render(_key2name(key), data))

        data = Atom.render(b"ilst", b"".join(values))

        # Find the old atoms.
        try:
            atoms = Atoms(filething.fileobj)
        except AtomError as err:
            reraise(error, err, sys.exc_info()[2])

        self.__save(filething.fileobj, atoms, data, padding)

    def __save(self, fileobj, atoms, data, padding):
        """Replace an existing ilst atom or create a new one."""

        try:
            path = atoms.path(b"moov", b"udta", b"meta", b"ilst")
        except KeyError:
            self.__save_new(fileobj, atoms, data, padding)
        else:
            self.__save_existing(fileobj, atoms, path, data, padding)

    def __save_new(self, fileobj, atoms, ilst_data, padding_func):
        """Insert a new meta atom (and udta if missing) containing ilst."""

        hdlr = Atom.render(b"hdlr", b"\x00" * 8 + b"mdirappl" + b"\x00" * 9)
        meta_data = b"\x00\x00\x00\x00" + hdlr + ilst_data

        try:
            path = atoms.path(b"moov", b"udta")
        except KeyError:
            path = atoms.path(b"moov")

        offset = path[-1]._dataoffset

        # ignoring some atom overhead... but we don't have padding left anyway
        # and padding_size is guaranteed to be less than zero
        content_size = get_size(fileobj) - offset
        padding_size = -len(meta_data)
        assert padding_size < 0
        info = PaddingInfo(padding_size, content_size)
        new_padding = info._get_padding(padding_func)
        new_padding = min(0xFFFFFFFF, new_padding)

        free = Atom.render(b"free", b"\x00" * new_padding)
        meta = Atom.render(b"meta", meta_data + free)
        if path[-1].name != b"udta":
            # moov.udta not found -- create one
            data = Atom.render(b"udta", meta)
        else:
            data = meta

        insert_bytes(fileobj, len(data), offset)
        fileobj.seek(offset)
        fileobj.write(data)
        self.__update_parents(fileobj, path, len(data))
        self.__update_offsets(fileobj, atoms, len(data), offset)

    def __save_existing(self, fileobj, atoms, path, ilst_data, padding_func):
        """Replace the ilst atom at the end of `path` (plus padding)."""

        # Replace the old ilst atom.
        ilst = path[-1]
        offset = ilst.offset
        length = ilst.length

        # Use adjacent free atom if there is one
        free = _find_padding(path)
        if free is not None:
            offset = min(offset, free.offset)
            length += free.length

        # Always add a padding atom to make things easier
        padding_overhead = len(Atom.render(b"free", b""))
        content_size = get_size(fileobj) - (offset + length)
        padding_size = length - (len(ilst_data) + padding_overhead)
        info = PaddingInfo(padding_size, content_size)
        new_padding = info._get_padding(padding_func)
        # Limit padding size so we can be sure the free atom overhead is as we
        # calculated above (see Atom.render)
        new_padding = min(0xFFFFFFFF, new_padding)

        ilst_data += Atom.render(b"free", b"\x00" * new_padding)

        resize_bytes(fileobj, length, len(ilst_data), offset)
        delta = len(ilst_data) - length

        fileobj.seek(offset)
        fileobj.write(ilst_data)
        self.__update_parents(fileobj, path[:-1], delta)
        self.__update_offsets(fileobj, atoms, delta, offset)

    def __update_parents(self, fileobj, path, delta):
        """Update all parent atoms with the new size."""

        if delta == 0:
            return

        for atom in path:
            fileobj.seek(atom.offset)
            size = cdata.uint_be(fileobj.read(4))
            if size == 1:  # 64bit
                # skip name (4B) and read size (8B)
                size = cdata.ulonglong_be(fileobj.read(12)[4:])
                fileobj.seek(atom.offset + 8)
                fileobj.write(cdata.to_ulonglong_be(size + delta))
            else:  # 32bit
                fileobj.seek(atom.offset)
                fileobj.write(cdata.to_uint_be(size + delta))

    def __update_offset_table(self, fileobj, fmt, atom, delta, offset):
        """Update offset table in the specified atom."""

        if atom.offset > offset:
            atom.offset += delta
        fileobj.seek(atom.offset + 12)
        data = fileobj.read(atom.length - 12)
        # The first four bytes hold the entry count, which completes the
        # struct format passed in by the caller.
        fmt = fmt % cdata.uint_be(data[:4])
        offsets = struct.unpack(fmt, data[4:])
        # Only entries pointing past the changed region have to move.
        offsets = [o + (0, delta)[offset < o] for o in offsets]
        fileobj.seek(atom.offset + 16)
        fileobj.write(struct.pack(fmt, *offsets))

    def __update_tfhd(self, fileobj, atom, delta, offset):
        """Update the 64 bit offset of a tfhd atom, if flag bit 0 is set."""

        if atom.offset > offset:
            atom.offset += delta
        # Skip size (4B), name (4B) and version (1B)
        fileobj.seek(atom.offset + 9)
        data = fileobj.read(atom.length - 9)
        flags = cdata.uint_be(b"\x00" + data[:3])
        if flags & 1:
            o = cdata.ulonglong_be(data[7:15])
            if o > offset:
                o += delta
            fileobj.seek(atom.offset + 16)
            fileobj.write(cdata.to_ulonglong_be(o))

    def __update_offsets(self, fileobj, atoms, delta, offset):
        """Update offset tables in all 'stco' and 'co64' atoms."""

        if delta == 0:
            return
        moov = atoms[b"moov"]
        for atom in moov.findall(b'stco', True):
            self.__update_offset_table(fileobj, ">%dI", atom, delta, offset)
        for atom in moov.findall(b'co64', True):
            self.__update_offset_table(fileobj, ">%dQ", atom, delta, offset)
        try:
            for atom in atoms[b"moof"].findall(b'tfhd', True):
                self.__update_tfhd(fileobj, atom, delta, offset)
        except KeyError:
            pass

    def __parse_data(self, atom, data):
        """Yield (version, flags, payload) for each "data" child atom.

        Raises:
            MP4MetadataError: on truncated or non-"data" child atoms.
        """

        pos = 0
        while pos < atom.length - 8:
            head = data[pos:pos + 12]
            if len(head) != 12:
                raise MP4MetadataError("truncated atom % r" % atom.name)
            length, name = struct.unpack(">I4s", head[:8])
            version = ord(head[8:9])
            flags = struct.unpack(">I", b"\x00" + head[9:12])[0]
            if name != b"data":
                raise MP4MetadataError(
                    "unexpected atom %r inside %r" % (name, atom.name))

            chunk = data[pos + 16:pos + length]
            if len(chunk) != length - 16:
                raise MP4MetadataError("truncated atom % r" % atom.name)
            yield version, flags, chunk
            pos += length

    def __add(self, key, value, single=False):
        """Set `value` for `key`, or extend the existing list of values."""

        assert isinstance(key, str)

        if single:
            self[key] = value
        else:
            self.setdefault(key, []).extend(value)

    def __render_data(self, key, version, flags, value):
        """Render one "data" atom per entry in `value`, wrapped in `key`."""

        return Atom.render(_key2name(key), b"".join([
            Atom.render(
                b"data", struct.pack(">2I", version << 24 | flags, 0) + data)
            for data in value]))

    def __parse_freeform(self, atom, data):
        """Parse a '----' atom into a list of MP4FreeForm values.

        Raises:
            MP4MetadataError: if the atom is truncated, contains a child
                other than "data" after mean/name, or a child with an
                invalid length.
        """

        try:
            length = cdata.uint_be(data[:4])
            mean = data[12:length]
            pos = length
            length = cdata.uint_be(data[pos:pos + 4])
            name = data[pos + 12:pos + length]
            pos += length
        except cdata.error as e:
            # Too short for the mean/name headers; raising a metadata
            # error lets load() keep the raw atom instead of failing.
            raise MP4MetadataError(e)

        value = []
        while pos < atom.length - 8:
            head = data[pos:pos + 12]
            if len(head) != 12:
                raise MP4MetadataError("truncated atom % r" % atom.name)
            length, atom_name = struct.unpack(">I4s", head[:8])
            if atom_name != b"data":
                raise MP4MetadataError(
                    "unexpected atom %r inside %r" % (atom_name, atom.name))
            if length < 1:
                # A zero length would never advance pos -> endless loop
                raise MP4MetadataError("invalid length")

            version = ord(head[8:9])
            flags = struct.unpack(">I", b"\x00" + head[9:12])[0]
            value.append(MP4FreeForm(data[pos + 16:pos + length],
                                     dataformat=flags, version=version))
            pos += length

        key = _name2key(atom.name + b":" + mean + b":" + name)
        self.__add(key, value)

    def __render_freeform(self, key, value):
        """Render a '----:mean:name' key with one or more bytes values."""

        if isinstance(value, bytes):
            value = [value]

        dummy, mean, name = _key2name(key).split(b":", 2)
        mean = struct.pack(">I4sI", len(mean) + 12, b"mean", 0) + mean
        name = struct.pack(">I4sI", len(name) + 12, b"name", 0) + name

        chunks = []
        for v in value:
            # Plain bytes are written as UTF-8 text with version 0
            flags = AtomDataType.UTF8
            version = 0
            if isinstance(v, MP4FreeForm):
                flags = v.dataformat
                version = v.version

            chunks.append(struct.pack(
                ">I4s2I", len(v) + 16, b"data", version << 24 | flags, 0))
            chunks.append(v)

        return Atom.render(b"----", mean + name + b"".join(chunks))

    def __parse_pair(self, atom, data):
        """Parse a list of (number, total) pairs, e.g. for 'trkn'."""

        key = _name2key(atom.name)
        values = [struct.unpack(">2H", d[2:6]) for
                  version, flags, d in self.__parse_data(atom, data)]
        self.__add(key, values)

    def __render_pair(self, key, value):
        """Render (number, total) pairs followed by two padding bytes."""

        data = []
        for v in value:
            try:
                track, total = v
            except TypeError:
                raise ValueError
            if 0 <= track < 1 << 16 and 0 <= total < 1 << 16:
                data.append(struct.pack(">4H", 0, track, total, 0))
            else:
                raise MP4MetadataValueError(
                    "invalid numeric pair %r" % ((track, total),))
        return self.__render_data(key, 0, AtomDataType.IMPLICIT, data)

    def __render_pair_no_trailing(self, key, value):
        """Render (number, total) pairs without trailing padding bytes."""

        data = []
        for (track, total) in value:
            if 0 <= track < 1 << 16 and 0 <= total < 1 << 16:
                data.append(struct.pack(">3H", 0, track, total))
            else:
                raise MP4MetadataValueError(
                    "invalid numeric pair %r" % ((track, total),))
        return self.__render_data(key, 0, AtomDataType.IMPLICIT, data)

    def __parse_genre(self, atom, data):
        """Parse a 'gnre' atom and store the genre names under '\\xa9gen'.

        Raises:
            MP4MetadataValueError: for a payload which is not two bytes
                long or a genre number without an entry in GENRES.
        """

        values = []
        for version, flags, payload in self.__parse_data(atom, data):
            # version = 0, flags = 0
            if len(payload) != 2:
                raise MP4MetadataValueError("invalid genre")
            genre = cdata.short_be(payload)
            # Translate to a freeform genre. Genre numbers are one-based;
            # anything below 1 would index GENRES from the end instead.
            if genre < 1:
                raise MP4MetadataValueError("unknown genre")
            try:
                genre = GENRES[genre - 1]
            except IndexError:
                # this will make us write it back at least
                raise MP4MetadataValueError("unknown genre")
            values.append(genre)
        key = _name2key(b"\xa9gen")
        self.__add(key, values)

    def __parse_integer(self, atom, data):
        """Parse signed big-endian integers of 1, 2, 3, 4 or 8 bytes.

        Raises:
            MP4MetadataValueError: for unsupported versions, data types or
                value sizes.
        """

        values = []
        for version, flags, payload in self.__parse_data(atom, data):
            if version != 0:
                raise MP4MetadataValueError("unsupported version")
            if flags not in (AtomDataType.IMPLICIT, AtomDataType.INTEGER):
                raise MP4MetadataValueError("unsupported type")

            size = len(payload)
            if size == 1:
                value = cdata.int8(payload)
            elif size == 2:
                value = cdata.int16_be(payload)
            elif size == 3:
                # Pad to four bytes, then shift the padding out again
                value = cdata.int32_be(payload + b"\x00") >> 8
            elif size == 4:
                value = cdata.int32_be(payload)
            elif size == 8:
                value = cdata.int64_be(payload)
            else:
                raise MP4MetadataValueError(
                    "invalid value size %d" % size)
            values.append(value)

        key = _name2key(atom.name)
        self.__add(key, values)

    def __render_integer(self, key, value, min_bytes):
        """Render integers using the smallest size >= `min_bytes`.

        Raises:
            MP4MetadataValueError: if a value is not an integer fitting
                into 64 bits.
        """

        assert min_bytes in (1, 2, 4, 8)

        data_list = []
        try:
            for v in value:
                # We default to the int size of the usual values written
                # by itunes for compatibility.
                if cdata.int8_min <= v <= cdata.int8_max and min_bytes <= 1:
                    data = cdata.to_int8(v)
                elif cdata.int16_min <= v <= cdata.int16_max and \
                        min_bytes <= 2:
                    data = cdata.to_int16_be(v)
                elif cdata.int32_min <= v <= cdata.int32_max and \
                        min_bytes <= 4:
                    data = cdata.to_int32_be(v)
                elif cdata.int64_min <= v <= cdata.int64_max and \
                        min_bytes <= 8:
                    data = cdata.to_int64_be(v)
                else:
                    # Report the offending element, not the whole list
                    raise MP4MetadataValueError(
                        "value out of range: %r" % (v,))
                data_list.append(data)

        except (TypeError, ValueError, cdata.error) as e:
            raise MP4MetadataValueError(e)

        return self.__render_data(key, 0, AtomDataType.INTEGER, data_list)

    def __parse_bool(self, atom, data):
        """Parse a single byte boolean, e.g. for 'cpil'."""

        for version, flags, payload in self.__parse_data(atom, data):
            if len(payload) != 1:
                raise MP4MetadataValueError("invalid bool")

            value = bool(ord(payload))
            key = _name2key(atom.name)
            self.__add(key, value, single=True)

    def __render_bool(self, key, value):
        """Render the truthiness of `value` as a single byte."""

        return self.__render_data(
            key, 0, AtomDataType.INTEGER, [chr_(bool(value))])

    def __parse_cover(self, atom, data):
        """Parse a 'covr' atom into a list of MP4Cover values.

        "name" child atoms are skipped.

        Raises:
            MP4MetadataError: if the atom is truncated, contains an
                unexpected child or a child with an invalid length.
        """

        values = []
        pos = 0
        while pos < atom.length - 8:
            head = data[pos:pos + 12]
            if len(head) != 12:
                raise MP4MetadataError("truncated atom % r" % atom.name)
            length, name, imageformat = struct.unpack(">I4sI", head)
            if length < 1:
                # A zero length would never advance pos -> endless loop
                raise MP4MetadataError("invalid length")
            if name != b"data":
                if name == b"name":
                    pos += length
                    continue
                raise MP4MetadataError(
                    "unexpected atom %r inside 'covr'" % name)
            if imageformat not in (MP4Cover.FORMAT_JPEG, MP4Cover.FORMAT_PNG):
                # Sometimes AtomDataType.IMPLICIT or simply wrong.
                # In all cases it was jpeg, so default to it
                imageformat = MP4Cover.FORMAT_JPEG
            cover = MP4Cover(data[pos + 16:pos + length], imageformat)
            values.append(cover)
            pos += length

        key = _name2key(atom.name)
        self.__add(key, values)

    def __render_cover(self, key, value):
        """Render a list of covers; plain bytes are written as JPEG."""

        atom_data = []
        for cover in value:
            try:
                imageformat = cover.imageformat
            except AttributeError:
                imageformat = MP4Cover.FORMAT_JPEG
            atom_data.append(Atom.render(
                b"data", struct.pack(">2I", imageformat, 0) + cover))
        return Atom.render(_key2name(key), b"".join(atom_data))

    def __parse_text(self, atom, data, implicit=True):
        """Parse UTF-8 text values.

        Raises:
            MP4MetadataError: if the data type is not accepted or the data
                is not valid UTF-8.
        """

        # implicit = False, for parsing unknown atoms only take utf8 ones.
        # For known ones we can assume the implicit are utf8 too.
        values = []
        for version, flags, atom_data in self.__parse_data(atom, data):
            if implicit:
                if flags not in (AtomDataType.IMPLICIT, AtomDataType.UTF8):
                    raise MP4MetadataError(
                        "Unknown atom type %r for %r" % (flags, atom.name))
            else:
                if flags != AtomDataType.UTF8:
                    raise MP4MetadataError(
                        "%r is not text, ignore" % atom.name)

            try:
                text = atom_data.decode("utf-8")
            except UnicodeDecodeError as e:
                raise MP4MetadataError("%s: %s" % (_name2key(atom.name), e))

            values.append(text)

        key = _name2key(atom.name)
        self.__add(key, values)

    def __render_text(self, key, value, flags=AtomDataType.UTF8):
        """Render one or more text values as UTF-8.

        Raises:
            TypeError: if a value is not text (under Python 2 byte strings
                which decode as UTF-8 are accepted as well).
        """

        if isinstance(value, string_types):
            value = [value]

        encoded = []
        for v in value:
            if not isinstance(v, text_type):
                if PY3:
                    raise TypeError("%r not str" % v)
                try:
                    v = v.decode("utf-8")
                except (AttributeError, UnicodeDecodeError) as e:
                    raise TypeError(e)
            encoded.append(v.encode("utf-8"))

        return self.__render_data(key, 0, flags, encoded)

    def delete(self, filename):
        """Remove the metadata from the given filename."""

        self._failed_atoms.clear()
        self.clear()
        self.save(filename, padding=lambda x: 0)

    # atom name -> (parse function, render function, *extra render args)
    __atoms = {
        b"----": (__parse_freeform, __render_freeform),
        b"trkn": (__parse_pair, __render_pair),
        b"disk": (__parse_pair, __render_pair_no_trailing),
        b"gnre": (__parse_genre, None),
        b"plID": (__parse_integer, __render_integer, 8),
        b"cnID": (__parse_integer, __render_integer, 4),
        b"geID": (__parse_integer, __render_integer, 4),
        b"atID": (__parse_integer, __render_integer, 4),
        b"sfID": (__parse_integer, __render_integer, 4),
        b"cmID": (__parse_integer, __render_integer, 4),
        b"akID": (__parse_integer, __render_integer, 1),
        b"tvsn": (__parse_integer, __render_integer, 4),
        b"tves": (__parse_integer, __render_integer, 4),
        b"tmpo": (__parse_integer, __render_integer, 2),
        b"\xa9mvi": (__parse_integer, __render_integer, 2),
        b"\xa9mvc": (__parse_integer, __render_integer, 2),
        b"cpil": (__parse_bool, __render_bool),
        b"pgap": (__parse_bool, __render_bool),
        b"pcst": (__parse_bool, __render_bool),
        b"shwm": (__parse_integer, __render_integer, 1),
        b"stik": (__parse_integer, __render_integer, 1),
        b"rtng": (__parse_integer, __render_integer, 1),
        b"covr": (__parse_cover, __render_cover),
        b"purl": (__parse_text, __render_text),
        b"egid": (__parse_text, __render_text),
    }

    # these allow implicit flags and parse as text
    for name in [b"\xa9nam", b"\xa9alb", b"\xa9ART", b"aART", b"\xa9wrt",
                 b"\xa9day", b"\xa9cmt", b"desc", b"purd", b"\xa9grp",
                 b"\xa9gen", b"\xa9lyr", b"catg", b"keyw", b"\xa9too",
                 b"cprt", b"soal", b"soaa", b"soar", b"sonm", b"soco",
                 b"sosn", b"tvsh"]:
        __atoms[name] = (__parse_text, __render_text)

    def pprint(self):
        """Return all tags as text, one "key=value" line per value."""

        def to_line(key, value):
            assert isinstance(key, text_type)
            if isinstance(value, text_type):
                return u"%s=%s" % (key, value)
            return u"%s=%r" % (key, value)

        values = []
        for key, value in sorted(iteritems(self)):
            if not isinstance(key, text_type):
                key = key.decode("latin-1")
            if key == "covr":
                # Only show the size of each cover instead of its content
                values.append(u"%s=%s" % (key, u", ".join(
                    [u"[%d bytes of data]" % len(data) for data in value])))
            elif isinstance(value, list):
                for v in value:
                    values.append(to_line(key, v))
            else:
                values.append(to_line(key, value))
        return u"\n".join(values)


class MP4Info(StreamInfo):
    """MP4Info()

    MPEG-4 stream information.

    Attributes:
        bitrate (`int`): bitrate in bits per second, as an int
        length (`float`): file length in seconds, as a float
        channels (`int`): number of audio channels
        sample_rate (`int`): audio sampling rate in Hz
        bits_per_sample (`int`): bits per sample
        codec (`mutagen.text`):
            * if starting with ``"mp4a"`` uses an mp4a audio codec
              (see the codec parameter in rfc6381 for details e.g.
              ``"mp4a.40.2"``)
            * for everything else see a list of possible values at
              http://www.mp4ra.org/codecs.html

            e.g. ``"mp4a"``, ``"alac"``, ``"mp4a.40.2"``, ``"ac-3"`` etc.
        codec_description (`mutagen.text`):
            Name of the codec used (ALAC, AAC LC, AC-3...). Values might
            change in the future, use for display purposes only.
    """

    bitrate = 0
    length = 0.0
    channels = 0
    sample_rate = 0
    bits_per_sample = 0
    codec = u""
    codec_description = u""

    def __init__(self, *args, **kwargs):
        if args or kwargs:
            self.load(*args, **kwargs)

    @convert_error(IOError, MP4StreamInfoError)
    def load(self, atoms, fileobj):
        """Read the stream information of the first audio track.

        Raises:
            MP4NoTrackError: if there is no audio track.
            MP4StreamInfoError: if the atoms can not be read or parsed.
        """

        try:
            moov = atoms[b"moov"]
        except KeyError:
            raise MP4StreamInfoError("not a MP4 file")

        for trak in moov.findall(b"trak"):
            hdlr = trak[b"mdia", b"hdlr"]
            ok, data = hdlr.read(fileobj)
            if not ok:
                raise MP4StreamInfoError("Not enough data")
            if data[8:12] == b"soun":
                break
        else:
            raise MP4NoTrackError("track has no audio data")

        mdhd = trak[b"mdia", b"mdhd"]
        ok, data = mdhd.read(fileobj)
        if not ok:
            raise MP4StreamInfoError("Not enough data")

        try:
            version, flags, data = parse_full_atom(data)
        except ValueError as e:
            raise MP4StreamInfoError(e)

        # The version decides where the time scale and duration fields
        # start and how wide the duration is.
        if version == 0:
            offset = 8
            fmt = ">2I"
        elif version == 1:
            offset = 16
            fmt = ">IQ"
        else:
            raise MP4StreamInfoError("Unknown mdhd version %d" % version)

        end = offset + struct.calcsize(fmt)
        unit, length = struct.unpack(fmt, data[offset:end])
        try:
            self.length = float(length) / unit
        except ZeroDivisionError:
            self.length = 0.0

        try:
            atom = trak[b"mdia", b"minf", b"stbl", b"stsd"]
        except KeyError:
            pass
        else:
            self._parse_stsd(atom, fileobj)

    def _parse_stsd(self, atom, fileobj):
        """Sets channels, bits_per_sample, sample_rate and optionally bitrate.

        Can raise MP4StreamInfoError.
        """

        assert atom.name == b"stsd"

        ok, data = atom.read(fileobj)
        if not ok:
            raise MP4StreamInfoError("Invalid stsd")

        try:
            version, flags, data = parse_full_atom(data)
        except ValueError as e:
            raise MP4StreamInfoError(e)

        if version != 0:
            raise MP4StreamInfoError("Unsupported stsd version")

        try:
            num_entries, offset = cdata.uint32_be_from(data, 0)
        except cdata.error as e:
            raise MP4StreamInfoError(e)

        if num_entries == 0:
            return

        # look at the first entry if there is one
        entry_fileobj = cBytesIO(data[offset:])
        try:
            entry_atom = Atom(entry_fileobj)
        except AtomError as e:
            raise MP4StreamInfoError(e)

        try:
            entry = AudioSampleEntry(entry_atom, entry_fileobj)
        except ASEntryError as e:
            raise MP4StreamInfoError(e)
        else:
            self.channels = entry.channels
            self.bits_per_sample = entry.sample_size
            self.sample_rate = entry.sample_rate
            self.bitrate = entry.bitrate
            self.codec = entry.codec
            self.codec_description = entry.codec_description

    def pprint(self):
        """Return a one line description of the stream."""

        return "MPEG-4 audio (%s), %.2f seconds, %d bps" % (
            self.codec_description, self.length, self.bitrate)


class MP4(FileType):
    """MP4(filething)

    An MPEG-4 audio file, probably containing AAC.

    If more than one track is present in the file, the first is used.
    Only audio ('soun') tracks will be read.

    Arguments:
        filething (filething)

    Attributes:
        info (`MP4Info`)
        tags (`MP4Tags`)
    """

    MP4Tags = MP4Tags

    _mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"]

    @loadfile()
    def load(self, filething):
        """Load stream information and, if present, tags from the file.

        Raises:
            error: if the atoms, stream info or tags can not be parsed.
        """

        fileobj = filething.fileobj

        try:
            atoms = Atoms(fileobj)
        except AtomError as err:
            reraise(error, err, sys.exc_info()[2])

        self.info = MP4Info()
        try:
            self.info.load(atoms, fileobj)
        except MP4NoTrackError:
            # A file without an audio track keeps the default info values
            pass
        except error:
            raise
        except Exception as err:
            reraise(MP4StreamInfoError, err, sys.exc_info()[2])

        if not MP4Tags._can_load(atoms):
            self.tags = None
        else:
            try:
                self.tags = self.MP4Tags(atoms, fileobj)
            except error:
                raise
            except Exception as err:
                reraise(MP4MetadataError, err, sys.exc_info()[2])

    @property
    def _padding(self):
        """Padding size of the loaded tags, 0 if there are no tags."""

        if self.tags is None:
            return 0
        else:
            return self.tags._padding

    def save(self, *args, **kwargs):
        """save(filething=None, padding=None)"""

        super(MP4, self).save(*args, **kwargs)

    def add_tags(self):
        """Add an empty tag mapping to the file.

        Raises:
            error: if the file already has tags.
        """

        if self.tags is None:
            self.tags = self.MP4Tags()
        else:
            raise error("an MP4 tag already exists")

    @staticmethod
    def score(filename, fileobj, header_data):
        """Return how likely the header belongs to an MP4 file (0 to 2)."""

        return (b"ftyp" in header_data) + (b"mp4" in header_data)


Open = MP4


@convert_error(IOError, error)
@loadfile(method=False, writable=True)
def delete(filething):
    """ delete(filething)

    Arguments:
        filething (filething)
    Raises:
        mutagen.MutagenError

    Remove tags from a file.
    """

    t = MP4(filething)
    filething.fileobj.seek(0)
    t.delete(filething)