1# -*- coding: utf-8 -*-
2# Copyright (C) 2006  Joe Wreschnig
3#
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 2 of the License, or
7# (at your option) any later version.
8
9"""Read and write MPEG-4 audio files with iTunes metadata.
10
11This module will read MPEG-4 audio information and metadata,
12as found in Apple's MP4 (aka M4A, M4B, M4P) files.
13
14There is no official specification for this format. The source code
15for TagLib, FAAD, and various MPEG specifications at
16
17* http://developer.apple.com/documentation/QuickTime/QTFF/
18* http://www.geocities.com/xhelmboyx/quicktime/formats/mp4-layout.txt
19* http://standards.iso.org/ittf/PubliclyAvailableStandards/\
20c041828_ISO_IEC_14496-12_2005(E).zip
21* http://wiki.multimedia.cx/index.php?title=Apple_QuickTime
22
23were all consulted.
24"""
25
26import struct
27import sys
28
29from mutagen import FileType, Tags, StreamInfo, PaddingInfo
30from mutagen._constants import GENRES
31from mutagen._util import cdata, insert_bytes, DictProxy, MutagenError, \
32    hashable, enum, get_size, resize_bytes, loadfile, convert_error
33from mutagen._compat import (reraise, PY2, string_types, text_type, chr_,
34                             iteritems, PY3, cBytesIO, izip, xrange)
35from ._atom import Atoms, Atom, AtomError
36from ._util import parse_full_atom
37from ._as_entry import AudioSampleEntry, ASEntryError
38
39
class error(MutagenError):
    """Base class of the MP4-specific exceptions defined in this module."""
42
43
class MP4MetadataError(error):
    """Raised when the iTunes metadata cannot be located or parsed."""
46
47
class MP4StreamInfoError(error):
    """Error category for stream information problems.

    Base class of `MP4NoTrackError`.
    """
50
51
class MP4NoTrackError(MP4StreamInfoError):
    """Stream information error; judging by the name, raised when no
    usable track is found (the raising code is not part of this section).
    """
54
55
class MP4MetadataValueError(ValueError, MP4MetadataError):
    """Raised for tag values that cannot be parsed or rendered.

    It is both a `ValueError` and an `MP4MetadataError`, so either can be
    used to catch it.
    """
58
59
# Names exported by a star-import of this module.
__all__ = ['MP4', 'Open', 'delete', 'MP4Cover', 'MP4FreeForm', 'AtomDataType']
61
62
@enum
class AtomDataType(object):
    """Enum of the type codes stored in the flags of a "data" atom.

    Used for the ``dataformat`` attribute of MP4FreeForm and the
    ``imageformat`` attribute of MP4Cover.

    .. versionadded:: 1.25
    """

    IMPLICIT = 0
    """for use with tags for which no type needs to be indicated because
       only one type is allowed"""

    UTF8 = 1
    """without any count or null terminator"""

    UTF16 = 2
    """also known as UTF-16BE"""

    SJIS = 3
    """deprecated unless it is needed for special Japanese characters"""

    HTML = 6
    """the HTML file header specifies which HTML version"""

    XML = 7
    """the XML header must identify the DTD or schemas"""

    UUID = 8
    """also known as GUID; stored as 16 bytes in binary (valid as an ID)"""

    ISRC = 9
    """stored as UTF-8 text (valid as an ID)"""

    MI3P = 10
    """stored as UTF-8 text (valid as an ID)"""

    GIF = 12
    """(deprecated) a GIF image"""

    JPEG = 13
    """a JPEG image"""

    PNG = 14
    """PNG image"""

    URL = 15
    """absolute, in UTF-8 characters"""

    DURATION = 16
    """in milliseconds, 32-bit integer"""

    DATETIME = 17
    """in UTC, counting seconds since midnight, January 1, 1904;
       32 or 64-bits"""

    GENRES = 18
    """a list of enumerated values"""

    INTEGER = 21
    """a signed big-endian integer with length one of { 1,2,3,4,8 } bytes"""

    RIAA_PA = 24
    """RIAA parental advisory; { -1=no, 1=yes, 0=unspecified },
       8-bit integer"""

    UPC = 25
    """Universal Product Code, in text UTF-8 format (valid as an ID)"""

    BMP = 27
    """Windows bitmap image"""
132
133
@hashable
class MP4Cover(bytes):
    """Cover artwork: the raw image bytes tagged with an image format.

    Attributes:
        imageformat (`AtomDataType`): format of the image
            (either FORMAT_JPEG or FORMAT_PNG)
    """

    FORMAT_JPEG = AtomDataType.JPEG
    FORMAT_PNG = AtomDataType.PNG

    def __new__(cls, data, *args, **kwargs):
        # The payload of an immutable bytes object has to be set here;
        # the remaining arguments are handled by __init__.
        return bytes.__new__(cls, data)

    def __init__(self, data, imageformat=FORMAT_JPEG):
        self.imageformat = imageformat

    # Hash like the plain bytes payload (the format is not included).
    __hash__ = bytes.__hash__

    def __eq__(self, other):
        if isinstance(other, MP4Cover):
            # Two covers are equal only if payload and format both match.
            if bytes(self) != bytes(other):
                return False
            return self.imageformat == other.imageformat

        # Anything else is compared against the payload only.
        return bytes(self) == other

    def __ne__(self, other):
        equal = self.__eq__(other)
        return not equal

    def __repr__(self):
        class_name = type(self).__name__
        image_type = AtomDataType(self.imageformat)
        return "%s(%r, %r)" % (class_name, bytes(self), image_type)
168
169
@hashable
class MP4FreeForm(bytes):
    """A freeform value: raw bytes tagged with a data format and version.

    Attributes:
        dataformat (`AtomDataType`): format of the data (see AtomDataType)
        version (`int`): version stored alongside the format
    """

    FORMAT_DATA = AtomDataType.IMPLICIT  # deprecated
    FORMAT_TEXT = AtomDataType.UTF8  # deprecated

    def __new__(cls, data, *args, **kwargs):
        # The payload of an immutable bytes object has to be set here;
        # the remaining arguments are handled by __init__.
        return bytes.__new__(cls, data)

    def __init__(self, data, dataformat=AtomDataType.UTF8, version=0):
        self.dataformat = dataformat
        self.version = version

    # Hash like the plain bytes payload (format/version are not included).
    __hash__ = bytes.__hash__

    def __eq__(self, other):
        if isinstance(other, MP4FreeForm):
            # Payload, format and version all have to match.
            if bytes(self) != bytes(other):
                return False
            if self.dataformat != other.dataformat:
                return False
            return self.version == other.version

        # Anything else is compared against the payload only.
        return bytes(self) == other

    def __ne__(self, other):
        equal = self.__eq__(other)
        return not equal

    def __repr__(self):
        class_name = type(self).__name__
        data_type = AtomDataType(self.dataformat)
        return "%s(%r, %r)" % (class_name, bytes(self), data_type)
205
206
def _name2key(name):
    """Convert a raw atom name into a tag key.

    Under Python 2 the name is returned unchanged, otherwise it is
    decoded as latin-1.
    """
    return name if PY2 else name.decode("latin-1")
211
212
def _key2name(key):
    """Convert a tag key into a raw atom name (inverse of `_name2key`).

    Under Python 2 the key is returned unchanged, otherwise it is
    encoded as latin-1.
    """
    return key if PY2 else key.encode("latin-1")
217
218
219def _find_padding(atom_path):
220    # Check for padding "free" atom
221    # XXX: we only use them if they are adjacent to ilst, and only one.
222    # and there also is a top level free atom which we could use maybe..?
223
224    meta, ilst = atom_path[-2:]
225    assert meta.name == b"meta" and ilst.name == b"ilst"
226    index = meta.children.index(ilst)
227    try:
228        prev = meta.children[index - 1]
229        if prev.name == b"free":
230            return prev
231    except IndexError:
232        pass
233
234    try:
235        next_ = meta.children[index + 1]
236        if next_.name == b"free":
237            return next_
238    except IndexError:
239        pass
240
241
242def _item_sort_key(key, value):
243    # iTunes always writes the tags in order of "relevance", try
244    # to copy it as closely as possible.
245    order = ["\xa9nam", "\xa9ART", "\xa9wrt", "\xa9alb",
246             "\xa9gen", "gnre", "trkn", "disk",
247             "\xa9day", "cpil", "pgap", "pcst", "tmpo",
248             "\xa9too", "----", "covr", "\xa9lyr"]
249    order = dict(izip(order, xrange(len(order))))
250    last = len(order)
251    # If there's no key-based way to distinguish, order by length.
252    # If there's still no way, go by string comparison on the
253    # values, so we at least have something determinstic.
254    return (order.get(key[:4], last), len(repr(value)), repr(value))
255
256
257class MP4Tags(DictProxy, Tags):
258    r"""MP4Tags()
259
260    Dictionary containing Apple iTunes metadata list key/values.
261
262    Keys are four byte identifiers, except for freeform ('----')
263    keys. Values are usually unicode strings, but some atoms have a
264    special structure:
265
266    Text values (multiple values per key are supported):
267
268    * '\\xa9nam' -- track title
269    * '\\xa9alb' -- album
270    * '\\xa9ART' -- artist
271    * 'aART' -- album artist
272    * '\\xa9wrt' -- composer
273    * '\\xa9day' -- year
274    * '\\xa9cmt' -- comment
275    * 'desc' -- description (usually used in podcasts)
276    * 'purd' -- purchase date
277    * '\\xa9grp' -- grouping
278    * '\\xa9gen' -- genre
279    * '\\xa9lyr' -- lyrics
280    * 'purl' -- podcast URL
281    * 'egid' -- podcast episode GUID
282    * 'catg' -- podcast category
283    * 'keyw' -- podcast keywords
284    * '\\xa9too' -- encoded by
285    * 'cprt' -- copyright
286    * 'soal' -- album sort order
287    * 'soaa' -- album artist sort order
288    * 'soar' -- artist sort order
289    * 'sonm' -- title sort order
290    * 'soco' -- composer sort order
291    * 'sosn' -- show sort order
292    * 'tvsh' -- show name
293    * '\\xa9wrk' -- work
294    * '\\xa9mvn' -- movement
295
296    Boolean values:
297
298    * 'cpil' -- part of a compilation
299    * 'pgap' -- part of a gapless album
300    * 'pcst' -- podcast (iTunes reads this only on import)
301
302    Tuples of ints (multiple values per key are supported):
303
304    * 'trkn' -- track number, total tracks
305    * 'disk' -- disc number, total discs
306
307    Integer values:
308
309    * 'tmpo' -- tempo/BPM
310    * '\\xa9mvc' -- Movement Count
311    * '\\xa9mvi' -- Movement Index
312    * 'shwm' -- work/movement
313    * 'stik' -- Media Kind
314    * 'rtng' -- Content Rating
315    * 'tves' -- TV Episode
316    * 'tvsn' -- TV Season
317    * 'plID', 'cnID', 'geID', 'atID', 'sfID', 'cmID', 'akID' -- Various iTunes
318      Internal IDs
319
320    Others:
321
    * 'covr' -- cover artwork, list of MP4Cover objects (which are
      tagged bytes)
324    * 'gnre' -- ID3v1 genre. Not supported, use '\\xa9gen' instead.
325
    The freeform '----' frames use a key in the format '----:mean:name'
    where 'mean' is usually 'com.apple.iTunes' and 'name' is a unique
    identifier for this frame. Each value is an MP4FreeForm (tagged
    bytes) that is probably text which can be decoded as UTF-8. Multiple
    values per key are supported.
331
332    MP4 tag data cannot exist outside of the structure of an MP4 file,
333    so this class should not be manually instantiated.
334
335    Unknown non-text tags and tags that failed to parse will be written
336    back as is.
337    """
338
339    def __init__(self, *args, **kwargs):
340        self._failed_atoms = {}
341        super(MP4Tags, self).__init__()
342        if args or kwargs:
343            self.load(*args, **kwargs)
344
345    def load(self, atoms, fileobj):
346        try:
347            path = atoms.path(b"moov", b"udta", b"meta", b"ilst")
348        except KeyError as key:
349            raise MP4MetadataError(key)
350
351        free = _find_padding(path)
352        self._padding = free.datalength if free is not None else 0
353
354        ilst = path[-1]
355        for atom in ilst.children:
356            ok, data = atom.read(fileobj)
357            if not ok:
358                raise MP4MetadataError("Not enough data")
359
360            try:
361                if atom.name in self.__atoms:
362                    info = self.__atoms[atom.name]
363                    info[0](self, atom, data)
364                else:
365                    # unknown atom, try as text
366                    self.__parse_text(atom, data, implicit=False)
367            except MP4MetadataError:
368                # parsing failed, save them so we can write them back
369                key = _name2key(atom.name)
370                self._failed_atoms.setdefault(key, []).append(data)
371
372    def __setitem__(self, key, value):
373        if not isinstance(key, str):
374            raise TypeError("key has to be str")
375        self._render(key, value)
376        super(MP4Tags, self).__setitem__(key, value)
377
378    @classmethod
379    def _can_load(cls, atoms):
380        return b"moov.udta.meta.ilst" in atoms
381
382    def _render(self, key, value):
383        atom_name = _key2name(key)[:4]
384        if atom_name in self.__atoms:
385            render_func = self.__atoms[atom_name][1]
386            render_args = self.__atoms[atom_name][2:]
387        else:
388            render_func = type(self).__render_text
389            render_args = []
390
391        return render_func(self, key, value, *render_args)
392
393    @convert_error(IOError, error)
394    @loadfile(writable=True)
395    def save(self, filething=None, padding=None):
396
397        values = []
398        items = sorted(self.items(), key=lambda kv: _item_sort_key(*kv))
399        for key, value in items:
400            try:
401                values.append(self._render(key, value))
402            except (TypeError, ValueError) as s:
403                reraise(MP4MetadataValueError, s, sys.exc_info()[2])
404
405        for key, failed in iteritems(self._failed_atoms):
406            # don't write atoms back if we have added a new one with
407            # the same name, this excludes freeform which can have
408            # multiple atoms with the same key (most parsers seem to be able
409            # to handle that)
410            if key in self:
411                assert _key2name(key) != b"----"
412                continue
413            for data in failed:
414                values.append(Atom.render(_key2name(key), data))
415
416        data = Atom.render(b"ilst", b"".join(values))
417
418        # Find the old atoms.
419        try:
420            atoms = Atoms(filething.fileobj)
421        except AtomError as err:
422            reraise(error, err, sys.exc_info()[2])
423
424        self.__save(filething.fileobj, atoms, data, padding)
425
426    def __save(self, fileobj, atoms, data, padding):
427        try:
428            path = atoms.path(b"moov", b"udta", b"meta", b"ilst")
429        except KeyError:
430            self.__save_new(fileobj, atoms, data, padding)
431        else:
432            self.__save_existing(fileobj, atoms, path, data, padding)
433
    def __save_new(self, fileobj, atoms, ilst_data, padding_func):
        """Insert a new "meta" atom for a file that has no ilst atom yet.

        The new bytes are inserted at the ``_dataoffset`` of ``moov.udta``
        or, if there is no "udta" atom, wrapped in a new "udta" atom and
        inserted at the ``_dataoffset`` of ``moov`` (presumably the start
        of the atom's payload -- confirm in ``_atom``). Afterwards the
        sizes of the parent atoms and the offset tables are updated.

        Args:
            fileobj: the file object to modify.
            atoms: the parsed atom tree of the file.
            ilst_data: the rendered "ilst" atom.
            padding_func: passed to ``PaddingInfo._get_padding`` to choose
                the amount of padding.
        """
        # Handler atom plus four leading zero bytes in front of the
        # children of "meta" (presumably its version/flags field).
        hdlr = Atom.render(b"hdlr", b"\x00" * 8 + b"mdirappl" + b"\x00" * 9)
        meta_data = b"\x00\x00\x00\x00" + hdlr + ilst_data

        try:
            path = atoms.path(b"moov", b"udta")
        except KeyError:
            path = atoms.path(b"moov")

        offset = path[-1]._dataoffset

        # ignoring some atom overhead... but we don't have padding left anyway
        # and padding_size is guaranteed to be less than zero
        content_size = get_size(fileobj) - offset
        padding_size = -len(meta_data)
        assert padding_size < 0
        info = PaddingInfo(padding_size, content_size)
        new_padding = info._get_padding(padding_func)
        new_padding = min(0xFFFFFFFF, new_padding)

        # The padding atom becomes the last child of the new "meta" atom.
        free = Atom.render(b"free", b"\x00" * new_padding)
        meta = Atom.render(b"meta", meta_data + free)
        if path[-1].name != b"udta":
            # moov.udta not found -- create one
            data = Atom.render(b"udta", meta)
        else:
            data = meta

        # Make room at the insertion point, then fill it.
        insert_bytes(fileobj, len(data), offset)
        fileobj.seek(offset)
        fileobj.write(data)
        # Every atom in path now contains len(data) more bytes.
        self.__update_parents(fileobj, path, len(data))
        self.__update_offsets(fileobj, atoms, len(data), offset)
467
    def __save_existing(self, fileobj, atoms, path, ilst_data, padding_func):
        """Replace the existing ilst atom, and an adjacent "free" atom if
        there is one, with ``ilst_data`` followed by a new "free" atom.

        Args:
            fileobj: the file object to modify.
            atoms: the parsed atom tree of the file.
            path: atom path ending in the existing "ilst" atom.
            ilst_data: the rendered "ilst" atom.
            padding_func: passed to ``PaddingInfo._get_padding`` to choose
                the amount of padding.
        """
        # Replace the old ilst atom.
        ilst = path[-1]
        offset = ilst.offset
        length = ilst.length

        # Use adjacent free atom if there is one
        free = _find_padding(path)
        if free is not None:
            # The replaced region starts at whichever atom comes first
            # and spans both atoms.
            offset = min(offset, free.offset)
            length += free.length

        # Always add a padding atom to make things easier
        padding_overhead = len(Atom.render(b"free", b""))
        # Bytes following the replaced region.
        content_size = get_size(fileobj) - (offset + length)
        # Space left in the old region once the new ilst and the header of
        # the padding atom are accounted for.
        padding_size = length - (len(ilst_data) + padding_overhead)
        info = PaddingInfo(padding_size, content_size)
        new_padding = info._get_padding(padding_func)
        # Limit padding size so we can be sure the free atom overhead is as we
        # calculated above (see Atom.render)
        new_padding = min(0xFFFFFFFF, new_padding)

        ilst_data += Atom.render(b"free", b"\x00" * new_padding)

        # Resize the old region to the size of the new data, then overwrite.
        resize_bytes(fileobj, length, len(ilst_data), offset)
        delta = len(ilst_data) - length

        fileobj.seek(offset)
        fileobj.write(ilst_data)
        # path[:-1] are the parents of ilst; ilst itself was rewritten.
        self.__update_parents(fileobj, path[:-1], delta)
        self.__update_offsets(fileobj, atoms, delta, offset)
499
500    def __update_parents(self, fileobj, path, delta):
501        """Update all parent atoms with the new size."""
502
503        if delta == 0:
504            return
505
506        for atom in path:
507            fileobj.seek(atom.offset)
508            size = cdata.uint_be(fileobj.read(4))
509            if size == 1:  # 64bit
510                # skip name (4B) and read size (8B)
511                size = cdata.ulonglong_be(fileobj.read(12)[4:])
512                fileobj.seek(atom.offset + 8)
513                fileobj.write(cdata.to_ulonglong_be(size + delta))
514            else:  # 32bit
515                fileobj.seek(atom.offset)
516                fileobj.write(cdata.to_uint_be(size + delta))
517
518    def __update_offset_table(self, fileobj, fmt, atom, delta, offset):
519        """Update offset table in the specified atom."""
520        if atom.offset > offset:
521            atom.offset += delta
522        fileobj.seek(atom.offset + 12)
523        data = fileobj.read(atom.length - 12)
524        fmt = fmt % cdata.uint_be(data[:4])
525        offsets = struct.unpack(fmt, data[4:])
526        offsets = [o + (0, delta)[offset < o] for o in offsets]
527        fileobj.seek(atom.offset + 16)
528        fileobj.write(struct.pack(fmt, *offsets))
529
    def __update_tfhd(self, fileobj, atom, delta, offset):
        """Shift the 64bit offset stored in a "tfhd" atom by ``delta`` if
        it is present and points past ``offset``.

        The ``offset`` attribute of ``atom`` is adjusted first if the atom
        itself lies past ``offset``.
        """
        if atom.offset > offset:
            atom.offset += delta
        # Skip 9 bytes (presumably the 8 byte atom header plus a version
        # byte -- confirm) so that the data starts with the 3 flag bytes.
        fileobj.seek(atom.offset + 9)
        data = fileobj.read(atom.length - 9)
        flags = cdata.uint_be(b"\x00" + data[:3])
        # NOTE(review): bit 0 looks like the "base data offset present"
        # flag of ISO 14496-12 -- confirm against the spec.
        if flags & 1:
            # data[7:15] corresponds to file position atom.offset + 16,
            # which is where the value is written back below.
            o = cdata.ulonglong_be(data[7:15])
            if o > offset:
                o += delta
            fileobj.seek(atom.offset + 16)
            fileobj.write(cdata.to_ulonglong_be(o))
542
543    def __update_offsets(self, fileobj, atoms, delta, offset):
544        """Update offset tables in all 'stco' and 'co64' atoms."""
545        if delta == 0:
546            return
547        moov = atoms[b"moov"]
548        for atom in moov.findall(b'stco', True):
549            self.__update_offset_table(fileobj, ">%dI", atom, delta, offset)
550        for atom in moov.findall(b'co64', True):
551            self.__update_offset_table(fileobj, ">%dQ", atom, delta, offset)
552        try:
553            for atom in atoms[b"moof"].findall(b'tfhd', True):
554                self.__update_tfhd(fileobj, atom, delta, offset)
555        except KeyError:
556            pass
557
558    def __parse_data(self, atom, data):
559        pos = 0
560        while pos < atom.length - 8:
561            head = data[pos:pos + 12]
562            if len(head) != 12:
563                raise MP4MetadataError("truncated atom % r" % atom.name)
564            length, name = struct.unpack(">I4s", head[:8])
565            version = ord(head[8:9])
566            flags = struct.unpack(">I", b"\x00" + head[9:12])[0]
567            if name != b"data":
568                raise MP4MetadataError(
569                    "unexpected atom %r inside %r" % (name, atom.name))
570
571            chunk = data[pos + 16:pos + length]
572            if len(chunk) != length - 16:
573                raise MP4MetadataError("truncated atom % r" % atom.name)
574            yield version, flags, chunk
575            pos += length
576
577    def __add(self, key, value, single=False):
578        assert isinstance(key, str)
579
580        if single:
581            self[key] = value
582        else:
583            self.setdefault(key, []).extend(value)
584
585    def __render_data(self, key, version, flags, value):
586        return Atom.render(_key2name(key), b"".join([
587            Atom.render(
588                b"data", struct.pack(">2I", version << 24 | flags, 0) + data)
589            for data in value]))
590
591    def __parse_freeform(self, atom, data):
592        length = cdata.uint_be(data[:4])
593        mean = data[12:length]
594        pos = length
595        length = cdata.uint_be(data[pos:pos + 4])
596        name = data[pos + 12:pos + length]
597        pos += length
598        value = []
599        while pos < atom.length - 8:
600            length, atom_name = struct.unpack(">I4s", data[pos:pos + 8])
601            if atom_name != b"data":
602                raise MP4MetadataError(
603                    "unexpected atom %r inside %r" % (atom_name, atom.name))
604
605            version = ord(data[pos + 8:pos + 8 + 1])
606            flags = struct.unpack(">I", b"\x00" + data[pos + 9:pos + 12])[0]
607            value.append(MP4FreeForm(data[pos + 16:pos + length],
608                                     dataformat=flags, version=version))
609            pos += length
610
611        key = _name2key(atom.name + b":" + mean + b":" + name)
612        self.__add(key, value)
613
614    def __render_freeform(self, key, value):
615        if isinstance(value, bytes):
616            value = [value]
617
618        dummy, mean, name = _key2name(key).split(b":", 2)
619        mean = struct.pack(">I4sI", len(mean) + 12, b"mean", 0) + mean
620        name = struct.pack(">I4sI", len(name) + 12, b"name", 0) + name
621
622        data = b""
623        for v in value:
624            flags = AtomDataType.UTF8
625            version = 0
626            if isinstance(v, MP4FreeForm):
627                flags = v.dataformat
628                version = v.version
629
630            data += struct.pack(
631                ">I4s2I", len(v) + 16, b"data", version << 24 | flags, 0)
632            data += v
633
634        return Atom.render(b"----", mean + name + data)
635
636    def __parse_pair(self, atom, data):
637        key = _name2key(atom.name)
638        values = [struct.unpack(">2H", d[2:6]) for
639                  version, flags, d in self.__parse_data(atom, data)]
640        self.__add(key, values)
641
642    def __render_pair(self, key, value):
643        data = []
644        for v in value:
645            try:
646                track, total = v
647            except TypeError:
648                raise ValueError
649            if 0 <= track < 1 << 16 and 0 <= total < 1 << 16:
650                data.append(struct.pack(">4H", 0, track, total, 0))
651            else:
652                raise MP4MetadataValueError(
653                    "invalid numeric pair %r" % ((track, total),))
654        return self.__render_data(key, 0, AtomDataType.IMPLICIT, data)
655
656    def __render_pair_no_trailing(self, key, value):
657        data = []
658        for (track, total) in value:
659            if 0 <= track < 1 << 16 and 0 <= total < 1 << 16:
660                data.append(struct.pack(">3H", 0, track, total))
661            else:
662                raise MP4MetadataValueError(
663                    "invalid numeric pair %r" % ((track, total),))
664        return self.__render_data(key, 0, AtomDataType.IMPLICIT, data)
665
666    def __parse_genre(self, atom, data):
667        values = []
668        for version, flags, data in self.__parse_data(atom, data):
669            # version = 0, flags = 0
670            if len(data) != 2:
671                raise MP4MetadataValueError("invalid genre")
672            genre = cdata.short_be(data)
673            # Translate to a freeform genre.
674            try:
675                genre = GENRES[genre - 1]
676            except IndexError:
677                # this will make us write it back at least
678                raise MP4MetadataValueError("unknown genre")
679            values.append(genre)
680        key = _name2key(b"\xa9gen")
681        self.__add(key, values)
682
683    def __parse_integer(self, atom, data):
684        values = []
685        for version, flags, data in self.__parse_data(atom, data):
686            if version != 0:
687                raise MP4MetadataValueError("unsupported version")
688            if flags not in (AtomDataType.IMPLICIT, AtomDataType.INTEGER):
689                raise MP4MetadataValueError("unsupported type")
690
691            if len(data) == 1:
692                value = cdata.int8(data)
693            elif len(data) == 2:
694                value = cdata.int16_be(data)
695            elif len(data) == 3:
696                value = cdata.int32_be(data + b"\x00") >> 8
697            elif len(data) == 4:
698                value = cdata.int32_be(data)
699            elif len(data) == 8:
700                value = cdata.int64_be(data)
701            else:
702                raise MP4MetadataValueError(
703                    "invalid value size %d" % len(data))
704            values.append(value)
705
706        key = _name2key(atom.name)
707        self.__add(key, values)
708
709    def __render_integer(self, key, value, min_bytes):
710        assert min_bytes in (1, 2, 4, 8)
711
712        data_list = []
713        try:
714            for v in value:
715                # We default to the int size of the usual values written
716                # by itunes for compatibility.
717                if cdata.int8_min <= v <= cdata.int8_max and min_bytes <= 1:
718                    data = cdata.to_int8(v)
719                elif cdata.int16_min <= v <= cdata.int16_max and \
720                        min_bytes <= 2:
721                    data = cdata.to_int16_be(v)
722                elif cdata.int32_min <= v <= cdata.int32_max and \
723                        min_bytes <= 4:
724                    data = cdata.to_int32_be(v)
725                elif cdata.int64_min <= v <= cdata.int64_max and \
726                        min_bytes <= 8:
727                    data = cdata.to_int64_be(v)
728                else:
729                    raise MP4MetadataValueError(
730                        "value out of range: %r" % value)
731                data_list.append(data)
732
733        except (TypeError, ValueError, cdata.error) as e:
734            raise MP4MetadataValueError(e)
735
736        return self.__render_data(key, 0, AtomDataType.INTEGER, data_list)
737
738    def __parse_bool(self, atom, data):
739        for version, flags, data in self.__parse_data(atom, data):
740            if len(data) != 1:
741                raise MP4MetadataValueError("invalid bool")
742
743            value = bool(ord(data))
744            key = _name2key(atom.name)
745            self.__add(key, value, single=True)
746
747    def __render_bool(self, key, value):
748        return self.__render_data(
749            key, 0, AtomDataType.INTEGER, [chr_(bool(value))])
750
751    def __parse_cover(self, atom, data):
752        values = []
753        pos = 0
754        while pos < atom.length - 8:
755            length, name, imageformat = struct.unpack(">I4sI",
756                                                      data[pos:pos + 12])
757            if name != b"data":
758                if name == b"name":
759                    pos += length
760                    continue
761                raise MP4MetadataError(
762                    "unexpected atom %r inside 'covr'" % name)
763            if imageformat not in (MP4Cover.FORMAT_JPEG, MP4Cover.FORMAT_PNG):
764                # Sometimes AtomDataType.IMPLICIT or simply wrong.
765                # In all cases it was jpeg, so default to it
766                imageformat = MP4Cover.FORMAT_JPEG
767            cover = MP4Cover(data[pos + 16:pos + length], imageformat)
768            values.append(cover)
769            pos += length
770
771        key = _name2key(atom.name)
772        self.__add(key, values)
773
774    def __render_cover(self, key, value):
775        atom_data = []
776        for cover in value:
777            try:
778                imageformat = cover.imageformat
779            except AttributeError:
780                imageformat = MP4Cover.FORMAT_JPEG
781            atom_data.append(Atom.render(
782                b"data", struct.pack(">2I", imageformat, 0) + cover))
783        return Atom.render(_key2name(key), b"".join(atom_data))
784
785    def __parse_text(self, atom, data, implicit=True):
786        # implicit = False, for parsing unknown atoms only take utf8 ones.
787        # For known ones we can assume the implicit are utf8 too.
788        values = []
789        for version, flags, atom_data in self.__parse_data(atom, data):
790            if implicit:
791                if flags not in (AtomDataType.IMPLICIT, AtomDataType.UTF8):
792                    raise MP4MetadataError(
793                        "Unknown atom type %r for %r" % (flags, atom.name))
794            else:
795                if flags != AtomDataType.UTF8:
796                    raise MP4MetadataError(
797                        "%r is not text, ignore" % atom.name)
798
799            try:
800                text = atom_data.decode("utf-8")
801            except UnicodeDecodeError as e:
802                raise MP4MetadataError("%s: %s" % (_name2key(atom.name), e))
803
804            values.append(text)
805
806        key = _name2key(atom.name)
807        self.__add(key, values)
808
809    def __render_text(self, key, value, flags=AtomDataType.UTF8):
810        if isinstance(value, string_types):
811            value = [value]
812
813        encoded = []
814        for v in value:
815            if not isinstance(v, text_type):
816                if PY3:
817                    raise TypeError("%r not str" % v)
818                try:
819                    v = v.decode("utf-8")
820                except (AttributeError, UnicodeDecodeError) as e:
821                    raise TypeError(e)
822            encoded.append(v.encode("utf-8"))
823
824        return self.__render_data(key, 0, flags, encoded)
825
826    def delete(self, filename):
827        """Remove the metadata from the given filename."""
828
829        self._failed_atoms.clear()
830        self.clear()
831        self.save(filename, padding=lambda x: 0)
832
    # Maps an atom name to (parse function, render function, extra
    # arguments for the render function...). The parse function is called
    # by load() and the render function by _render(). b"gnre" has no
    # render function.
    __atoms = {
        b"----": (__parse_freeform, __render_freeform),
        b"trkn": (__parse_pair, __render_pair),
        b"disk": (__parse_pair, __render_pair_no_trailing),
        b"gnre": (__parse_genre, None),
        b"plID": (__parse_integer, __render_integer, 8),
        b"cnID": (__parse_integer, __render_integer, 4),
        b"geID": (__parse_integer, __render_integer, 4),
        b"atID": (__parse_integer, __render_integer, 4),
        b"sfID": (__parse_integer, __render_integer, 4),
        b"cmID": (__parse_integer, __render_integer, 4),
        b"akID": (__parse_integer, __render_integer, 1),
        b"tvsn": (__parse_integer, __render_integer, 4),
        b"tves": (__parse_integer, __render_integer, 4),
        b"tmpo": (__parse_integer, __render_integer, 2),
        b"\xa9mvi": (__parse_integer, __render_integer, 2),
        b"\xa9mvc": (__parse_integer, __render_integer, 2),
        b"cpil": (__parse_bool, __render_bool),
        b"pgap": (__parse_bool, __render_bool),
        b"pcst": (__parse_bool, __render_bool),
        b"shwm": (__parse_integer, __render_integer, 1),
        b"stik": (__parse_integer, __render_integer, 1),
        b"rtng": (__parse_integer, __render_integer, 1),
        b"covr": (__parse_cover, __render_cover),
        b"purl": (__parse_text, __render_text),
        b"egid": (__parse_text, __render_text),
    }
860
    # these allow implicit flags and parse as text
    # (registered with the text parser/renderer; atoms that are not in
    # the table at all are parsed as text only if marked UTF8, see load())
    for name in [b"\xa9nam", b"\xa9alb", b"\xa9ART", b"aART", b"\xa9wrt",
                 b"\xa9day", b"\xa9cmt", b"desc", b"purd", b"\xa9grp",
                 b"\xa9gen", b"\xa9lyr", b"catg", b"keyw", b"\xa9too",
                 b"cprt", b"soal", b"soaa", b"soar", b"sonm", b"soco",
                 b"sosn", b"tvsh"]:
        __atoms[name] = (__parse_text, __render_text)
868
869    def pprint(self):
870
871        def to_line(key, value):
872            assert isinstance(key, text_type)
873            if isinstance(value, text_type):
874                return u"%s=%s" % (key, value)
875            return u"%s=%r" % (key, value)
876
877        values = []
878        for key, value in sorted(iteritems(self)):
879            if not isinstance(key, text_type):
880                key = key.decode("latin-1")
881            if key == "covr":
882                values.append(u"%s=%s" % (key, u", ".join(
883                    [u"[%d bytes of data]" % len(data) for data in value])))
884            elif isinstance(value, list):
885                for v in value:
886                    values.append(to_line(key, v))
887            else:
888                values.append(to_line(key, value))
889        return u"\n".join(values)
890
891
class MP4Info(StreamInfo):
    """MP4Info()

    MPEG-4 stream information.

    Attributes:
        bitrate (`int`): bitrate in bits per second, as an int
        length (`float`): file length in seconds, as a float
        channels (`int`): number of audio channels
        sample_rate (`int`): audio sampling rate in Hz
        bits_per_sample (`int`): bits per sample
        codec (`mutagen.text`):
            * if starting with ``"mp4a"`` uses an mp4a audio codec
              (see the codec parameter in rfc6381 for details e.g.
              ``"mp4a.40.2"``)
            * for everything else see a list of possible values at
              http://www.mp4ra.org/codecs.html

            e.g. ``"mp4a"``, ``"alac"``, ``"mp4a.40.2"``, ``"ac-3"`` etc.
        codec_description (`mutagen.text`):
            Name of the codec used (ALAC, AAC LC, AC-3...). Values might
            change in the future, use for display purposes only.
    """

    # Defaults used when no audio track (or no sample description) is found.
    bitrate = 0
    length = 0.0
    channels = 0
    sample_rate = 0
    bits_per_sample = 0
    codec = u""
    codec_description = u""

    def __init__(self, *args, **kwargs):
        # Without arguments an empty instance with the class defaults is
        # created; load() can then be called separately.
        if args or kwargs:
            self.load(*args, **kwargs)

    @convert_error(IOError, MP4StreamInfoError)
    def load(self, atoms, fileobj):
        """Fill in the stream attributes from a parsed atom tree.

        The first ``trak`` whose handler type is ``soun`` is used.

        Arguments:
            atoms: the parsed atoms of the file; must contain ``moov``
            fileobj: file object the atom payloads are read from

        Raises:
            MP4NoTrackError: if there is no ``trak`` with a ``soun`` handler
            MP4StreamInfoError: if a required atom is missing, truncated or
                has an unsupported version
        """

        try:
            moov = atoms[b"moov"]
        except KeyError:
            raise MP4StreamInfoError("not a MP4 file")

        for trak in moov.findall(b"trak"):
            try:
                hdlr = trak[b"mdia", b"hdlr"]
            except KeyError as e:
                # Report a missing handler atom the same way as the other
                # structural problems instead of leaking a bare KeyError.
                raise MP4StreamInfoError(e)
            ok, data = hdlr.read(fileobj)
            if not ok:
                raise MP4StreamInfoError("Not enough data")
            # Bytes 8..12 of the hdlr payload hold the handler type.
            if data[8:12] == b"soun":
                break
        else:
            # Also reached when moov contains no trak at all.
            raise MP4NoTrackError("track has no audio data")

        try:
            mdhd = trak[b"mdia", b"mdhd"]
        except KeyError as e:
            raise MP4StreamInfoError(e)
        ok, data = mdhd.read(fileobj)
        if not ok:
            raise MP4StreamInfoError("Not enough data")

        try:
            version, flags, data = parse_full_atom(data)
        except ValueError as e:
            raise MP4StreamInfoError(e)

        # The time scale and duration follow the creation/modification
        # times, which are 2 x 4 bytes in version 0 and 2 x 8 bytes in
        # version 1 (where the duration is also widened to 64 bit).
        if version == 0:
            offset = 8
            fmt = ">2I"
        elif version == 1:
            offset = 16
            fmt = ">IQ"
        else:
            raise MP4StreamInfoError("Unknown mdhd version %d" % version)

        end = offset + struct.calcsize(fmt)
        try:
            unit, length = struct.unpack(fmt, data[offset:end])
        except struct.error as e:
            # The payload is shorter than the fields we need.
            raise MP4StreamInfoError(e)

        try:
            self.length = float(length) / unit
        except ZeroDivisionError:
            # A zero time scale gives no usable duration; keep it a float
            # as documented.
            self.length = 0.0

        # The sample description is optional; without it the remaining
        # attributes keep their defaults.
        try:
            atom = trak[b"mdia", b"minf", b"stbl", b"stsd"]
        except KeyError:
            pass
        else:
            self._parse_stsd(atom, fileobj)

    def _parse_stsd(self, atom, fileobj):
        """Sets channels, bits_per_sample, sample_rate, bitrate, codec and
        codec_description from the first entry of the stsd atom.

        Does nothing if the atom lists zero entries.

        Can raise MP4StreamInfoError.
        """

        assert atom.name == b"stsd"

        ok, data = atom.read(fileobj)
        if not ok:
            raise MP4StreamInfoError("Invalid stsd")

        try:
            version, flags, data = parse_full_atom(data)
        except ValueError as e:
            raise MP4StreamInfoError(e)

        if version != 0:
            raise MP4StreamInfoError("Unsupported stsd version")

        try:
            num_entries, offset = cdata.uint32_be_from(data, 0)
        except cdata.error as e:
            raise MP4StreamInfoError(e)

        if num_entries == 0:
            return

        # look at the first entry if there is one; it is parsed as an atom
        # from an in-memory copy of the remaining payload
        entry_fileobj = cBytesIO(data[offset:])
        try:
            entry_atom = Atom(entry_fileobj)
        except AtomError as e:
            raise MP4StreamInfoError(e)

        try:
            entry = AudioSampleEntry(entry_atom, entry_fileobj)
        except ASEntryError as e:
            raise MP4StreamInfoError(e)
        else:
            self.channels = entry.channels
            self.bits_per_sample = entry.sample_size
            self.sample_rate = entry.sample_rate
            self.bitrate = entry.bitrate
            self.codec = entry.codec
            self.codec_description = entry.codec_description

    def pprint(self):
        """Return a one-line summary: codec description, length, bitrate."""

        return "MPEG-4 audio (%s), %.2f seconds, %d bps" % (
            self.codec_description, self.length, self.bitrate)
1028
1029
class MP4(FileType):
    """MP4(filething)

    An MPEG-4 audio file, probably containing AAC.

    If more than one track is present in the file, the first is used.
    Only audio ('soun') tracks will be read.

    Arguments:
        filething (filething)

    Attributes:
        info (`MP4Info`)
        tags (`MP4Tags`)
    """

    # Tag class used to build `tags`; looked up through the instance.
    MP4Tags = MP4Tags

    _mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"]

    @loadfile()
    def load(self, filething):
        """Parse the atoms, then populate `info` and `tags` from them."""

        source = filething.fileobj

        try:
            atoms = Atoms(source)
        except AtomError as exc:
            reraise(error, exc, sys.exc_info()[2])

        self.info = MP4Info()
        try:
            self.info.load(atoms, source)
        except MP4NoTrackError:
            # No audio track: keep the default stream info.
            pass
        except Exception as exc:
            # Errors of this module pass through untouched, anything else
            # is converted while keeping the original traceback.
            if isinstance(exc, error):
                raise
            reraise(MP4StreamInfoError, exc, sys.exc_info()[2])

        if MP4Tags._can_load(atoms):
            try:
                self.tags = self.MP4Tags(atoms, source)
            except Exception as exc:
                if isinstance(exc, error):
                    raise
                reraise(MP4MetadataError, exc, sys.exc_info()[2])
        else:
            self.tags = None

    @property
    def _padding(self):
        """Padding reported by the tags, or 0 when there are no tags."""

        return 0 if self.tags is None else self.tags._padding

    def save(self, *args, **kwargs):
        """save(filething=None, padding=None)"""

        super(MP4, self).save(*args, **kwargs)

    def add_tags(self):
        """Attach an empty tag object; raises `error` if tags exist."""

        if self.tags is not None:
            raise error("an MP4 tag already exists")
        self.tags = self.MP4Tags()

    @staticmethod
    def score(filename, fileobj, header_data):
        """Score 0-2: one point each for ``ftyp`` and ``mp4`` in the header."""

        has_ftyp = b"ftyp" in header_data
        has_mp4 = b"mp4" in header_data
        return has_ftyp + has_mp4
1100
1101
# Alias for the `MP4` class; exported as ``Open`` through ``__all__``.
Open = MP4
1103
1104
@convert_error(IOError, error)
@loadfile(method=False, writable=True)
def delete(filething):
    """delete(filething)

    Remove tags from a file.

    Arguments:
        filething (filething)
    Raises:
        mutagen.MutagenError
    """

    mp4_file = MP4(filething)
    # Loading moved the file position; go back to the start before the
    # tags are deleted through the same file object.
    filething.fileobj.seek(0)
    mp4_file.delete(filething)
1121