1# -*- coding: utf-8 -*-
2# Copyright (C) 2005  Joe Wreschnig
3#
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 2 of the License, or
7# (at your option) any later version.
8
9"""APEv2 reading and writing.
10
11The APEv2 format is most commonly used with Musepack files, but is
12also the format of choice for WavPack and other formats. Some MP3s
13also have APEv2 tags, but this can cause problems with many MP3
14decoders and taggers.
15
16APEv2 tags, like Vorbis comments, are freeform key=value pairs. APEv2
17keys can be any ASCII string with characters from 0x20 to 0x7E,
18between 2 and 255 characters long.  Keys are case-sensitive, but
readers are recommended to be case insensitive, and it is forbidden to
have multiple keys which differ only in case.  Keys are usually stored
21title-cased (e.g. 'Artist' rather than 'artist').
22
23APEv2 values are slightly more structured than Vorbis comments; values
24are flagged as one of text, binary, or an external reference (usually
25a URI).
26
27Based off the format specification found at
28http://wiki.hydrogenaudio.org/index.php?title=APEv2_specification.
29"""
30
# Names exported by a star-import of this module.
__all__ = ["APEv2", "APEv2File", "Open", "delete"]
32
33import sys
34import struct
35from collections import MutableSequence
36
37from ._compat import (cBytesIO, PY3, text_type, PY2, reraise, swap_to_string,
38                      xrange)
39from mutagen import Metadata, FileType, StreamInfo
40from mutagen._util import DictMixin, cdata, delete_bytes, total_ordering, \
41    MutagenError, loadfile, convert_error, seek_end, get_size
42
43
def is_valid_apev2_key(key):
    """Return whether *key* is usable as an APEv2 item key.

    A valid key is 2 to 255 characters long, uses only characters in the
    range ``u' '`` to ``u'~'`` and is not one of the reserved names
    ``OggS``, ``TAG``, ``ID3`` or ``MP+``.

    On Python 3 a non-text key raises TypeError. On Python 2 a byte
    string is decoded as ASCII first and is invalid if that fails.
    """

    if not isinstance(key, text_type):
        if PY3:
            raise TypeError("APEv2 key must be str")

        try:
            key = key.decode('ascii')
        except UnicodeDecodeError:
            return False

    # The length check comes first so min()/max() never see an empty key.
    if not 2 <= len(key) <= 255:
        return False
    if min(key) < u' ' or max(key) > u'~':
        return False
    return key not in (u"OggS", u"TAG", u"ID3", u"MP+")
58
# There are three different kinds of APE tag values.
# "0: Item contains text information coded in UTF-8
#  1: Item contains binary information
#  2: Item is a locator of external stored information [e.g. URL]
#  3: reserved"
# These codes are what APEv2.__parse_tag extracts from an item's flags
# with ``(flags & 6) >> 1`` and what APEv2.save writes as ``kind << 1``.
TEXT, BINARY, EXTERNAL = xrange(3)

# Bit masks for the 32-bit flags word of a tag header/footer.
# APEv2.save writes HAS_HEADER | IS_HEADER into the header and HAS_HEADER
# into the footer; _APEv2Data tests HAS_HEADER to locate the header.
# HAS_NO_FOOTER is defined but not used by the code in this module.
HAS_HEADER = 1 << 31
HAS_NO_FOOTER = 1 << 30
IS_HEADER = 1 << 29
69
70
class error(MutagenError):
    """Base class for all errors raised by this module."""
73
74
class APENoHeaderError(error):
    """Raised when no APE tag could be found in the file."""
77
78
class APEUnsupportedVersionError(error):
    """Error type for unsupported APE tag versions.

    NOTE(review): nothing in this module raises it; presumably it is
    kept for API compatibility.
    """
81
82
class APEBadItemError(error):
    """Raised when a tag item is malformed.

    Examples are a reserved value type, a truncated key or value, or
    data that cannot be decoded.
    """
85
86
class _APEv2Data(object):
    """Locate an APEv2 tag in a file object and read its raw item data.

    Constructing an instance scans *fileobj* for a tag footer or header,
    parses the 32-byte metadata block and fills in the attributes below.
    When no tag is found every offset stays None and `tag` stays None.
    """

    # Store offsets of the important parts of the file.
    start = header = data = footer = end = None
    # Footer or header; seek here and read 32 to get version/size/items/flags
    metadata = None
    # Actual tag data
    tag = None

    # Fields of the metadata block; `version` is kept as the raw 4 bytes.
    version = None
    size = None
    items = None
    flags = 0

    # The tag is at the start rather than the end. A tag at both
    # the start and end of the file (i.e. the tag is the whole file)
    # is not considered to be at the start.
    is_at_start = False

    def __init__(self, fileobj):
        """Scan *fileobj* for an APEv2 tag and load its raw bytes.

        Raises IOError and apev2.error
        """

        self.__find_metadata(fileobj)

        # Pick the metadata block to parse: whichever of header/footer
        # was found, or the one at the larger offset if both were.
        if self.header is None:
            self.metadata = self.footer
        elif self.footer is None:
            self.metadata = self.header
        else:
            self.metadata = max(self.header, self.footer)

        # Nothing found: leave all attributes at their defaults.
        if self.metadata is None:
            return

        self.__fill_missing(fileobj)
        self.__fix_brokenness(fileobj)
        if self.data is not None:
            # self.size no longer includes the footer at this point when
            # a footer was found (see __fill_missing).
            fileobj.seek(self.data)
            self.tag = fileobj.read(self.size)

    def __find_metadata(self, fileobj):
        """Set `footer` or `header` to the offset of an "APETAGEX" block.

        Checked in order: a footer in the last 32 bytes, a footer right
        before a trailing ID3v1 tag, a footer before a Lyrics3v2 block
        that precedes an ID3v1 tag, and finally a header at offset 0.
        """
        # Try to find a header or footer.

        # Check for a simple footer.
        try:
            fileobj.seek(-32, 2)
        except IOError:
            # Presumably the file is shorter than 32 bytes; give up.
            fileobj.seek(0, 2)
            return
        if fileobj.read(8) == b"APETAGEX":
            fileobj.seek(-8, 1)
            self.footer = self.metadata = fileobj.tell()
            return

        # Check for an APEv2 tag followed by an ID3v1 tag at the end.
        try:
            # IOError is used as a local "not found" signal; it is
            # swallowed by the except clause at the end of this block.
            if get_size(fileobj) < 128:
                raise IOError
            fileobj.seek(-128, 2)
            if fileobj.read(3) == b"TAG":

                # Go back to 32 bytes before the "TAG" marker.
                fileobj.seek(-35, 1)  # "TAG" + header length
                if fileobj.read(8) == b"APETAGEX":
                    fileobj.seek(-8, 1)
                    self.footer = fileobj.tell()
                    return

                # ID3v1 tag at the end, maybe preceded by Lyrics3v2.
                # (http://www.id3.org/lyrics3200.html)
                # (header length - "APETAGEX") - "LYRICS200"
                # This lands on the 9 bytes directly before "TAG".
                fileobj.seek(15, 1)
                if fileobj.read(9) == b'LYRICS200':
                    fileobj.seek(-15, 1)  # "LYRICS200" + size tag
                    try:
                        # Six bytes read as a decimal number.
                        offset = int(fileobj.read(6))
                    except ValueError:
                        raise IOError

                    # Step back over the size field just read (6), the
                    # Lyrics3v2 data (offset) and a 32-byte APE footer.
                    fileobj.seek(-32 - offset - 6, 1)
                    if fileobj.read(8) == b"APETAGEX":
                        fileobj.seek(-8, 1)
                        self.footer = fileobj.tell()
                        return

        except IOError:
            pass

        # Check for a tag at the start.
        fileobj.seek(0, 0)
        if fileobj.read(8) == b"APETAGEX":
            self.is_at_start = True
            self.header = 0

    def __fill_missing(self, fileobj):
        """Parse the metadata block and derive the remaining offsets.

        Reads version, size, item count and flags from the block at
        `self.metadata`, then computes `data`, `end` and whichever of
        `header`/`footer` was not already known.

        Raises IOError and apev2.error
        """

        # Skip the 8-byte "APETAGEX" preamble.
        fileobj.seek(self.metadata + 8)

        data = fileobj.read(16)
        if len(data) != 16:
            raise error

        # Four 4-byte fields; version is stored undecoded.
        self.version = data[:4]
        self.size = cdata.uint32_le(data[4:8])
        self.items = cdata.uint32_le(data[8:12])
        self.flags = cdata.uint32_le(data[12:])

        if self.header is not None:
            self.data = self.header + 32
            # If we're reading the header, the size is the header
            # offset + the size, which includes the footer.
            self.end = self.data + self.size
            # Only record a footer if one is actually present there.
            fileobj.seek(self.end - 32, 0)
            if fileobj.read(8) == b"APETAGEX":
                self.footer = self.end - 32
        elif self.footer is not None:
            self.end = self.footer + 32
            self.data = self.end - self.size
            # With no header flagged, `header` is set to the data start.
            if self.flags & HAS_HEADER:
                self.header = self.data - 32
            else:
                self.header = self.data
        else:
            raise APENoHeaderError("No APE tag found")

        # exclude the footer from size
        if self.footer is not None:
            self.size -= 32

    def __fix_brokenness(self, fileobj):
        """Set `start`, extending it backwards over leftover headers.

        `start` begins at the header offset (or the data offset if there
        is no header) and is moved back for as long as an "APETAGEX"
        marker is found 24 bytes before the current position.
        """
        # Fix broken tags written with PyMusepack.
        if self.header is not None:
            start = self.header
        else:
            start = self.data
        fileobj.seek(start)

        while start > 0:
            # Clean up broken writing from pre-Mutagen PyMusepack.
            # It didn't remove the first 24 bytes of header.
            try:
                fileobj.seek(-24, 1)
            except IOError:
                break
            else:
                if fileobj.read(8) == b"APETAGEX":
                    # Rewind to the marker so the next iteration's
                    # relative seek starts from it.
                    fileobj.seek(-8, 1)
                    start = fileobj.tell()
                else:
                    break
        self.start = start
237
238
class _CIDictProxy(DictMixin):
    """Mapping with case-insensitive keys that remembers key spelling.

    Internally all names are stored as lowercase, but the case they
    were set with is remembered and used when saving.  This is roughly
    in line with the standard, which says that keys are case-sensitive
    but two keys differing only in case are not allowed, and recommends
    case-insensitive implementations.
    """

    def __init__(self, *args, **kwargs):
        # Both maps are keyed by the lowercased name: one holds the
        # spelling last used to set the key, the other the value.
        self.__casemap = {}
        self.__dict = {}
        super(_CIDictProxy, self).__init__(*args, **kwargs)

    def __getitem__(self, key):
        folded = key.lower()
        return self.__dict[folded]

    def __setitem__(self, key, value):
        folded = key.lower()
        self.__casemap[folded] = key
        self.__dict[folded] = value

    def __delitem__(self, key):
        folded = key.lower()
        del self.__casemap[folded]
        del self.__dict[folded]

    def keys(self):
        """Return the keys in the spelling they were last set with."""

        spelled = []
        for folded in self.__dict:
            spelled.append(self.__casemap.get(folded, folded))
        return spelled
267
268
class APEv2(_CIDictProxy, Metadata):
    """APEv2(filething=None)

    A file with an APEv2 tag.

    Keys are validated with `is_valid_apev2_key` on every access and
    values are stored as `_APEValue` instances.

    ID3v1 tags are silently ignored and overwritten.
    """

    filename = None

    def pprint(self):
        """Return tag key=value pairs in a human-readable format."""

        lines = []
        for name, value in sorted(self.items()):
            lines.append(u"%s=%s" % (name, value.pprint()))
        return u"\n".join(lines)

    @convert_error(IOError, error)
    @loadfile()
    def load(self, filething):
        """Load tags from a filename.

        Existing items are cleared before the tag found in the file is
        parsed.

        Raises apev2.error
        """

        data = _APEv2Data(filething.fileobj)

        if not data.tag:
            raise APENoHeaderError("No APE tag found")

        self.clear()
        self.__parse_tag(data.tag, data.items)

    def __parse_tag(self, tag, count):
        """Parse up to *count* items out of the raw bytes *tag*.

        Raises IOError and APEBadItemError
        """

        stream = cBytesIO(tag)

        for _ in xrange(count):
            item_header = stream.read(8)
            # someone writes wrong item counts
            if not item_header:
                break
            if len(item_header) != 8:
                raise error
            size = cdata.uint32_le(item_header[:4])
            flags = cdata.uint32_le(item_header[4:8])

            # Bits 1 and 2 hold the value kind (0-3)
            # Bit 0 is read/write flag, ignored
            kind = (flags >> 1) & 3
            if kind == 3:
                raise APEBadItemError("value type must be 0, 1, or 2")

            # The key is NUL-terminated; running out of data before the
            # terminator means the item is broken.
            pieces = []
            while True:
                char = stream.read(1)
                if not char:
                    raise APEBadItemError
                if char == b"\x00":
                    break
                pieces.append(char)
            key = b"".join(pieces)

            if PY3:
                try:
                    key = key.decode("ascii")
                except UnicodeError as err:
                    reraise(APEBadItemError, err, sys.exc_info()[2])

            payload = stream.read(size)
            if len(payload) != size:
                raise APEBadItemError

            self[key] = _get_value_type(kind)._new(payload)

    def __checked_key(self, key):
        """Return *key* in its storage form or raise KeyError if invalid.

        On Python 2 the key is encoded to ASCII; otherwise it is
        returned unchanged.
        """

        if not is_valid_apev2_key(key):
            raise KeyError("%r is not a valid APEv2 key" % key)
        if PY2:
            return key.encode('ascii')
        return key

    def __getitem__(self, key):
        return super(APEv2, self).__getitem__(self.__checked_key(key))

    def __delitem__(self, key):
        super(APEv2, self).__delitem__(self.__checked_key(key))

    def __setitem__(self, key, value):
        """'Magic' value setter.

        This function tries to guess at what kind of value you want to
        store. If you pass in a valid UTF-8 or Unicode string, it
        treats it as a text value. If you pass in a list, it treats it
        as a list of string/Unicode values.  If you pass in a string
        that is not valid UTF-8, it assumes it is a binary value.

        Python 3: all bytes will be assumed to be a byte value, even
        if they are valid utf-8.

        If you need to force a specific type of value (e.g. binary
        data that also happens to be valid UTF-8, or an external
        reference), use the APEValue factory and set the value to the
        result of that::

            from mutagen.apev2 import APEValue, EXTERNAL
            tag['Website'] = APEValue('http://example.org', EXTERNAL)
        """

        key = self.__checked_key(key)

        if not isinstance(value, _APEValue):
            # let's guess at the content if we're not already a value...
            if isinstance(value, text_type):
                # unicode? we've got to be text.
                value = APEValue(value, TEXT)
            elif isinstance(value, list):
                # list? text, with the entries joined by NUL.
                strings = []
                for entry in value:
                    if not isinstance(entry, text_type):
                        if PY3:
                            raise TypeError("item in list not str")
                        entry = entry.decode("utf-8")
                    strings.append(entry)
                value = APEValue(u"\0".join(strings), TEXT)
            elif PY3:
                value = APEValue(value, BINARY)
            else:
                try:
                    value.decode("utf-8")
                except UnicodeError:
                    # invalid UTF8 text, probably binary
                    guessed = BINARY
                else:
                    # valid UTF8, probably text
                    guessed = TEXT
                value = APEValue(value, guessed)

        super(APEv2, self).__setitem__(key, value)

    @convert_error(IOError, error)
    @loadfile(writable=True, create=True)
    def save(self, filething=None):
        """Save changes to a file.

        If no filename is given, the one most recently loaded is used.

        Tags are always written at the end of the file, and include
        a header and a footer.
        """

        fileobj = filething.fileobj

        # Remove whatever tag is already there before appending.
        existing = _APEv2Data(fileobj)

        if existing.is_at_start:
            delete_bytes(fileobj, existing.end - existing.start,
                         existing.start)
        elif existing.start is not None:
            fileobj.seek(existing.start)
            # Delete an ID3v1 tag if present, too.
            fileobj.truncate()
        fileobj.seek(0, 2)

        packed_items = []
        for key, value in self.items():
            # Packed format for an item:
            # 4B: Value length
            # 4B: Value type
            # Key name
            # 1B: Null
            # Key value
            payload = value._write()
            if not isinstance(key, bytes):
                key = key.encode("utf-8")
            item = bytearray(
                struct.pack("<2I", len(payload), value.kind << 1))
            item += key + b"\0" + payload
            packed_items.append(bytes(item))

        # "APE tags items should be sorted ascending by size... This is
        # not a MUST, but STRONGLY recommended. Actually the items should
        # be sorted by importance/byte, but this is not feasible."
        packed_items = sorted(packed_items,
                              key=lambda item: (len(item), item))
        num_tags = len(packed_items)
        body = b"".join(packed_items)

        # Header and footer share a layout and differ only in flags:
        # preamble, version, tag size, item count, flags, 8 zero bytes.
        tag_size = len(body) + 32
        header = bytearray(
            b"APETAGEX" +
            struct.pack("<4I", 2000, tag_size, num_tags,
                        HAS_HEADER | IS_HEADER) +
            b"\0" * 8)
        footer = bytearray(
            b"APETAGEX" +
            struct.pack("<4I", 2000, tag_size, num_tags, HAS_HEADER) +
            b"\0" * 8)

        fileobj.write(header)
        fileobj.write(body)
        fileobj.write(footer)

    @convert_error(IOError, error)
    @loadfile(writable=True)
    def delete(self, filething=None):
        """Remove tags from a file.

        The in-memory items are cleared as well, whether or not a tag
        was found in the file.
        """

        fileobj = filething.fileobj
        found = _APEv2Data(fileobj)
        has_tag = found.start is not None and found.size is not None
        if has_tag:
            delete_bytes(fileobj, found.end - found.start, found.start)
        self.clear()
492
493
# `Open` is an alias of the APEv2 class; it is exported through __all__.
Open = APEv2
495
496
@convert_error(IOError, error)
@loadfile(method=False, writable=True)
def delete(filething):
    """delete(filething)

    Arguments:
        filething (filething)
    Raises:
        mutagen.MutagenError

    Remove tags from a file. Does nothing if no APE tag is found.
    """

    try:
        tags = APEv2(filething)
    except APENoHeaderError:
        # No tag present, nothing to remove.
        return
    else:
        # Rewind before handing the file over to the tag's delete().
        filething.fileobj.seek(0)
        tags.delete(filething)
516
517
def _get_value_type(kind):
    """Return the _APEValue subclass for *kind*.

    Raises ValueError if *kind* is not TEXT, BINARY or EXTERNAL.
    """

    known_kinds = (
        (TEXT, APETextValue),
        (BINARY, APEBinaryValue),
        (EXTERNAL, APEExtValue),
    )
    for code, value_class in known_kinds:
        if kind == code:
            return value_class
    raise ValueError("unknown kind %r" % kind)
528
529
def APEValue(value, kind):
    """APEv2 tag value factory.

    Use this if you need to specify the value's type manually.  Binary
    and text data are automatically detected by APEv2.__setitem__.

    Returns an instance of the value class matching *kind*, built from
    *value*. Raises ValueError if *kind* is not TEXT, BINARY or EXTERNAL.
    """

    try:
        value_class = _get_value_type(kind)
    except ValueError:
        raise ValueError("kind must be TEXT, BINARY, or EXTERNAL")
    return value_class(value)
543
544
class _APEValue(object):
    """Base class for APEv2 tag values.

    Subclasses set `kind` and implement `_parse`, `_write` and
    `_validate`; the base implementations raise NotImplementedError.
    """

    # Value kind code of the subclass; None on this base class.
    kind = None
    # The stored value, set by __init__ or _parse.
    value = None

    def __init__(self, value, kind=None):
        """Validate and store *value*.

        Raises ValueError if *kind* is given and differs from the
        class's own `kind`, plus whatever `_validate` raises.
        """

        # kind kwarg is for backwards compat
        if kind is not None and kind != self.kind:
            raise ValueError(
                "kind %r does not match %s.kind (%r)" % (
                    kind, type(self).__name__, self.kind))
        self.value = self._validate(value)

    @classmethod
    def _new(cls, data):
        """Create an instance from raw tag bytes.

        __init__ is bypassed, so `_validate` is not called; the value
        is set by `_parse`.
        """

        instance = cls.__new__(cls)
        instance._parse(data)
        return instance

    def _parse(self, data):
        """Sets value or raises APEBadItemError"""

        raise NotImplementedError

    def _write(self):
        """Returns bytes"""

        raise NotImplementedError

    def _validate(self, value):
        """Returns validated value or raises TypeError/ValueError"""

        raise NotImplementedError

    def __repr__(self):
        # %r rather than %d for kind: the output is the same for the
        # integer kinds, and it no longer fails when kind is None.
        return "%s(%r, %r)" % (type(self).__name__, self.value, self.kind)
579
580
@swap_to_string
@total_ordering
class _APEUtf8Value(_APEValue):
    """Shared implementation for values stored as UTF-8 text.

    `value` is a text string. Comparisons are delegated to it, so an
    instance compares directly against plain strings.
    """

    def _parse(self, data):
        """Decode *data* as UTF-8 or raise APEBadItemError."""

        try:
            decoded = data.decode("utf-8")
        except UnicodeDecodeError as exc:
            reraise(APEBadItemError, exc, sys.exc_info()[2])
        else:
            self.value = decoded

    def _validate(self, value):
        """Return *value* as text.

        Non-text input raises TypeError on Python 3 and is decoded as
        UTF-8 otherwise.
        """

        if isinstance(value, text_type):
            return value
        if PY3:
            raise TypeError("value not str")
        return value.decode("utf-8")

    def _write(self):
        """Return the value encoded as UTF-8."""

        return self.value.encode("utf-8")

    def __len__(self):
        text = self.value
        return len(text)

    def __bytes__(self):
        return self._write()

    def __eq__(self, other):
        text = self.value
        return text == other

    def __lt__(self, other):
        text = self.value
        return text < other

    def __str__(self):
        return self.value
616
617
class APETextValue(_APEUtf8Value, MutableSequence):
    """An APEv2 text value.

    Text values are Unicode/UTF-8 strings. They can be accessed like
    strings (with a null separating the values), or arrays of strings.
    """

    kind = TEXT

    def __iter__(self):
        """Iterate over the strings of the value (not the characters)"""

        strings = self.value.split(u"\0")
        return iter(strings)

    def __getitem__(self, index):
        strings = self.value.split(u"\0")
        return strings[index]

    def __len__(self):
        # One more string than there are separators.
        separators = self.value.count(u"\0")
        return separators + 1

    def __setitem__(self, index, value):
        # The inherited _validate applies the same text coercion that
        # item assignment needs.
        text = self._validate(value)

        strings = list(self)
        strings[index] = text
        self.value = u"\0".join(strings)

    def insert(self, index, value):
        """Insert the string *value* before position *index*."""

        text = self._validate(value)

        strings = list(self)
        strings.insert(index, text)
        self.value = u"\0".join(strings)

    def __delitem__(self, index):
        strings = list(self)
        del strings[index]
        self.value = u"\0".join(strings)

    def pprint(self):
        """Return the strings joined by " / "."""

        return u" / ".join(self)
667
668
@swap_to_string
@total_ordering
class APEBinaryValue(_APEValue):
    """An APEv2 binary value."""

    kind = BINARY

    def _parse(self, data):
        """Store the raw bytes unchanged."""

        self.value = data

    def _write(self):
        """Return the stored bytes."""

        return self.value

    def _validate(self, value):
        """Return *value* as bytes or raise TypeError if it is not bytes."""

        if isinstance(value, bytes):
            return bytes(value)
        raise TypeError("value not bytes")

    def __len__(self):
        data = self.value
        return len(data)

    def __bytes__(self):
        return self._write()

    def __eq__(self, other):
        data = self.value
        return data == other

    def __lt__(self, other):
        data = self.value
        return data < other

    def pprint(self):
        """Return a placeholder giving the value's length in bytes."""

        size = len(self)
        return u"[%d bytes]" % size
701
702
class APEExtValue(_APEUtf8Value):
    """An APEv2 external value.

    External values are usually URI or IRI strings.
    """

    kind = EXTERNAL

    def pprint(self):
        """Return the value prefixed with "[External] "."""

        location = self.value
        return u"[External] %s" % location
713
714
class APEv2File(FileType):
    """APEv2File(filething)

    Arguments:
        filething (filething)

    Attributes:
        tags (`APEv2`)
    """

    class _Info(StreamInfo):
        """Placeholder stream info; nothing is read from the file."""

        length = 0
        bitrate = 0

        def __init__(self, fileobj):
            """Accept *fileobj* without using it."""

        @staticmethod
        def pprint():
            return u"Unknown format with APEv2 tag."

    @loadfile()
    def load(self, filething):
        """Load stream info and tags; `tags` is None if no tag is found."""

        fileobj = filething.fileobj

        self.info = self._Info(fileobj)
        try:
            fileobj.seek(0, 0)
        except IOError as e:
            raise error(e)

        try:
            loaded = APEv2(fileobj)
        except APENoHeaderError:
            loaded = None
        self.tags = loaded

    def add_tags(self):
        """Attach an empty APEv2 tag; raise `error` if tags already exist."""

        if self.tags is not None:
            raise error("%r already has tags: %r" % (self, self.tags))
        self.tags = APEv2()

    @staticmethod
    def score(filename, fileobj, header):
        """Return a score for how likely the file carries an APEv2 tag.

        Returns -1 if the end of the file cannot be read. Otherwise the
        score is 1 when "APETAGEX" occurs in the bytes read after
        ``seek_end(fileobj, 160)``, minus 1 when *header* starts with
        "ID3".
        """

        try:
            seek_end(fileobj, 160)
            tail = fileobj.read()
        except IOError:
            return -1
        has_ape_marker = b"APETAGEX" in tail
        looks_like_id3 = header.startswith(b"ID3")
        return has_ape_marker - looks_like_id3
765