1# -*- coding: utf-8 -*-
2# Copyright (C) 2005-2006  Joe Wreschnig
3#                    2013  Christoph Reiter
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 2 of the License, or
8# (at your option) any later version.
9
10"""Read and write Vorbis comment data.
11
12Vorbis comments are freeform key/value pairs; keys are
13case-insensitive ASCII and values are Unicode strings. A key may have
14multiple values.
15
16The specification is at http://www.xiph.org/vorbis/doc/v-comment.html.
17"""
18
19import sys
20
21import mutagen
22from ._compat import reraise, BytesIO, text_type, xrange, PY3, PY2
23from mutagen._util import DictMixin, cdata, MutagenError
24
25
def is_valid_key(key):
    """Return true if a string is a valid Vorbis comment key.

    Valid Vorbis comment keys are non-empty and consist only of printable
    ASCII between 0x20 (space) and 0x7D ('}'), excluding '='.

    Takes str/unicode in Python 2, unicode in Python 3.

    Returns:
        bool: True if every character is allowed and the key is not empty.

    Raises:
        TypeError: if `key` is a bytes object on Python 3, or if it cannot
            be iterated / compared with text at all.
    """

    # On Python 2 `bytes` is just another name for `str`, so this condition
    # can only be true on Python 3, where byte strings are never valid keys.
    if isinstance(key, bytes) and not isinstance(key, str):
        raise TypeError("needs to be str not bytes")

    # all() runs first so that non-iterable input still raises TypeError;
    # bool(key) then rejects the empty key, for which all() is vacuously true.
    return all(" " <= c <= "}" and c != "=" for c in key) and bool(key)
43
44
# Alternate name bound to the very same function object as is_valid_key.
istag = is_valid_key
46
47
class error(MutagenError):
    """Base class of the Vorbis comment errors defined in this module.

    Raised directly by VComment.load() when the data cannot be parsed as
    a Vorbis comment.
    """
50
51
class VorbisUnsetFrameError(error):
    """Raised by VComment.load() when framing is required but the
    framing bit following the comments is not set.
    """
54
55
class VorbisEncodingError(error):
    """Raised by VComment.load() for a comment that has no '=' separator
    (in 'strict' mode) or whose tag name cannot be encoded as ASCII.
    """
58
59
class VComment(mutagen.Tags, list):
    """A Vorbis comment parser, accessor, and renderer.

    All comment ordering is preserved. A VComment is a list of
    (key, value) pairs, and so any Python list method can be used on it.

    Vorbis comments are always wrapped in something like an Ogg Vorbis
    bitstream or a FLAC metadata block, so this loads byte string data or
    a file-like object, not a filename.

    Attributes:
        vendor (text): the stream 'vendor' (i.e. writer); defaults to
            'Mutagen ' followed by the mutagen version string
    """

    vendor = u"Mutagen " + mutagen.version_string

    def __init__(self, data=None, *args, **kwargs):
        """Create a comment, optionally parsing it from `data`.

        Arguments:
            data (bytes or file-like): serialized comment to parse; when
                None an empty comment is created and nothing is loaded
            *args, **kwargs: passed through unchanged to load()

        Raises:
            TypeError: if `data` is neither bytes nor has a `read` attribute
        """

        # Number of bytes load() consumed from `data`; stays 0 when no
        # data was given.
        self._size = 0
        # Collect the args to pass to load, this lets child classes
        # override just load and get equivalent magic for the
        # constructor.
        if data is not None:
            if isinstance(data, bytes):
                data = BytesIO(data)
            elif not hasattr(data, 'read'):
                raise TypeError("VComment requires bytes or a file-like")
            start = data.tell()
            self.load(data, *args, **kwargs)
            self._size = data.tell() - start

    def load(self, fileobj, errors='replace', framing=True):
        """Parse a Vorbis comment from a file-like object.

        Parsed (tag, value) pairs are appended to this list and `vendor`
        is replaced by the vendor string found in the data. Comments whose
        tag is not a valid key (see is_valid_key) are silently skipped.

        Arguments:
            fileobj (file-like): object to read the serialized comment from
            errors (str): 'strict', 'replace', or 'ignore'.
                This affects Unicode decoding and how other malformed content
                is interpreted: a comment without '=' is skipped for
                'ignore', stored under the tag 'unknown<index>' for
                'replace', and raises VorbisEncodingError otherwise.
            framing (bool): if true, fail if a framing bit is not present

        Raises:
            VorbisEncodingError: for a comment without '=' in strict mode,
                or a tag name that cannot be encoded as ASCII
            VorbisUnsetFrameError: if `framing` is true and the framing
                byte is missing or its lowest bit is not set
            error: if a length field cannot be decoded, a declared length
                is too large to read, or the data is otherwise not a
                valid Vorbis comment

        Framing bits are required by the Vorbis comment specification,
        but are not used in FLAC Vorbis comment blocks.
        """

        def read_text(length):
            # Read `length` bytes and decode them as UTF-8. A length that
            # is too large to read is reported as a module `error` instead
            # of leaking OverflowError/MemoryError; this is applied to the
            # vendor string as well as to every comment.
            try:
                return fileobj.read(length).decode('utf-8', errors)
            except (OverflowError, MemoryError):
                raise error("cannot read %d bytes, too large" % length)

        try:
            # Layout: vendor length, vendor, comment count, then for each
            # comment a length followed by "TAG=value". Every length and
            # the count are read as 4 bytes.
            vendor_length = cdata.uint_le(fileobj.read(4))
            self.vendor = read_text(vendor_length)
            count = cdata.uint_le(fileobj.read(4))
            for i in xrange(count):
                length = cdata.uint_le(fileobj.read(4))
                string = read_text(length)
                try:
                    tag, value = string.split('=', 1)
                except ValueError as err:
                    # No '=' in the comment: the `errors` mode decides.
                    if errors == "ignore":
                        continue
                    elif errors == "replace":
                        tag, value = u"unknown%d" % i, string
                    else:
                        reraise(VorbisEncodingError, err, sys.exc_info()[2])
                try:
                    tag = tag.encode('ascii', errors)
                except UnicodeEncodeError:
                    raise VorbisEncodingError("invalid tag name %r" % tag)
                else:
                    # string keys in py3k
                    if PY3:
                        tag = tag.decode("ascii")
                    if is_valid_key(tag):
                        self.append((tag, value))

            if framing:
                # read(1) yields an empty result when the data ends before
                # the framing byte. Check for that explicitly so truncated
                # input is reported as a framing error rather than leaking
                # an IndexError from indexing an empty bytearray.
                framing_byte = bytearray(fileobj.read(1))
                if not framing_byte or not framing_byte[0] & 0x01:
                    raise VorbisUnsetFrameError("framing bit was unset")
        except (cdata.error, TypeError):
            raise error("file is not a valid Vorbis comment")

    def validate(self):
        """Validate keys and values.

        Check to make sure every key used is a valid Vorbis key, and
        that every value used is a valid Unicode or UTF-8 string. If
        any invalid keys or values are found, a ValueError is raised.

        In Python 3 all keys and values have to be a string.

        Returns:
            bool: always True when validation succeeds

        Raises:
            ValueError: if the vendor, a key or a value is invalid
        """

        if not isinstance(self.vendor, text_type):
            if PY3:
                raise ValueError("vendor needs to be str")

            # Python 2 only: byte strings are accepted if they are UTF-8.
            try:
                self.vendor.decode('utf-8')
            except UnicodeDecodeError:
                raise ValueError("vendor is not a valid UTF-8 string")

        for key, value in self:
            # is_valid_key raises TypeError for unsuitable key types;
            # report those the same way as an invalid key.
            try:
                if not is_valid_key(key):
                    raise ValueError("%r is not a valid key" % key)
            except TypeError:
                raise ValueError("%r is not a valid key" % key)

            if not isinstance(value, text_type):
                if PY3:
                    err = "%r needs to be str for key %r" % (value, key)
                    raise ValueError(err)

                # Python 2 only. The broad except is deliberate: any
                # failure to decode, including a value that has no
                # decode() at all, means the value is unusable.
                try:
                    value.decode("utf-8")
                except Exception:
                    err = "%r is not a valid value for key %r" % (value, key)
                    raise ValueError(err)

        return True

    def clear(self):
        """Clear all keys from the comment."""

        # Empty the list in place in one step instead of calling remove()
        # once per item, which rescans the list every time.
        del self[:]

    def write(self, framing=True):
        """Return the serialized comment as a byte string.

        Validation is always performed, so calling this function on
        invalid data may raise a ValueError.

        Arguments:
            framing (bool): if true, append a framing bit (see load)

        Returns:
            bytes: vendor and comments in the layout that load() parses
        """

        self.validate()

        def _encode(value):
            # Text is encoded as UTF-8; byte strings pass through untouched.
            if not isinstance(value, bytes):
                return value.encode('utf-8')
            return value

        f = BytesIO()
        vendor = _encode(self.vendor)
        f.write(cdata.to_uint_le(len(vendor)))
        f.write(vendor)
        f.write(cdata.to_uint_le(len(self)))
        for tag, value in self:
            tag = _encode(tag)
            value = _encode(value)
            comment = tag + b"=" + value
            f.write(cdata.to_uint_le(len(comment)))
            f.write(comment)
        if framing:
            f.write(b"\x01")
        return f.getvalue()

    def pprint(self):
        """Return the comments as text, one 'key=value' pair per line.

        Byte string keys and values are decoded as UTF-8, with undecodable
        bytes replaced.
        """

        def _decode(value):
            if not isinstance(value, text_type):
                return value.decode('utf-8', 'replace')
            return value

        tags = [u"%s=%s" % (_decode(k), _decode(v)) for k, v in self]
        return u"\n".join(tags)
224
225
class VCommentDict(VComment, DictMixin):
    """A VComment with a dictionary-style interface.

    Two things differ from a real dictionary. len(comment) reports the
    number of stored values rather than the number of distinct keys, and
    iterating the object yields (key, value) pairs rather than keys. A
    key can hold several values, so the same key may show up more than
    once during iteration.

    Vorbis comment keys are case-insensitive, so lookups compare keys
    in lowercase and keys() reports them in lowercase.
    """

    def __getitem__(self, key):
        """Return a list of all values stored under the key.

        The list is a copy, so comment['title'].append('a title') does
        not modify the comment.

        Raises ValueError for an invalid key and KeyError if the key has
        no values.
        """

        # PY3 only
        if isinstance(key, slice):
            return VComment.__getitem__(self, key)

        if not is_valid_key(key):
            raise ValueError

        wanted = key.lower()
        matches = [v for (k, v) in self if k.lower() == wanted]
        if matches:
            return matches
        raise KeyError(wanted)

    def __delitem__(self, key):
        """Remove every value stored under the key.

        Raises ValueError for an invalid key and KeyError if the key has
        no values.
        """

        # PY3 only
        if isinstance(key, slice):
            return VComment.__delitem__(self, key)

        if not is_valid_key(key):
            raise ValueError

        wanted = key.lower()
        doomed = [pair for pair in self if pair[0].lower() == wanted]
        if not doomed:
            raise KeyError(wanted)
        for pair in doomed:
            self.remove(pair)

    def __contains__(self, key):
        """Return true if at least one value is stored under the key.

        Raises ValueError for an invalid key.
        """

        if not is_valid_key(key):
            raise ValueError

        wanted = key.lower()
        return any(k.lower() == wanted for k, _ in self)

    def __setitem__(self, key, values):
        """Replace all values of a key.

        Any existing values for the key are dropped first. `values` may
        be a single Unicode or UTF-8 string, or a list of such strings;
        anything that is not a list is stored as one value.

        Raises ValueError for an invalid key.
        """

        # PY3 only
        if isinstance(key, slice):
            return VComment.__setitem__(self, key, values)

        if not is_valid_key(key):
            raise ValueError

        new_values = values if isinstance(values, list) else [values]

        # A key without existing values is fine; there is nothing to drop.
        try:
            del self[key]
        except KeyError:
            pass

        if PY2:
            key = key.encode('ascii')

        for item in new_values:
            self.append((key, item))

    def keys(self):
        """Return the distinct keys of the comment, in lowercase."""

        unique = set(k.lower() for k, v in self)
        return list(unique)

    def as_dict(self):
        """Return a real dict mapping each key to a list of its values."""

        return dict((name, self[name]) for name in self.keys())
330