# -*- coding: utf-8 -*-
# Copyright (C) 2005-2006  Joe Wreschnig
#                    2013  Christoph Reiter
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

"""Read and write Vorbis comment data.

Vorbis comments are freeform key/value pairs; keys are
case-insensitive ASCII and values are Unicode strings. A key may have
multiple values.

The specification is at http://www.xiph.org/vorbis/doc/v-comment.html.
"""

import sys

import mutagen
from ._compat import reraise, BytesIO, text_type, xrange, PY3, PY2
from mutagen._util import DictMixin, cdata, MutagenError


def is_valid_key(key):
    """Return true if a string is a valid Vorbis comment key.

    Valid Vorbis comment keys are printable ASCII between 0x20 (space)
    and 0x7D ('}'), excluding '='. An empty key is not valid.

    Takes str/unicode in Python 2, unicode in Python 3

    Raises:
        TypeError: in Python 3, if `key` is a bytes object.
    """

    if PY3 and isinstance(key, bytes):
        raise TypeError("needs to be str not bytes")

    for c in key:
        if c < " " or c > "}" or c == "=":
            return False

    # Every character passed; only the empty key is left to reject.
    return bool(key)


# Alternative name for is_valid_key.
istag = is_valid_key


class error(MutagenError):
    """Base class for all errors raised by this module."""


class VorbisUnsetFrameError(error):
    """The framing bit was expected but is unset or missing."""


class VorbisEncodingError(error):
    """A comment or tag name could not be decoded/encoded as required."""


class VComment(mutagen.Tags, list):
    """A Vorbis comment parser, accessor, and renderer.

    All comment ordering is preserved. A VComment is a list of
    key/value pairs, and so any Python list method can be used on it.

    Vorbis comments are always wrapped in something like an Ogg Vorbis
    bitstream or a FLAC metadata block, so this loads string data or a
    file-like object, not a filename.

    Attributes:
        vendor (text): the stream 'vendor' (i.e. writer); default 'Mutagen'
    """

    vendor = u"Mutagen " + mutagen.version_string

    def __init__(self, data=None, *args, **kwargs):
        """Create a comment, optionally parsing it from `data`.

        Arguments:
            data: None for an empty comment, otherwise bytes or a
                file-like object (anything with a `read` attribute).
            *args, **kwargs: passed through to load().

        Raises:
            TypeError: if `data` is neither bytes nor file-like.
        """

        # Number of bytes load() consumed from the file-like object.
        self._size = 0
        # Collect the args to pass to load, this lets child classes
        # override just load and get equivalent magic for the
        # constructor.
        if data is not None:
            if isinstance(data, bytes):
                data = BytesIO(data)
            elif not hasattr(data, 'read'):
                raise TypeError("VComment requires bytes or a file-like")
            start = data.tell()
            self.load(data, *args, **kwargs)
            self._size = data.tell() - start

    def load(self, fileobj, errors='replace', framing=True):
        """Parse a Vorbis comment from a file-like object.

        Arguments:
            errors (str): 'strict', 'replace', or 'ignore'.
                This affects Unicode decoding and how other malformed content
                is interpreted.
            framing (bool): if true, fail if a framing bit is not present

        Framing bits are required by the Vorbis comment specification,
        but are not used in FLAC Vorbis comment blocks.

        Raises:
            VorbisUnsetFrameError: if `framing` is true and the framing
                byte is missing or its lowest bit is unset.
            VorbisEncodingError: if `errors` is 'strict' and a comment
                has no '=' separator or a non-ASCII tag name.
            error: if the data is otherwise not a valid Vorbis comment.
        """

        try:
            vendor_length = cdata.uint_le(fileobj.read(4))
            self.vendor = fileobj.read(vendor_length).decode('utf-8', errors)
            count = cdata.uint_le(fileobj.read(4))
            for i in xrange(count):
                length = cdata.uint_le(fileobj.read(4))
                try:
                    string = fileobj.read(length).decode('utf-8', errors)
                except (OverflowError, MemoryError):
                    raise error("cannot read %d bytes, too large" % length)
                try:
                    tag, value = string.split('=', 1)
                except ValueError as err:
                    # No '=' in the comment; handling depends on `errors`.
                    if errors == "ignore":
                        continue
                    elif errors == "replace":
                        tag, value = u"unknown%d" % i, string
                    else:
                        reraise(VorbisEncodingError, err, sys.exc_info()[2])
                try:
                    tag = tag.encode('ascii', errors)
                except UnicodeEncodeError:
                    raise VorbisEncodingError("invalid tag name %r" % tag)
                else:
                    # string keys in py3k
                    if PY3:
                        tag = tag.decode("ascii")
                # Comments whose key is not valid are dropped.
                if is_valid_key(tag):
                    self.append((tag, value))

            if framing:
                framing_byte = bytearray(fileobj.read(1))
                # An empty read means the stream ended before the framing
                # byte; indexing it would raise a bare IndexError, so
                # treat it the same as an unset bit.
                if not framing_byte or not framing_byte[0] & 0x01:
                    raise VorbisUnsetFrameError("framing bit was unset")
        except (cdata.error, TypeError):
            raise error("file is not a valid Vorbis comment")

    def validate(self):
        """Validate keys and values.

        Check to make sure every key used is a valid Vorbis key, and
        that every value used is a valid Unicode or UTF-8 string. If
        any invalid keys or values are found, a ValueError is raised.

        In Python 3 all keys and values have to be a string.

        Returns:
            True if everything is valid.
        """

        if not isinstance(self.vendor, text_type):
            if PY3:
                raise ValueError("vendor needs to be str")

            # Python 2: a byte string vendor has to be valid UTF-8.
            try:
                self.vendor.decode('utf-8')
            except UnicodeDecodeError:
                raise ValueError

        for key, value in self:
            try:
                if not is_valid_key(key):
                    raise ValueError("%r is not a valid key" % key)
            except TypeError:
                raise ValueError("%r is not a valid key" % key)

            if not isinstance(value, text_type):
                if PY3:
                    err = "%r needs to be str for key %r" % (value, key)
                    raise ValueError(err)

                # Python 2: anything that cannot be decoded as UTF-8
                # (including objects without a decode method) is rejected.
                try:
                    value.decode("utf-8")
                except Exception:
                    err = "%r is not a valid value for key %r" % (value, key)
                    raise ValueError(err)

        return True

    def clear(self):
        """Clear all keys from the comment."""

        # Slice deletion empties the list in one step instead of one
        # linear-time remove() call per element.
        del self[:]

    def write(self, framing=True):
        """Return a string representation of the data.

        Validation is always performed, so calling this function on
        invalid data may raise a ValueError.

        Arguments:
            framing (bool): if true, append a framing bit (see load)

        Returns:
            The serialized comment as bytes.
        """

        self.validate()

        def _encode(value):
            # Text is encoded as UTF-8; bytes are passed through as-is.
            if not isinstance(value, bytes):
                return value.encode('utf-8')
            return value

        f = BytesIO()
        vendor = _encode(self.vendor)
        f.write(cdata.to_uint_le(len(vendor)))
        f.write(vendor)
        f.write(cdata.to_uint_le(len(self)))
        for tag, value in self:
            tag = _encode(tag)
            value = _encode(value)
            comment = tag + b"=" + value
            f.write(cdata.to_uint_le(len(comment)))
            f.write(comment)
        if framing:
            f.write(b"\x01")
        return f.getvalue()

    def pprint(self):
        """Return the comments as text, one "key=value" pair per line."""

        def _decode(value):
            # Byte strings are decoded as UTF-8, replacing invalid bytes.
            if not isinstance(value, text_type):
                return value.decode('utf-8', 'replace')
            return value

        tags = [u"%s=%s" % (_decode(k), _decode(v)) for k, v in self]
        return u"\n".join(tags)


class VCommentDict(VComment, DictMixin):
    """A VComment that looks like a dictionary.

    This object differs from a dictionary in two ways. First,
    len(comment) will still return the number of values, not the
    number of keys. Secondly, iterating through the object will
    iterate over (key, value) pairs, not keys. Since a key may have
    multiple values, the same value may appear multiple times while
    iterating.

    Since Vorbis comment keys are case-insensitive, all keys are
    normalized to lowercase ASCII.
    """

    def __getitem__(self, key):
        """A list of values for the key.

        This is a copy, so comment['title'].append('a title') will not
        work.

        Raises:
            ValueError: if the key is not a valid Vorbis comment key.
            KeyError: if the key has no values.
        """

        # PY3 only
        if isinstance(key, slice):
            return VComment.__getitem__(self, key)

        if not is_valid_key(key):
            raise ValueError

        key = key.lower()

        values = [value for (k, value) in self if k.lower() == key]
        if not values:
            raise KeyError(key)
        return values

    def __delitem__(self, key):
        """Delete all values associated with the key.

        Raises:
            ValueError: if the key is not a valid Vorbis comment key.
            KeyError: if the key has no values.
        """

        # PY3 only
        if isinstance(key, slice):
            return VComment.__delitem__(self, key)

        if not is_valid_key(key):
            raise ValueError

        key = key.lower()
        # Collect first; removing while iterating would skip entries.
        to_delete = [x for x in self if x[0].lower() == key]
        if not to_delete:
            raise KeyError(key)
        for item in to_delete:
            self.remove(item)

    def __contains__(self, key):
        """Return true if the key has any values.

        Raises:
            ValueError: if the key is not a valid Vorbis comment key.
        """

        if not is_valid_key(key):
            raise ValueError

        key = key.lower()
        for k, value in self:
            if k.lower() == key:
                return True
        return False

    def __setitem__(self, key, values):
        """Set a key's value or values.

        Setting a value overwrites all old ones. The value may be a
        list of Unicode or UTF-8 strings, or a single Unicode or UTF-8
        string.

        Raises:
            ValueError: if the key is not a valid Vorbis comment key.
        """

        # PY3 only
        if isinstance(key, slice):
            return VComment.__setitem__(self, key, values)

        if not is_valid_key(key):
            raise ValueError

        if not isinstance(values, list):
            values = [values]
        try:
            del self[key]
        except KeyError:
            pass

        # Python 2 stores keys as ASCII byte strings.
        if PY2:
            key = key.encode('ascii')

        for value in values:
            self.append((key, value))

    def keys(self):
        """Return all keys in the comment.

        Keys are lowercased and each appears once; order is unspecified.
        """

        return list(set(k.lower() for k, v in self))

    def as_dict(self):
        """Return a copy of the comment data in a real dict.

        Maps each lowercased key to the list of its values.
        """

        return dict((key, self[key]) for key in self.keys())