# -*- coding: utf-8 -*-
# Copyright (C) 2005 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

"""APEv2 reading and writing.

The APEv2 format is most commonly used with Musepack files, but is
also the format of choice for WavPack and other formats. Some MP3s
also have APEv2 tags, but this can cause problems with many MP3
decoders and taggers.

APEv2 tags, like Vorbis comments, are freeform key=value pairs. APEv2
keys can be any ASCII string with characters from 0x20 to 0x7E,
between 2 and 255 characters long. Keys are case-sensitive, but
readers are recommended to be case insensitive, and it is forbidden to
have multiple keys which differ only in case. Keys are usually stored
title-cased (e.g. 'Artist' rather than 'artist').

APEv2 values are slightly more structured than Vorbis comments; values
are flagged as one of text, binary, or an external reference (usually
a URI).

Based off the format specification found at
http://wiki.hydrogenaudio.org/index.php?title=APEv2_specification.
"""

__all__ = ["APEv2", "APEv2File", "Open", "delete"]

import sys
import struct
try:
    # The ABC aliases were removed from ``collections`` in Python 3.10.
    from collections.abc import MutableSequence
except ImportError:
    # Python 2 has no ``collections.abc`` module.
    from collections import MutableSequence

from ._compat import (cBytesIO, PY3, text_type, PY2, reraise, swap_to_string,
                      xrange)
from mutagen import Metadata, FileType, StreamInfo
from mutagen._util import DictMixin, cdata, delete_bytes, total_ordering, \
    MutagenError, loadfile, convert_error, seek_end, get_size


def is_valid_apev2_key(key):
    """Return whether `key` is a valid APEv2 key.

    A valid key is 2 to 255 characters long, uses only characters
    between u' ' and u'~' and is not one of "OggS", "TAG", "ID3" or
    "MP+".

    On Python 3 a non-text key raises TypeError. On Python 2 a
    non-text key is decoded as ASCII first and False is returned if
    that fails.
    """

    if not isinstance(key, text_type):
        if PY3:
            raise TypeError("APEv2 key must be str")

        try:
            key = key.decode('ascii')
        except UnicodeDecodeError:
            return False

    # PY26 - Change to set literal syntax (since set is faster than list here)
    return ((2 <= len(key) <= 255) and (min(key) >= u' ') and
            (max(key) <= u'~') and
            (key not in [u"OggS", u"TAG", u"ID3", u"MP+"]))

# There are three different kinds of APE tag values.
# "0: Item contains text information coded in UTF-8
#  1: Item contains binary information
#  2: Item is a locator of external stored information [e.g. URL]
#  3: reserved"
TEXT, BINARY, EXTERNAL = xrange(3)

# Bits of the 32 bit flags field stored in the tag header/footer.
HAS_HEADER = 1 << 31
HAS_NO_FOOTER = 1 << 30
IS_HEADER = 1 << 29


class error(MutagenError):
    """Base class of the errors raised by this module."""

    pass


class APENoHeaderError(error):
    """Raised when no APE tag could be found."""

    pass


class APEUnsupportedVersionError(error):
    pass


class APEBadItemError(error):
    """Raised when a tag item can't be parsed."""

    pass


class _APEv2Data(object):
    """Locates an APEv2 tag in a file object and reads its raw data.

    After construction the offset attributes describe where the tag
    lives in the file and `tag` holds the raw item data (or None if
    no tag was found).
    """

    # Store offsets of the important parts of the file.
    start = header = data = footer = end = None
    # Footer or header; seek here and read 32 to get version/size/items/flags
    metadata = None
    # Actual tag data
    tag = None

    version = None
    size = None
    items = None
    flags = 0

    # The tag is at the start rather than the end. A tag at both
    # the start and end of the file (i.e. the tag is the whole file)
    # is not considered to be at the start.
    is_at_start = False

    def __init__(self, fileobj):
        """Raises IOError and apev2.error"""

        self.__find_metadata(fileobj)

        if self.header is None:
            self.metadata = self.footer
        elif self.footer is None:
            self.metadata = self.header
        else:
            self.metadata = max(self.header, self.footer)

        # Neither a header nor a footer was found; leave everything unset.
        if self.metadata is None:
            return

        self.__fill_missing(fileobj)
        self.__fix_brokenness(fileobj)
        if self.data is not None:
            fileobj.seek(self.data)
            self.tag = fileobj.read(self.size)

    def __find_metadata(self, fileobj):
        """Set `footer` or `header` to the offset of a tag marker, if any.

        Looks for a footer at the very end of the file, then for a
        footer in front of a trailing ID3v1 tag (optionally with a
        Lyrics3v2 block in between), then for a header at offset 0.
        """

        # Try to find a header or footer.

        # Check for a simple footer.
        try:
            fileobj.seek(-32, 2)
        except IOError:
            fileobj.seek(0, 2)
            return
        if fileobj.read(8) == b"APETAGEX":
            fileobj.seek(-8, 1)
            self.footer = self.metadata = fileobj.tell()
            return

        # Check for an APEv2 tag followed by an ID3v1 tag at the end.
        try:
            if get_size(fileobj) < 128:
                raise IOError
            fileobj.seek(-128, 2)
            if fileobj.read(3) == b"TAG":

                fileobj.seek(-35, 1)  # "TAG" + header length
                if fileobj.read(8) == b"APETAGEX":
                    fileobj.seek(-8, 1)
                    self.footer = fileobj.tell()
                    return

                # ID3v1 tag at the end, maybe preceded by Lyrics3v2.
                # (http://www.id3.org/lyrics3200.html)
                # (header length - "APETAGEX") - "LYRICS200"
                fileobj.seek(15, 1)
                if fileobj.read(9) == b'LYRICS200':
                    fileobj.seek(-15, 1)  # "LYRICS200" + size tag
                    try:
                        offset = int(fileobj.read(6))
                    except ValueError:
                        raise IOError

                    fileobj.seek(-32 - offset - 6, 1)
                    if fileobj.read(8) == b"APETAGEX":
                        fileobj.seek(-8, 1)
                        self.footer = fileobj.tell()
                        return

        except IOError:
            # Any failed seek here just means there is no tag at the end.
            pass

        # Check for a tag at the start.
        fileobj.seek(0, 0)
        if fileobj.read(8) == b"APETAGEX":
            self.is_at_start = True
            self.header = 0

    def __fill_missing(self, fileobj):
        """Read version, size, item count and flags from the header or
        footer at `metadata` and derive the remaining offsets from them.

        Raises IOError and apev2.error
        """

        # Skip the 8 byte "APETAGEX" marker.
        fileobj.seek(self.metadata + 8)

        data = fileobj.read(16)
        if len(data) != 16:
            raise error

        self.version = data[:4]
        self.size = cdata.uint32_le(data[4:8])
        self.items = cdata.uint32_le(data[8:12])
        self.flags = cdata.uint32_le(data[12:])

        if self.header is not None:
            self.data = self.header + 32
            # If we're reading the header, the size is the header
            # offset + the size, which includes the footer.
            self.end = self.data + self.size
            fileobj.seek(self.end - 32, 0)
            if fileobj.read(8) == b"APETAGEX":
                self.footer = self.end - 32
        elif self.footer is not None:
            self.end = self.footer + 32
            self.data = self.end - self.size
            if self.flags & HAS_HEADER:
                self.header = self.data - 32
            else:
                self.header = self.data
        else:
            raise APENoHeaderError("No APE tag found")

        # exclude the footer from size
        if self.footer is not None:
            self.size -= 32

    def __fix_brokenness(self, fileobj):
        """Set `start`, extending it backwards over leftover headers."""

        # Fix broken tags written with PyMusepack.
        if self.header is not None:
            start = self.header
        else:
            start = self.data
        fileobj.seek(start)

        while start > 0:
            # Clean up broken writing from pre-Mutagen PyMusepack.
            # It didn't remove the first 24 bytes of header.
            try:
                fileobj.seek(-24, 1)
            except IOError:
                break
            else:
                if fileobj.read(8) == b"APETAGEX":
                    fileobj.seek(-8, 1)
                    start = fileobj.tell()
                else:
                    break
        self.start = start


class _CIDictProxy(DictMixin):
    """Dictionary with case-insensitive lookup that remembers the case
    each key was last set with.
    """

    def __init__(self, *args, **kwargs):
        self.__casemap = {}
        self.__dict = {}
        super(_CIDictProxy, self).__init__(*args, **kwargs)
        # Internally all names are stored as lowercase, but the case
        # they were set with is remembered and used when saving. This
        # is roughly in line with the standard, which says that keys
        # are case-sensitive but two keys differing only in case are
        # not allowed, and recommends case-insensitive
        # implementations.

    def __getitem__(self, key):
        return self.__dict[key.lower()]

    def __setitem__(self, key, value):
        lower = key.lower()
        self.__casemap[lower] = key
        self.__dict[lower] = value

    def __delitem__(self, key):
        lower = key.lower()
        del(self.__casemap[lower])
        del(self.__dict[lower])

    def keys(self):
        """Return the keys in the case they were set with."""

        return [self.__casemap.get(key, key) for key in self.__dict.keys()]


class APEv2(_CIDictProxy, Metadata):
    """APEv2(filething=None)

    A file with an APEv2 tag.

    ID3v1 tags are silently ignored and overwritten.
    """

    filename = None

    def pprint(self):
        """Return tag key=value pairs in a human-readable format."""

        items = sorted(self.items())
        return u"\n".join(u"%s=%s" % (k, v.pprint()) for k, v in items)

    @convert_error(IOError, error)
    @loadfile()
    def load(self, filething):
        """Load tags from a filename.

        Raises apev2.error
        """

        data = _APEv2Data(filething.fileobj)

        if data.tag:
            self.clear()
            self.__parse_tag(data.tag, data.items)
        else:
            raise APENoHeaderError("No APE tag found")

    def __parse_tag(self, tag, count):
        """Parse up to `count` items out of the raw `tag` data and
        store them in self.

        Raises IOError and APEBadItemError
        """

        fileobj = cBytesIO(tag)

        for i in xrange(count):
            tag_data = fileobj.read(8)
            # someone writes wrong item counts
            if not tag_data:
                break
            if len(tag_data) != 8:
                raise error
            size = cdata.uint32_le(tag_data[:4])
            flags = cdata.uint32_le(tag_data[4:8])

            # Bits 1 and 2 are the value type, 0-3
            # Bit 0 is read/write flag, ignored
            kind = (flags & 6) >> 1
            if kind == 3:
                raise APEBadItemError("value type must be 0, 1, or 2")

            # The key is a null-terminated byte string.
            key = value = fileobj.read(1)
            if not key:
                raise APEBadItemError
            while key[-1:] != b'\x00' and value:
                value = fileobj.read(1)
                if not value:
                    raise APEBadItemError
                key += value
            if key[-1:] == b"\x00":
                key = key[:-1]

            if PY3:
                try:
                    key = key.decode("ascii")
                except UnicodeError as err:
                    reraise(APEBadItemError, err, sys.exc_info()[2])
            value = fileobj.read(size)
            if len(value) != size:
                raise APEBadItemError

            value = _get_value_type(kind)._new(value)

            self[key] = value

    def __getitem__(self, key):
        if not is_valid_apev2_key(key):
            raise KeyError("%r is not a valid APEv2 key" % key)
        if PY2:
            key = key.encode('ascii')

        return super(APEv2, self).__getitem__(key)

    def __delitem__(self, key):
        if not is_valid_apev2_key(key):
            raise KeyError("%r is not a valid APEv2 key" % key)
        if PY2:
            key = key.encode('ascii')

        super(APEv2, self).__delitem__(key)

    def __setitem__(self, key, value):
        """'Magic' value setter.

        This function tries to guess at what kind of value you want to
        store. If you pass in a valid UTF-8 or Unicode string, it
        treats it as a text value. If you pass in a list, it treats it
        as a list of string/Unicode values. If you pass in a string
        that is not valid UTF-8, it assumes it is a binary value.

        Python 3: all bytes will be assumed to be a byte value, even
        if they are valid utf-8.

        If you need to force a specific type of value (e.g. binary
        data that also happens to be valid UTF-8, or an external
        reference), use the APEValue factory and set the value to the
        result of that::

            from mutagen.apev2 import APEValue, EXTERNAL
            tag['Website'] = APEValue('http://example.org', EXTERNAL)
        """

        if not is_valid_apev2_key(key):
            raise KeyError("%r is not a valid APEv2 key" % key)

        if PY2:
            key = key.encode('ascii')

        if not isinstance(value, _APEValue):
            # let's guess at the content if we're not already a value...
            if isinstance(value, text_type):
                # unicode? we've got to be text.
                value = APEValue(value, TEXT)
            elif isinstance(value, list):
                items = []
                for v in value:
                    if not isinstance(v, text_type):
                        if PY3:
                            raise TypeError("item in list not str")
                        v = v.decode("utf-8")
                    items.append(v)

                # list? text.
                value = APEValue(u"\0".join(items), TEXT)
            else:
                if PY3:
                    value = APEValue(value, BINARY)
                else:
                    try:
                        value.decode("utf-8")
                    except UnicodeError:
                        # invalid UTF8 text, probably binary
                        value = APEValue(value, BINARY)
                    else:
                        # valid UTF8, probably text
                        value = APEValue(value, TEXT)

        super(APEv2, self).__setitem__(key, value)

    @convert_error(IOError, error)
    @loadfile(writable=True, create=True)
    def save(self, filething=None):
        """Save changes to a file.

        If no filename is given, the one most recently loaded is used.

        Tags are always written at the end of the file, and include
        a header and a footer.
        """

        fileobj = filething.fileobj

        data = _APEv2Data(fileobj)

        # Remove any existing tag before appending the new one.
        if data.is_at_start:
            delete_bytes(fileobj, data.end - data.start, data.start)
        elif data.start is not None:
            fileobj.seek(data.start)
            # Delete an ID3v1 tag if present, too.
            fileobj.truncate()
        fileobj.seek(0, 2)

        tags = []
        for key, value in self.items():
            # Packed format for an item:
            # 4B: Value length
            # 4B: Value type
            # Key name
            # 1B: Null
            # Key value
            value_data = value._write()
            if not isinstance(key, bytes):
                key = key.encode("utf-8")
            tag_data = bytearray()
            tag_data += struct.pack("<2I", len(value_data), value.kind << 1)
            tag_data += key + b"\0" + value_data
            tags.append(bytes(tag_data))

        # "APE tags items should be sorted ascending by size... This is
        # not a MUST, but STRONGLY recommended. Actually the items should
        # be sorted by importance/byte, but this is not feasible."
        tags.sort(key=lambda tag: (len(tag), tag))
        num_tags = len(tags)
        tags = b"".join(tags)

        header = bytearray(b"APETAGEX")
        # version, tag size, item count, flags
        header += struct.pack("<4I", 2000, len(tags) + 32, num_tags,
                              HAS_HEADER | IS_HEADER)
        header += b"\0" * 8
        fileobj.write(header)

        fileobj.write(tags)

        footer = bytearray(b"APETAGEX")
        footer += struct.pack("<4I", 2000, len(tags) + 32, num_tags,
                              HAS_HEADER)
        footer += b"\0" * 8

        fileobj.write(footer)

    @convert_error(IOError, error)
    @loadfile(writable=True)
    def delete(self, filething=None):
        """Remove tags from a file."""

        fileobj = filething.fileobj
        data = _APEv2Data(fileobj)
        if data.start is not None and data.size is not None:
            delete_bytes(fileobj, data.end - data.start, data.start)
        self.clear()


Open = APEv2


@convert_error(IOError, error)
@loadfile(method=False, writable=True)
def delete(filething):
    """delete(filething)

    Arguments:
        filething (filething)
    Raises:
        mutagen.MutagenError

    Remove tags from a file.
    """

    try:
        t = APEv2(filething)
    except APENoHeaderError:
        return
    filething.fileobj.seek(0)
    t.delete(filething)


def _get_value_type(kind):
    """Returns a _APEValue subclass or raises ValueError"""

    if kind == TEXT:
        return APETextValue
    elif kind == BINARY:
        return APEBinaryValue
    elif kind == EXTERNAL:
        return APEExtValue
    raise ValueError("unknown kind %r" % kind)


def APEValue(value, kind):
    """APEv2 tag value factory.

    Use this if you need to specify the value's type manually. Binary
    and text data are automatically detected by APEv2.__setitem__.
    """

    try:
        type_ = _get_value_type(kind)
    except ValueError:
        raise ValueError("kind must be TEXT, BINARY, or EXTERNAL")
    else:
        return type_(value)


class _APEValue(object):
    """Base class of the APEv2 value types.

    Subclasses set `kind` and implement `_parse`, `_write` and
    `_validate`.
    """

    kind = None
    value = None

    def __init__(self, value, kind=None):
        # kind kwarg is for backwards compat
        if kind is not None and kind != self.kind:
            raise ValueError
        self.value = self._validate(value)

    @classmethod
    def _new(cls, data):
        """Create an instance from raw item data, bypassing __init__."""

        instance = cls.__new__(cls)
        instance._parse(data)
        return instance

    def _parse(self, data):
        """Sets value or raises APEBadItemError"""

        raise NotImplementedError

    def _write(self):
        """Returns bytes"""

        raise NotImplementedError

    def _validate(self, value):
        """Returns validated value or raises TypeError/ValueError"""

        raise NotImplementedError

    def __repr__(self):
        return "%s(%r, %d)" % (type(self).__name__, self.value, self.kind)


@swap_to_string
@total_ordering
class _APEUtf8Value(_APEValue):
    """Base class of the value types stored as UTF-8 encoded text."""

    def _parse(self, data):
        try:
            self.value = data.decode("utf-8")
        except UnicodeDecodeError as e:
            reraise(APEBadItemError, e, sys.exc_info()[2])

    def _validate(self, value):
        if not isinstance(value, text_type):
            if PY3:
                raise TypeError("value not str")
            else:
                value = value.decode("utf-8")
        return value

    def _write(self):
        return self.value.encode("utf-8")

    def __len__(self):
        return len(self.value)

    def __bytes__(self):
        return self._write()

    def __eq__(self, other):
        return self.value == other

    def __lt__(self, other):
        return self.value < other

    def __str__(self):
        return self.value


class APETextValue(_APEUtf8Value, MutableSequence):
    """An APEv2 text value.

    Text values are Unicode/UTF-8 strings. They can be accessed like
    strings (with a null separating the values), or arrays of strings.
    """

    kind = TEXT

    def __iter__(self):
        """Iterate over the strings of the value (not the characters)"""

        return iter(self.value.split(u"\0"))

    def __getitem__(self, index):
        return self.value.split(u"\0")[index]

    def __len__(self):
        return self.value.count(u"\0") + 1

    def __setitem__(self, index, value):
        if not isinstance(value, text_type):
            if PY3:
                raise TypeError("value not str")
            else:
                value = value.decode("utf-8")

        values = list(self)
        values[index] = value
        self.value = u"\0".join(values)

    def insert(self, index, value):
        """Insert the string `value` before position `index`."""

        if not isinstance(value, text_type):
            if PY3:
                raise TypeError("value not str")
            else:
                value = value.decode("utf-8")

        values = list(self)
        values.insert(index, value)
        self.value = u"\0".join(values)

    def __delitem__(self, index):
        values = list(self)
        del values[index]
        self.value = u"\0".join(values)

    def pprint(self):
        return u" / ".join(self)


@swap_to_string
@total_ordering
class APEBinaryValue(_APEValue):
    """An APEv2 binary value."""

    kind = BINARY

    def _parse(self, data):
        self.value = data

    def _write(self):
        return self.value

    def _validate(self, value):
        if not isinstance(value, bytes):
            raise TypeError("value not bytes")
        return bytes(value)

    def __len__(self):
        return len(self.value)

    def __bytes__(self):
        return self._write()

    def __eq__(self, other):
        return self.value == other

    def __lt__(self, other):
        return self.value < other

    def pprint(self):
        return u"[%d bytes]" % len(self)


class APEExtValue(_APEUtf8Value):
    """An APEv2 external value.

    External values are usually URI or IRI strings.
    """

    kind = EXTERNAL

    def pprint(self):
        return u"[External] %s" % self.value


class APEv2File(FileType):
    """APEv2File(filething)

    Arguments:
        filething (filething)

    Attributes:
        tags (`APEv2`)
    """

    class _Info(StreamInfo):
        """Placeholder stream info; nothing is read from the file."""

        length = 0
        bitrate = 0

        def __init__(self, fileobj):
            pass

        @staticmethod
        def pprint():
            return u"Unknown format with APEv2 tag."

    @loadfile()
    def load(self, filething):
        """Load the APEv2 tag, setting `tags` to None if there is none."""

        fileobj = filething.fileobj

        self.info = self._Info(fileobj)
        try:
            fileobj.seek(0, 0)
        except IOError as e:
            raise error(e)

        try:
            self.tags = APEv2(fileobj)
        except APENoHeaderError:
            self.tags = None

    def add_tags(self):
        """Add an empty APEv2 tag, or raise `error` if tags exist already."""

        if self.tags is None:
            self.tags = APEv2()
        else:
            raise error("%r already has tags: %r" % (self, self.tags))

    @staticmethod
    def score(filename, fileobj, header):
        """Score the file by whether "APETAGEX" occurs in its last 160
        bytes, minus one if `header` starts with "ID3".

        Returns -1 if the end of the file can't be read.
        """

        try:
            seek_end(fileobj, 160)
            footer = fileobj.read()
        except IOError:
            return -1
        return ((b"APETAGEX" in footer) - header.startswith(b"ID3"))