# -*- coding: utf-8 -*-
# Copyright 2005 Michael Urman
# Copyright 2016 Christoph Reiter
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

import struct

from mutagen._tags import Tags
from mutagen._util import DictProxy, convert_error, read_full
from mutagen._compat import PY3, text_type, itervalues

from ._util import BitPaddedInt, unsynch, ID3JunkFrameError, \
    ID3EncryptionUnsupportedError, is_valid_frame_id, error, \
    ID3NoHeaderError, ID3UnsupportedVersionError, ID3SaveConfig
from ._frames import TDRC, APIC, TDOR, TIME, TIPL, TORY, TDAT, Frames_2_2, \
    TextFrame, TYER, Frame, IPLS, Frames


class ID3Header(object):
    """The 10 byte ID3v2 tag header plus the optional extended header.

    After parsing a file object the instance carries:

    * ``version`` -- ``(2, major, revision)`` as read from the header
    * ``size`` -- the size stored in the header plus the 10 header bytes
    * ``_flags`` -- the raw header flag byte (see the ``f_*`` properties)
    * ``_extdata`` -- extended header payload; only set when the extended
      header flag was present
    """

    _V24 = (2, 4, 0)
    _V23 = (2, 3, 0)
    _V22 = (2, 2, 0)
    _V11 = (1, 1)

    # Individual bits of the header flag byte.
    f_unsynch = property(lambda s: bool(s._flags & 0x80))
    f_extended = property(lambda s: bool(s._flags & 0x40))
    f_experimental = property(lambda s: bool(s._flags & 0x20))
    f_footer = property(lambda s: bool(s._flags & 0x10))

    # Optional override for the frame lookup table; see known_frames.
    _known_frames = None

    @property
    def known_frames(self):
        """The frame table to use for parsing this tag.

        Returns ``_known_frames`` if it was set, otherwise ``Frames`` for
        ID3v2.3 and newer or ``Frames_2_2`` for ID3v2.2. Falls through
        (returning None) for anything older, which ``__init__`` never
        produces since it only accepts major versions 2, 3 and 4.
        """

        if self._known_frames is not None:
            return self._known_frames
        elif self.version >= ID3Header._V23:
            return Frames
        elif self.version >= ID3Header._V22:
            return Frames_2_2

    @convert_error(IOError, error)
    def __init__(self, fileobj=None):
        """Parse the header from the current position of `fileobj`.

        Args:
            fileobj: a readable, seekable binary file object, or None to
                create an empty header with no flags set (for testing)

        Raises ID3NoHeaderError, ID3UnsupportedVersionError or error
        """

        if fileobj is None:
            # for testing
            self._flags = 0
            return

        fn = getattr(fileobj, "name", "<unknown>")
        data = fileobj.read(10)
        if len(data) != 10:
            raise ID3NoHeaderError("%s: too small" % fn)

        id3, vmaj, vrev, flags, size = struct.unpack('>3sBBB4s', data)
        self._flags = flags
        self.size = BitPaddedInt(size) + 10
        self.version = (2, vmaj, vrev)

        if id3 != b'ID3':
            raise ID3NoHeaderError("%r doesn't start with an ID3 tag" % fn)

        if vmaj not in [2, 3, 4]:
            raise ID3UnsupportedVersionError("%r ID3v2.%d not supported"
                                             % (fn, vmaj))

        if not BitPaddedInt.has_valid_padding(size):
            raise error("Header size not synchsafe")

        # The low flag bits are undefined and must be zero: four of them
        # for v2.4, five for v2.3.
        if (self.version >= self._V24) and (flags & 0x0f):
            raise error(
                "%r has invalid flags %#02x" % (fn, flags))
        elif (self._V23 <= self.version < self._V24) and (flags & 0x1f):
            raise error(
                "%r has invalid flags %#02x" % (fn, flags))

        if self.f_extended:
            extsize_data = read_full(fileobj, 4)

            if PY3:
                frame_id = extsize_data.decode("ascii", "replace")
            else:
                frame_id = extsize_data

            if frame_id in Frames:
                # Some tagger sets the extended header flag but
                # doesn't write an extended header; in this case, the
                # ID3 data follows immediately. Since no extended
                # header is going to be long enough to actually match
                # a frame, and if it's *not* a frame we're going to be
                # completely lost anyway, this seems to be the most
                # correct check.
                # https://github.com/quodlibet/quodlibet/issues/126
                self._flags ^= 0x40
                extsize = 0
                fileobj.seek(-4, 1)
            elif self.version >= self._V24:
                # "Where the 'Extended header size' is the size of the whole
                # extended header, stored as a 32 bit synchsafe integer."
                extsize = BitPaddedInt(extsize_data) - 4
                if not BitPaddedInt.has_valid_padding(extsize_data):
                    raise error(
                        "Extended header size not synchsafe")
                if extsize < 0:
                    # A declared size smaller than the size field itself
                    # is bogus; reject it here instead of handing a
                    # negative byte count to read_full().
                    raise error(
                        "%r has an invalid extended header size" % fn)
            else:
                # "Where the 'Extended header size', currently 6 or 10 bytes,
                # excludes itself."
                extsize = struct.unpack('>L', extsize_data)[0]

            self._extdata = read_full(fileobj, extsize)


def _count_known_frames(data, frames, size_type, empty):
    """Walk `data` as a sequence of 10 byte frame headers plus payload.

    Args:
        data (bytes): raw frame data
        frames: container of known frame names
        size_type: callable applied to the raw 32 bit size field
            (``int`` or ``BitPaddedInt``)
        empty (bytes): the 10 byte pattern which marks the start of padding

    Returns:
        tuple: ``(count, offset)`` where ``count`` is the number of headers
        whose name is in `frames` and ``offset`` tells how the walk ended
        relative to the end of `data`: the overshoot past the end, or
        (when padding was hit) minus the remaining length modulo 10.
    """

    pos = 0
    count = 0
    while pos < len(data) - 10:
        part = data[pos:pos + 10]
        if part == empty:
            return count, -((len(data) - pos) % 10)
        name, size, _ = struct.unpack('>4sLH', part)
        pos += 10 + size_type(size)
        if PY3:
            try:
                name = name.decode("ascii")
            except UnicodeDecodeError:
                continue
        if name in frames:
            count += 1
    return count, pos - len(data)


def determine_bpi(data, frames, EMPTY=b"\x00" * 10):
    """Takes id3v2.4 frame data and determines if ints or bitpaddedints
    should be used for parsing. Needed because iTunes used to write
    normal ints for frame sizes.

    Args:
        data (bytes): the frame data following the tag header
        frames: container of known frame names
        EMPTY (bytes): 10 byte pattern treated as the start of padding

    Returns:
        ``int`` or ``BitPaddedInt``: the type to convert frame sizes with
    """

    # count number of tags found as BitPaddedInt and how far past
    asbpi, bpioff = _count_known_frames(data, frames, BitPaddedInt, EMPTY)

    # count number of tags found as int and how far past
    asint, intoff = _count_known_frames(data, frames, int, EMPTY)

    # if more tags as int, or equal and bpi is past and int is not
    if asint > asbpi or (asint == asbpi and (bpioff >= 1 and intoff <= 1)):
        return int
    return BitPaddedInt


class ID3Tags(DictProxy, Tags):
    """A dict-like collection of ID3 frames keyed by their ``HashKey``.

    Frames whose ID was valid but which could not be parsed are kept as
    raw bytes in ``unknown_frames`` and written back unchanged, provided
    the tag is saved with the ID3v2 major version they were read from.
    """

    __module__ = "mutagen.id3"

    def __init__(self, *args, **kwargs):
        self.unknown_frames = []
        # Major version (the X in ID3v2.X) the unknown frames came from.
        self._unknown_v2_version = 4
        super(ID3Tags, self).__init__(*args, **kwargs)

    def _read(self, header, data):
        """Parse `data` and add all frames found in it.

        Args:
            header (ID3Header): the header the data belongs to
            data (bytes): the frame data following the header

        Returns:
            bytes: whatever `read_frames` left unparsed
        """

        frames, unknown_frames, data = read_frames(
            header, data, header.known_frames)
        for frame in frames:
            self._add(frame, False)
        self.unknown_frames = unknown_frames
        self._unknown_v2_version = header.version[1]
        return data

    def _write(self, config):
        """Serialize all frames using `config`.

        Args:
            config (ID3SaveConfig): save settings

        Returns:
            bytearray: the concatenated frame data, without the tag header
        """

        # Sort frames by 'importance', then by frame size (smaller first)
        # and then by frame hash to get a stable result
        order = ["TIT2", "TPE1", "TRCK", "TALB", "TPOS", "TDRC", "TCON"]

        framedata = [
            (f, save_frame(f, config=config)) for f in itervalues(self)]

        def get_prio(frame):
            try:
                return order.index(frame.FrameID)
            except ValueError:
                return len(order)

        def sort_key(items):
            frame, data = items
            return (get_prio(frame), len(data), frame.HashKey)

        framedata = [d for (f, d) in sorted(framedata, key=sort_key)]

        # only write unknown frames if they were loaded from the version
        # we are saving with. Theoretically we could upgrade frames
        # but some frames can be nested like CHAP, so there is a chance
        # we create a mixed frame mess.
        if self._unknown_v2_version == config.v2_version:
            framedata.extend(data for data in self.unknown_frames
                             if len(data) > 10)

        return bytearray().join(framedata)

    def getall(self, key):
        """Return all frames with a given name (the list may be empty).

        Args:
            key (text): key for frames to get

        This is best explained by examples::

            id3.getall('TIT2') == [id3['TIT2']]
            id3.getall('TTTT') == []
            id3.getall('TXXX') == [TXXX(desc='woo', text='bar'),
                                   TXXX(desc='baz', text='quuuux'), ...]

        Since this is based on the frame's HashKey, which is
        colon-separated, you can use it to do things like
        ``getall('COMM:MusicMatch')`` or ``getall('TXXX:QuodLibet:')``.
        """

        if key in self:
            return [self[key]]
        else:
            key = key + ":"
            return [v for s, v in self.items() if s.startswith(key)]

    def setall(self, key, values):
        """Delete frames of the given type and add frames in 'values'.

        Args:
            key (text): key for frames to delete
            values (list[Frame]): frames to add
        """

        self.delall(key)
        for tag in values:
            self[tag.HashKey] = tag

    def delall(self, key):
        """Delete all tags of a given kind; see getall.

        Args:
            key (text): key for frames to delete
        """

        if key in self:
            del self[key]
        else:
            key = key + ":"
            # Iterate over a copy of the keys since we delete while looping
            for k in list(self.keys()):
                if k.startswith(key):
                    del self[k]

    def pprint(self):
        """
        Returns:
            text: tags in a human-readable format.

        "Human-readable" is used loosely here. The format is intended
        to mirror that used for Vorbis or APEv2 output, e.g.

            ``TIT2=My Title``

        However, ID3 frames can have multiple keys:

            ``POPM=user@example.org=3 128/255``
        """

        frames = sorted(Frame.pprint(s) for s in self.values())
        return "\n".join(frames)

    def _add(self, frame, strict):
        """Add a frame.

        Args:
            frame (Frame): the frame to add
            strict (bool): if this should raise in case it can't be added
                and frames shouldn't be merged.

        Raises:
            TypeError: if `frame` is not a Frame, or (strict only) if it
                can't be upgraded
        """

        if not isinstance(frame, Frame):
            raise TypeError("%r not a Frame instance" % frame)

        orig_frame = frame
        frame = frame._upgrade_frame()
        if frame is None:
            if not strict:
                return
            raise TypeError(
                "Can't upgrade %r frame" % type(orig_frame).__name__)

        hash_key = frame.HashKey
        if strict or hash_key not in self:
            self[hash_key] = frame
            return

        # Try to merge frames, or change the new one. Since changing
        # the new one can lead to new conflicts, try until everything is
        # either merged or added.
        while True:
            old_frame = self[hash_key]
            new_frame = old_frame._merge_frame(frame)
            new_hash = new_frame.HashKey
            if new_hash == hash_key:
                self[hash_key] = new_frame
                break
            else:
                assert new_frame is frame
                if new_hash not in self:
                    self[new_hash] = new_frame
                    break
                hash_key = new_hash

    def loaded_frame(self, tag):
        """Deprecated; use the add method."""

        self._add(tag, True)

    def add(self, frame):
        """Add a frame to the tag."""

        # add = loaded_frame (and vice versa) break applications that
        # expect to be able to override loaded_frame (e.g. Quod Libet),
        # as does making loaded_frame call add.
        self.loaded_frame(frame)

    def __setitem__(self, key, tag):
        """Store `tag` under `key`; raises TypeError for non-Frame values."""

        if not isinstance(tag, Frame):
            raise TypeError("%r not a Frame instance" % tag)
        super(ID3Tags, self).__setitem__(key, tag)

    def __update_common(self):
        """Updates done by both v23 and v24 update"""

        if "TCON" in self:
            # Get rid of "(xx)Foobr" format.
            self["TCON"].genres = self["TCON"].genres

        # Replace the short image format names with real mime types
        mimes = {"PNG": "image/png", "JPG": "image/jpeg"}
        for pic in self.getall("APIC"):
            if pic.mime in mimes:
                newpic = APIC(
                    encoding=pic.encoding, mime=mimes[pic.mime],
                    type=pic.type, desc=pic.desc, data=pic.data)
                self.add(newpic)

    def update_to_v24(self):
        """Convert older tags into an ID3v2.4 tag.

        This updates old ID3v2 frames to ID3v2.4 ones (e.g. TYER to
        TDRC). If you intend to save tags, you must call this function
        at some point; it is called by default when loading the tag.
        """

        self.__update_common()

        # TDAT, TYER, and TIME have been turned into TDRC.
        try:
            date = text_type(self.get("TYER", ""))
            if date.strip(u"\x00"):
                self.pop("TYER")
                dat = text_type(self.get("TDAT", ""))
                if dat.strip("\x00"):
                    self.pop("TDAT")
                    # TDAT is DDMM, TDRC wants YYYY-MM-DD
                    date = "%s-%s-%s" % (date, dat[2:], dat[:2])
                    time = text_type(self.get("TIME", ""))
                    if time.strip("\x00"):
                        self.pop("TIME")
                        # TIME is HHMM
                        date += "T%s:%s:00" % (time[:2], time[2:])
                if "TDRC" not in self:
                    self.add(TDRC(encoding=0, text=date))
        except UnicodeDecodeError:
            # Old ID3 tags have *lots* of Unicode problems, so if TYER
            # is bad, just chuck the frames.
            pass

        # TORY can be the first part of a TDOR.
        if "TORY" in self:
            f = self.pop("TORY")
            if "TDOR" not in self:
                try:
                    self.add(TDOR(encoding=0, text=str(f)))
                except UnicodeDecodeError:
                    pass

        # IPLS is now TIPL.
        if "IPLS" in self:
            f = self.pop("IPLS")
            if "TIPL" not in self:
                self.add(TIPL(encoding=f.encoding, people=f.people))

        # These can't be trivially translated to any ID3v2.4 tags, or
        # should have been removed already.
        for key in ["RVAD", "EQUA", "TRDA", "TSIZ", "TDAT", "TIME"]:
            if key in self:
                del self[key]

        # Recurse into chapters
        for f in self.getall("CHAP"):
            f.sub_frames.update_to_v24()
        for f in self.getall("CTOC"):
            f.sub_frames.update_to_v24()

    def update_to_v23(self):
        """Convert older (and newer) tags into an ID3v2.3 tag.

        This updates incompatible ID3v2 frames to ID3v2.3 ones. If you
        intend to save tags as ID3v2.3, you must call this function
        at some point.

        If you want to to go off spec and include some v2.4 frames
        in v2.3, remove them before calling this and add them back afterwards.
        """

        self.__update_common()

        # TMCL, TIPL -> IPLS
        if "TIPL" in self or "TMCL" in self:
            people = []
            if "TIPL" in self:
                f = self.pop("TIPL")
                people.extend(f.people)
            if "TMCL" in self:
                f = self.pop("TMCL")
                people.extend(f.people)
            if "IPLS" not in self:
                # The encoding is taken from whichever frame was popped last
                self.add(IPLS(encoding=f.encoding, people=people))

        # TDOR -> TORY
        if "TDOR" in self:
            f = self.pop("TDOR")
            if f.text:
                d = f.text[0]
                if d.year and "TORY" not in self:
                    self.add(TORY(encoding=f.encoding, text="%04d" % d.year))

        # TDRC -> TYER, TDAT, TIME
        if "TDRC" in self:
            f = self.pop("TDRC")
            if f.text:
                d = f.text[0]
                if d.year and "TYER" not in self:
                    self.add(TYER(encoding=f.encoding, text="%04d" % d.year))
                if d.month and d.day and "TDAT" not in self:
                    self.add(TDAT(encoding=f.encoding,
                                  text="%02d%02d" % (d.day, d.month)))
                # Hour 0 and minute 0 are valid values, so a truthiness
                # test would lose e.g. "12:00" or "00:30".
                # NOTE(review): assumes unset timestamp parts are None --
                # confirm against the timestamp type used by TDRC.
                if d.hour is not None and d.minute is not None \
                        and "TIME" not in self:
                    self.add(TIME(encoding=f.encoding,
                                  text="%02d%02d" % (d.hour, d.minute)))

        # New frames added in v2.4
        v24_frames = [
            'ASPI', 'EQU2', 'RVA2', 'SEEK', 'SIGN', 'TDEN', 'TDOR',
            'TDRC', 'TDRL', 'TDTG', 'TIPL', 'TMCL', 'TMOO', 'TPRO',
            'TSOA', 'TSOP', 'TSOT', 'TSST',
        ]

        for key in v24_frames:
            if key in self:
                del self[key]

        # Recurse into chapters
        for f in self.getall("CHAP"):
            f.sub_frames.update_to_v23()
        for f in self.getall("CTOC"):
            f.sub_frames.update_to_v23()

    def _copy(self):
        """Creates a shallow copy of all tags

        Returns:
            tuple: ``(items, subs)`` where ``items`` is a list of
            ``(key, frame)`` pairs and ``subs`` maps the HashKey of each
            CHAP/CTOC frame to the copy of its sub frames. Pass it to
            `_restore` to get back to this state.
        """

        # Force a real list: _restore() clears the mapping before it
        # iterates over the items, which would empty a live view.
        items = list(self.items())
        subs = {}
        for f in (self.getall("CHAP") + self.getall("CTOC")):
            subs[f.HashKey] = f.sub_frames._copy()
        return (items, subs)

    def _restore(self, value):
        """Restores the state copied with _copy()

        Args:
            value (tuple): the return value of a previous `_copy` call
        """

        items, subs = value
        self.clear()
        for key, frame in items:
            self[key] = frame
            if key in subs:
                frame.sub_frames._restore(subs[key])


def save_frame(frame, name=None, config=None):
    """Serialize a single frame including its 10 byte frame header.

    Args:
        frame (Frame): the frame to write
        name (bytes): frame ID to write into the header; defaults to the
            name of the frame's class
        config (ID3SaveConfig): save settings; a default one gets created
            if None

    Returns:
        bytes: header plus frame data, or ``b''`` for a text frame with
        empty text

    Raises:
        ValueError: if ``config.v2_version`` is neither 3 nor 4
    """

    if config is None:
        config = ID3SaveConfig()

    flags = 0
    if isinstance(frame, TextFrame):
        if len(str(frame)) == 0:
            return b''

    framedata = frame._writeData(config)

    # Note: large frames are deliberately not compressed since that causes
    # iTunes and other programs to fail to find these frames, which usually
    # includes e.g. APIC.

    # v2.4 stores the frame size as a synchsafe (7 bits per byte) integer,
    # v2.3 as a plain 32 bit one.
    if config.v2_version == 4:
        bits = 7
    elif config.v2_version == 3:
        bits = 8
    else:
        raise ValueError(
            "unsupported ID3v2 version: %r" % (config.v2_version,))

    datasize = BitPaddedInt.to_str(len(framedata), width=4, bits=bits)

    if name is not None:
        assert isinstance(name, bytes)
        frame_name = name
    else:
        frame_name = type(frame).__name__
        if PY3:
            frame_name = frame_name.encode("ascii")

    header = struct.pack('>4s4sH', frame_name, datasize, flags)
    return header + framedata


def read_frames(id3, data, frames):
    """Parse as many frames as possible out of `data`. Does not error out

    Args:
        id3 (ID3Header): header of the tag the data belongs to
        data (bytes): the frame data
        frames: mapping of frame name to frame class

    Returns:
        tuple: ``(result, unsupported_frames, data)`` -- the parsed frames,
        the raw bytes (header included) of frames which have a valid ID
        but are unknown or can't be handled, and the data left over once
        parsing stopped.
    """

    assert id3.version >= ID3Header._V22

    result = []
    unsupported_frames = []

    # Before v2.4 unsynchronisation is applied to the tag as a whole
    if id3.version < ID3Header._V24 and id3.f_unsynch:
        try:
            data = unsynch.decode(data)
        except ValueError:
            pass

    if id3.version >= ID3Header._V23:
        if id3.version < ID3Header._V24:
            bpi = int
        else:
            bpi = determine_bpi(data, frames)

        while data:
            header = data[:10]
            try:
                name, size, flags = struct.unpack('>4sLH', header)
            except struct.error:
                break  # not enough header
            if name.strip(b'\x00') == b'':
                break  # reached the padding

            size = bpi(size)
            framedata = data[10:10 + size]
            data = data[10 + size:]
            if size == 0:
                continue  # drop empty frames

            if PY3:
                try:
                    name = name.decode('ascii')
                except UnicodeDecodeError:
                    continue

            try:
                # someone writes 2.3 frames with 2.2 names
                if name[-1] == "\x00":
                    tag = Frames_2_2[name[:-1]]
                    name = tag.__base__.__name__

                tag = frames[name]
            except KeyError:
                if is_valid_frame_id(name):
                    unsupported_frames.append(header + framedata)
            else:
                try:
                    result.append(tag._fromData(id3, flags, framedata))
                except NotImplementedError:
                    unsupported_frames.append(header + framedata)
                except ID3JunkFrameError:
                    pass
    elif id3.version >= ID3Header._V22:
        # v2.2 has 6 byte frame headers: 3 bytes name, 3 bytes size
        while data:
            header = data[0:6]
            try:
                name, size = struct.unpack('>3s3s', header)
            except struct.error:
                break  # not enough header
            size, = struct.unpack('>L', b'\x00' + size)
            if name.strip(b'\x00') == b'':
                break  # reached the padding

            framedata = data[6:6 + size]
            data = data[6 + size:]
            if size == 0:
                continue  # drop empty frames

            if PY3:
                try:
                    name = name.decode('ascii')
                except UnicodeDecodeError:
                    continue

            try:
                tag = frames[name]
            except KeyError:
                if is_valid_frame_id(name):
                    unsupported_frames.append(header + framedata)
            else:
                try:
                    result.append(
                        tag._fromData(id3, 0, framedata))
                except (ID3EncryptionUnsupportedError,
                        NotImplementedError):
                    unsupported_frames.append(header + framedata)
                except ID3JunkFrameError:
                    pass

    return result, unsupported_frames, data