1# -*- coding: utf-8 -*-
2# Copyright 2005  Michael Urman
3# Copyright 2016  Christoph Reiter
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 2 of the License, or
8# (at your option) any later version.
9
10import struct
11
12from mutagen._tags import Tags
13from mutagen._util import DictProxy, convert_error, read_full
14from mutagen._compat import PY3, text_type, itervalues
15
16from ._util import BitPaddedInt, unsynch, ID3JunkFrameError, \
17    ID3EncryptionUnsupportedError, is_valid_frame_id, error, \
18    ID3NoHeaderError, ID3UnsupportedVersionError, ID3SaveConfig
19from ._frames import TDRC, APIC, TDOR, TIME, TIPL, TORY, TDAT, Frames_2_2, \
20    TextFrame, TYER, Frame, IPLS, Frames
21
22
class ID3Header(object):
    """The header of an ID3v2 tag, parsed from the start of a file object.

    After successful parsing the instance provides ``version`` (a tuple
    ``(2, major, revision)``), ``size`` (the size read from the header
    plus the 10 header bytes) and the ``f_*`` flag properties.
    """

    # Version tuples used for comparisons against ``self.version``
    _V24 = (2, 4, 0)
    _V23 = (2, 3, 0)
    _V22 = (2, 2, 0)
    _V11 = (1, 1)

    # Individual bits of the header flags byte
    f_unsynch = property(lambda s: bool(s._flags & 0x80))
    f_extended = property(lambda s: bool(s._flags & 0x40))
    f_experimental = property(lambda s: bool(s._flags & 0x20))
    f_footer = property(lambda s: bool(s._flags & 0x10))

    # Optional override for the frame lookup returned by known_frames
    _known_frames = None

    @property
    def known_frames(self):
        """The frame lookup to use for parsing the frames of this tag.

        Returns ``_known_frames`` if it was set, otherwise ``Frames`` for
        ID3v2.3 and newer, ``Frames_2_2`` for ID3v2.2 and ``None`` for
        anything older.
        """

        if self._known_frames is not None:
            return self._known_frames
        elif self.version >= ID3Header._V23:
            return Frames
        elif self.version >= ID3Header._V22:
            return Frames_2_2
        return None

    @convert_error(IOError, error)
    def __init__(self, fileobj=None):
        """Read and validate the tag header from ``fileobj``.

        Args:
            fileobj: a file object positioned at the start of the tag,
                or None to create a header with no flags set (used for
                testing; ``version`` and ``size`` are not set then).

        Raises ID3NoHeaderError, ID3UnsupportedVersionError or error
        """

        if fileobj is None:
            # for testing
            self._flags = 0
            return

        fn = getattr(fileobj, "name", "<unknown>")
        data = fileobj.read(10)
        if len(data) != 10:
            raise ID3NoHeaderError("%s: too small" % fn)

        id3, vmaj, vrev, flags, size = struct.unpack('>3sBBB4s', data)
        self._flags = flags
        self.size = BitPaddedInt(size) + 10
        self.version = (2, vmaj, vrev)

        if id3 != b'ID3':
            raise ID3NoHeaderError("%r doesn't start with an ID3 tag" % fn)

        if vmaj not in [2, 3, 4]:
            raise ID3UnsupportedVersionError("%r ID3v2.%d not supported"
                                             % (fn, vmaj))

        if not BitPaddedInt.has_valid_padding(size):
            raise error("Header size not synchsafe")

        # Reject flag bits which are not handled for the given version:
        # the low four bits for v2.4, the low five bits for v2.3.
        if (self.version >= self._V24) and (flags & 0x0f):
            raise error(
                "%r has invalid flags %#02x" % (fn, flags))
        elif (self._V23 <= self.version < self._V24) and (flags & 0x1f):
            raise error(
                "%r has invalid flags %#02x" % (fn, flags))

        if self.f_extended:
            extsize_data = read_full(fileobj, 4)

            if PY3:
                frame_id = extsize_data.decode("ascii", "replace")
            else:
                frame_id = extsize_data

            if frame_id in Frames:
                # Some tagger sets the extended header flag but
                # doesn't write an extended header; in this case, the
                # ID3 data follows immediately. Since no extended
                # header is going to be long enough to actually match
                # a frame, and if it's *not* a frame we're going to be
                # completely lost anyway, this seems to be the most
                # correct check.
                # https://github.com/quodlibet/quodlibet/issues/126

                # The flag is known to be set here, so this clears it.
                self._flags ^= 0x40
                extsize = 0
                # Rewind so the frame ID gets read again as frame data.
                fileobj.seek(-4, 1)
            elif self.version >= self._V24:
                # "Where the 'Extended header size' is the size of the whole
                # extended header, stored as a 32 bit synchsafe integer."
                extsize = BitPaddedInt(extsize_data) - 4
                if not BitPaddedInt.has_valid_padding(extsize_data):
                    raise error(
                        "Extended header size not synchsafe")
                if extsize < 0:
                    # A size below 4 can't even cover the size field
                    # itself; fail here instead of passing a negative
                    # length on to read_full().
                    raise error(
                        "Extended header size too small")
            else:
                # "Where the 'Extended header size', currently 6 or 10 bytes,
                # excludes itself."
                extsize = struct.unpack('>L', extsize_data)[0]

            # The remaining extended header bytes (without the size field)
            self._extdata = read_full(fileobj, extsize)
115
116
def determine_bpi(data, frames, EMPTY=b"\x00" * 10):
    """Guess how frame sizes are encoded in a block of id3v2.4 frame data.

    The data is walked twice, once reading every frame size as a
    BitPaddedInt and once as a plain int (iTunes used to write normal
    ints for frame sizes). The interpretation finding more frames whose
    name is in `frames` wins; see the end of the function for how a tie
    gets resolved.

    Returns either ``int`` or ``BitPaddedInt``.
    """

    total = len(data)

    def walk(size_type):
        """Walk the frames converting each size with `size_type`.

        Returns a tuple of the number of frames with a name in `frames`
        and the stop position relative to the end of the data.
        """

        pos = 0
        known = 0
        while pos < total - 10:
            chunk = data[pos:pos + 10]
            if chunk == EMPTY:
                # An all-zero header: stop and report the remaining length
                # modulo 10 as a non-positive offset.
                return known, -((total - pos) % 10)
            name, size, flags = struct.unpack('>4sLH', chunk)
            pos += 10 + size_type(size)
            if PY3:
                try:
                    name = name.decode("ascii")
                except UnicodeDecodeError:
                    continue
            if name in frames:
                known += 1
        # Ran to (or past) the end; a positive offset means overshoot.
        return known, pos - total

    asbpi, bpioff = walk(BitPaddedInt)
    asint, intoff = walk(int)

    # if more tags as int, or equal and bpi is past and int is not
    if asint > asbpi:
        return int
    if asint == asbpi and bpioff >= 1 and intoff <= 1:
        return int
    return BitPaddedInt
168
169
class ID3Tags(DictProxy, Tags):
    """Container mapping frame HashKeys to ID3v2 Frame instances.

    The raw data of frames which were read but could not be turned into
    Frame instances is kept in ``unknown_frames`` so that ``_write`` can
    emit it again.
    """

    __module__ = "mutagen.id3"

    def __init__(self, *args, **kwargs):
        """Set up the unknown frame bookkeeping and defer to the bases."""

        # Raw header + data of the frames read_frames() reported as
        # unsupported.
        self.unknown_frames = []
        # The second element of the header version (e.g. 4 for ID3v2.4)
        # the unknown frames were read from.
        self._unknown_v2_version = 4
        super(ID3Tags, self).__init__(*args, **kwargs)

    def _read(self, header, data):
        """Parse frame data and add all resulting frames (non-strict).

        Args:
            header (ID3Header): the header belonging to the data
            data (bytes): the raw frame data
        Returns:
            the data read_frames() left over
        """

        frames, unknown_frames, data = read_frames(
            header, data, header.known_frames)
        for frame in frames:
            self._add(frame, False)
        self.unknown_frames = unknown_frames
        self._unknown_v2_version = header.version[1]
        return data

    def _write(self, config):
        """Serialize all frames.

        Args:
            config (ID3SaveConfig): passed to save_frame(); its
                ``v2_version`` also decides if unknown frames get written
        Returns:
            bytearray: the concatenated frame data
        """

        # Sort frames by 'importance', then by the size of the serialized
        # frame (smallest first) and then by frame hash to get a stable
        # result
        order = ["TIT2", "TPE1", "TRCK", "TALB", "TPOS", "TDRC", "TCON"]

        framedata = [
            (f, save_frame(f, config=config)) for f in itervalues(self)]

        def get_prio(frame):
            # Frames not listed in `order` sort after all listed ones
            try:
                return order.index(frame.FrameID)
            except ValueError:
                return len(order)

        def sort_key(items):
            frame, data = items
            return (get_prio(frame), len(data), frame.HashKey)

        framedata = [d for (f, d) in sorted(framedata, key=sort_key)]

        # only write unknown frames if they were loaded from the version
        # we are saving with. Theoretically we could upgrade frames
        # but some frames can be nested like CHAP, so there is a chance
        # we create a mixed frame mess.
        if self._unknown_v2_version == config.v2_version:
            # Entries of 10 bytes or less are skipped
            framedata.extend(data for data in self.unknown_frames
                             if len(data) > 10)

        return bytearray().join(framedata)

    def getall(self, key):
        """Return all frames with a given name (the list may be empty).

        Args:
            key (text): key for frames to get

        This is best explained by examples::

            id3.getall('TIT2') == [id3['TIT2']]
            id3.getall('TTTT') == []
            id3.getall('TXXX') == [TXXX(desc='woo', text='bar'),
                                   TXXX(desc='baz', text='quuuux'), ...]

        Since this is based on the frame's HashKey, which is
        colon-separated, you can use it to do things like
        ``getall('COMM:MusicMatch')`` or ``getall('TXXX:QuodLibet:')``.
        """
        if key in self:
            return [self[key]]
        else:
            key = key + ":"
            return [v for s, v in self.items() if s.startswith(key)]

    def setall(self, key, values):
        """Delete frames of the given type and add frames in 'values'.

        Args:
            key (text): key for frames to delete
            values (list[Frame]): frames to add
        """

        self.delall(key)
        for tag in values:
            self[tag.HashKey] = tag

    def delall(self, key):
        """Delete all tags of a given kind; see getall.

        Args:
            key (text): key for frames to delete
        """

        if key in self:
            del self[key]
        else:
            key = key + ":"
            # Iterate over a copy of the keys since entries get deleted
            for k in list(self.keys()):
                if k.startswith(key):
                    del self[k]

    def pprint(self):
        """
        Returns:
            text: tags in a human-readable format.

        "Human-readable" is used loosely here. The format is intended
        to mirror that used for Vorbis or APEv2 output, e.g.

            ``TIT2=My Title``

        However, ID3 frames can have multiple keys:

            ``POPM=user@example.org=3 128/255``
        """

        frames = sorted(Frame.pprint(s) for s in self.values())
        return "\n".join(frames)

    def _add(self, frame, strict):
        """Add a frame.

        Args:
            frame (Frame): the frame to add
            strict (bool): if this should raise in case it can't be added
                and frames shouldn't be merged.
        Raises:
            TypeError: if `frame` is not a Frame, or if it can't be
                upgraded and `strict` is set
        """

        if not isinstance(frame, Frame):
            raise TypeError("%r not a Frame instance" % frame)

        orig_frame = frame
        frame = frame._upgrade_frame()
        if frame is None:
            if not strict:
                return
            raise TypeError(
                "Can't upgrade %r frame" % type(orig_frame).__name__)

        hash_key = frame.HashKey
        if strict or hash_key not in self:
            self[hash_key] = frame
            return

        # Try to merge frames, or change the new one. Since changing
        # the new one can lead to new conflicts, try until everything is
        # either merged or added.
        while True:
            old_frame = self[hash_key]
            new_frame = old_frame._merge_frame(frame)
            new_hash = new_frame.HashKey
            if new_hash == hash_key:
                self[hash_key] = new_frame
                break
            else:
                assert new_frame is frame
                if new_hash not in self:
                    self[new_hash] = new_frame
                    break
                hash_key = new_hash

    def loaded_frame(self, tag):
        """Deprecated; use the add method."""

        self._add(tag, True)

    def add(self, frame):
        """Add a frame to the tag."""

        # add = loaded_frame (and vice versa) break applications that
        # expect to be able to override loaded_frame (e.g. Quod Libet),
        # as does making loaded_frame call add.
        self.loaded_frame(frame)

    def __setitem__(self, key, tag):
        """Store `tag` under `key`; raises TypeError if it isn't a Frame."""

        if not isinstance(tag, Frame):
            raise TypeError("%r not a Frame instance" % tag)
        super(ID3Tags, self).__setitem__(key, tag)

    def __update_common(self):
        """Updates done by both v23 and v24 update"""

        if "TCON" in self:
            # Get rid of "(xx)Foobr" format.
            self["TCON"].genres = self["TCON"].genres

        # Pictures using the short "PNG"/"JPG" identifiers get added again
        # with the corresponding full mime type.
        mimes = {"PNG": "image/png", "JPG": "image/jpeg"}
        for pic in self.getall("APIC"):
            if pic.mime in mimes:
                newpic = APIC(
                    encoding=pic.encoding, mime=mimes[pic.mime],
                    type=pic.type, desc=pic.desc, data=pic.data)
                self.add(newpic)

    def update_to_v24(self):
        """Convert older tags into an ID3v2.4 tag.

        This updates old ID3v2 frames to ID3v2.4 ones (e.g. TYER to
        TDRC). If you intend to save tags, you must call this function
        at some point; it is called by default when loading the tag.
        """

        self.__update_common()

        # TDAT, TYER, and TIME have been turned into TDRC.
        try:
            date = text_type(self.get("TYER", ""))
            if date.strip(u"\x00"):
                self.pop("TYER")
                dat = text_type(self.get("TDAT", ""))
                if dat.strip("\x00"):
                    self.pop("TDAT")
                    # The first two characters of TDAT end up last and
                    # the rest in the middle of the new date.
                    date = "%s-%s-%s" % (date, dat[2:], dat[:2])
                    time = text_type(self.get("TIME", ""))
                    if time.strip("\x00"):
                        self.pop("TIME")
                        date += "T%s:%s:00" % (time[:2], time[2:])
                if "TDRC" not in self:
                    self.add(TDRC(encoding=0, text=date))
        except UnicodeDecodeError:
            # Old ID3 tags have *lots* of Unicode problems, so if TYER
            # is bad, just chuck the frames.
            pass

        # TORY can be the first part of a TDOR.
        if "TORY" in self:
            f = self.pop("TORY")
            if "TDOR" not in self:
                try:
                    self.add(TDOR(encoding=0, text=str(f)))
                except UnicodeDecodeError:
                    pass

        # IPLS is now TIPL.
        if "IPLS" in self:
            f = self.pop("IPLS")
            if "TIPL" not in self:
                self.add(TIPL(encoding=f.encoding, people=f.people))

        # These can't be trivially translated to any ID3v2.4 tags, or
        # should have been removed already.
        for key in ["RVAD", "EQUA", "TRDA", "TSIZ", "TDAT", "TIME"]:
            if key in self:
                del self[key]

        # Recurse into chapters
        for f in self.getall("CHAP"):
            f.sub_frames.update_to_v24()
        for f in self.getall("CTOC"):
            f.sub_frames.update_to_v24()

    def update_to_v23(self):
        """Convert older (and newer) tags into an ID3v2.3 tag.

        This updates incompatible ID3v2 frames to ID3v2.3 ones. If you
        intend to save tags as ID3v2.3, you must call this function
        at some point.

        If you want to to go off spec and include some v2.4 frames
        in v2.3, remove them before calling this and add them back afterwards.
        """

        self.__update_common()

        # TMCL, TIPL -> IPLS
        if "TIPL" in self or "TMCL" in self:
            people = []
            if "TIPL" in self:
                f = self.pop("TIPL")
                people.extend(f.people)
            if "TMCL" in self:
                f = self.pop("TMCL")
                people.extend(f.people)
            if "IPLS" not in self:
                # The encoding of the frame popped last gets used
                self.add(IPLS(encoding=f.encoding, people=people))

        # TDOR -> TORY
        if "TDOR" in self:
            f = self.pop("TDOR")
            if f.text:
                d = f.text[0]
                if d.year and "TORY" not in self:
                    self.add(TORY(encoding=f.encoding, text="%04d" % d.year))

        # TDRC -> TYER, TDAT, TIME
        if "TDRC" in self:
            f = self.pop("TDRC")
            if f.text:
                d = f.text[0]
                if d.year and "TYER" not in self:
                    self.add(TYER(encoding=f.encoding, text="%04d" % d.year))
                if d.month and d.day and "TDAT" not in self:
                    self.add(TDAT(encoding=f.encoding,
                                  text="%02d%02d" % (d.day, d.month)))
                # 0 is a valid hour and a valid minute, so compare against
                # None instead of testing for truth; otherwise times like
                # 00:30 or 12:00 would get lost.
                # NOTE(review): assumes unset timestamp fields are None --
                # confirm against the timestamp type.
                if (d.hour is not None and d.minute is not None and
                        "TIME" not in self):
                    self.add(TIME(encoding=f.encoding,
                                  text="%02d%02d" % (d.hour, d.minute)))

        # New frames added in v2.4
        v24_frames = [
            'ASPI', 'EQU2', 'RVA2', 'SEEK', 'SIGN', 'TDEN', 'TDOR',
            'TDRC', 'TDRL', 'TDTG', 'TIPL', 'TMCL', 'TMOO', 'TPRO',
            'TSOA', 'TSOP', 'TSOT', 'TSST',
        ]

        for key in v24_frames:
            if key in self:
                del self[key]

        # Recurse into chapters
        for f in self.getall("CHAP"):
            f.sub_frames.update_to_v23()
        for f in self.getall("CTOC"):
            f.sub_frames.update_to_v23()

    def _copy(self):
        """Creates a shallow copy of all tags

        Returns:
            tuple: the items of this tag and a dict mapping the HashKey
                of each CHAP/CTOC frame to the copy of its sub frames;
                pass it to _restore() to get back to this state.
        """

        items = self.items()
        subs = {}
        for f in (self.getall("CHAP") + self.getall("CTOC")):
            subs[f.HashKey] = f.sub_frames._copy()
        return (items, subs)

    def _restore(self, value):
        """Restores the state copied with _copy()

        Args:
            value (tuple): the return value of _copy()
        """

        items, subs = value
        self.clear()
        for key, frame in items:
            self[key] = frame
            if key in subs:
                frame.sub_frames._restore(subs[key])
500
501
def save_frame(frame, name=None, config=None):
    """Serialize a frame including its 10 byte frame header.

    Args:
        frame (Frame): the frame to serialize
        name (bytes): the frame ID to write; defaults to the name of the
            frame's class
        config (ID3SaveConfig): save settings; a default constructed
            ID3SaveConfig is used if None
    Returns:
        bytes: the header followed by the frame data, or ``b''`` for a
            TextFrame whose str() is empty
    Raises:
        ValueError: if ``config.v2_version`` is neither 3 nor 4
    """

    if config is None:
        config = ID3SaveConfig()

    # Empty text frames are not written at all
    if isinstance(frame, TextFrame) and len(str(frame)) == 0:
        return b''

    framedata = frame._writeData(config)

    # Large frames are intentionally not compressed, as this causes
    # iTunes and other programs to fail to find these frames, which
    # usually includes e.g. APIC. Hence no frame flags ever get set.
    flags = 0

    # The frame size is stored with 7 bits per byte for v2.4 and with
    # all 8 bits for v2.3.
    if config.v2_version == 4:
        bits = 7
    elif config.v2_version == 3:
        bits = 8
    else:
        raise ValueError(
            "Unsupported ID3v2 version: %r" % (config.v2_version,))

    datasize = BitPaddedInt.to_str(len(framedata), width=4, bits=bits)

    if name is not None:
        assert isinstance(name, bytes)
        frame_name = name
    else:
        frame_name = type(frame).__name__
        if PY3:
            frame_name = frame_name.encode("ascii")

    header = struct.pack('>4s4sH', frame_name, datasize, flags)
    return header + framedata
541
542
def read_frames(id3, data, frames):
    """Split raw tag data into frames. Does not error out.

    Args:
        id3 (ID3Header): the header of the tag the data belongs to
        data (bytes): the raw frame data
        frames: lookup from frame ID to frame class
    Returns:
        tuple: a list of parsed frames, a list with the raw header + data
            of each unsupported frame and the data left unconsumed
    """

    assert id3.version >= ID3Header._V22

    parsed = []
    unsupported = []

    # Before v2.4 the unsynchronisation flag applies to the whole data
    if id3.version < ID3Header._V24 and id3.f_unsynch:
        try:
            data = unsynch.decode(data)
        except ValueError:
            pass

    if id3.version >= ID3Header._V23:
        # v2.3 and v2.4: 10 byte frame headers
        if id3.version >= ID3Header._V24:
            to_size = determine_bpi(data, frames)
        else:
            to_size = int

        while data:
            raw_header = data[:10]
            try:
                name, size, flags = struct.unpack('>4sLH', raw_header)
            except struct.error:
                break  # not enough header
            if not name.strip(b'\x00'):
                break

            size = to_size(size)
            payload_end = 10 + size
            payload = data[10:payload_end]
            data = data[payload_end:]
            if size == 0:
                continue  # drop empty frames

            if PY3:
                try:
                    name = name.decode('ascii')
                except UnicodeDecodeError:
                    continue

            try:
                # someone writes 2.3 frames with 2.2 names
                if name[-1] == "\x00":
                    name = Frames_2_2[name[:-1]].__base__.__name__

                frame_cls = frames[name]
            except KeyError:
                if is_valid_frame_id(name):
                    unsupported.append(raw_header + payload)
                continue

            try:
                parsed.append(frame_cls._fromData(id3, flags, payload))
            except NotImplementedError:
                unsupported.append(raw_header + payload)
            except ID3JunkFrameError:
                pass
    elif id3.version >= ID3Header._V22:
        # v2.2: 6 byte frame headers, a 3 byte name and a 3 byte size
        while data:
            raw_header = data[0:6]
            try:
                name, size = struct.unpack('>3s3s', raw_header)
            except struct.error:
                break  # not enough header
            # Pad the size to 4 bytes to read it as a 32 bit integer
            size = struct.unpack('>L', b'\x00' + size)[0]
            if not name.strip(b'\x00'):
                break

            payload_end = 6 + size
            payload = data[6:payload_end]
            data = data[payload_end:]
            if size == 0:
                continue  # drop empty frames

            if PY3:
                try:
                    name = name.decode('ascii')
                except UnicodeDecodeError:
                    continue

            try:
                frame_cls = frames[name]
            except KeyError:
                if is_valid_frame_id(name):
                    unsupported.append(raw_header + payload)
                continue

            try:
                parsed.append(frame_cls._fromData(id3, 0, payload))
            except (ID3EncryptionUnsupportedError, NotImplementedError):
                unsupported.append(raw_header + payload)
            except ID3JunkFrameError:
                pass

    return parsed, unsupported, data
639