1# Copyright 2004-2013 Joe Wreschnig, Michael Urman, Niklas Janlert,
2#                     Steven Robertson, Nick Boultbee
3#
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 2 of the License, or
7# (at your option) any later version.
8
9import mutagen.id3
10
11from quodlibet import config, const, print_d
12from quodlibet import util
13from quodlibet.util.iso639 import ISO_639_2
14from quodlibet.util.path import get_temp_cover_file
15from quodlibet.util.string import isascii
16
17from ._audio import AudioFile, translate_errors, AudioFileError
18from ._image import EmbeddedImage, APICType
19
20
def encoding_for(s):
    """Pick the ID3 text-encoding ID to use when writing string `s`.

    Pure-ASCII text gets encoding 3, anything else gets encoding 1
    (UTF-8 and UTF-16 respectively in mutagen's ID3 `Encoding` enum).
    """
    if isascii(s):
        return 3
    return 1
24
25
# Replaygain tag keys. They are matched (case-insensitively) against TXXX
# frame descriptions when reading and used as TXXX descriptions when writing.
RG_KEYS = [
    "replaygain_track_peak",
    "replaygain_track_gain",
    "replaygain_album_peak",
    "replaygain_album_gain",
]
30
31
32# ID3 is absolutely the worst thing ever.
class ID3File(AudioFile):
    """Common base for audio files whose metadata is stored in an ID3 tag.

    `Kind` is the callable used to open a file by name. It is None here,
    so concrete subclasses are expected to set it.
    """

    # http://www.unixgods.org/~tilo/ID3/docs/ID3_comparison.html
    # http://www.id3.org/id3v2.4.0-frames.txt
    # Maps an ID3 frame ID to the tag key its value is stored under.
    IDS = {"TIT1": "grouping",
           "TIT2": "title",
           "TIT3": "version",
           "TPE1": "artist",
           "TPE2": "performer",
           "TPE3": "conductor",
           "TPE4": "arranger",
           "TEXT": "lyricist",
           "TCOM": "composer",
           "TENC": "encodedby",
           "TALB": "album",
           "TRCK": "tracknumber",
           "TPOS": "discnumber",
           "TSRC": "isrc",
           "TCOP": "copyright",
           "TPUB": "organization",
           "TSST": "discsubtitle",
           "TOLY": "author",
           "TMOO": "mood",
           "TBPM": "bpm",
           "TDRC": "date",
           "TDOR": "originaldate",
           "TOAL": "originalalbum",
           "TOPE": "originalartist",
           "WOAR": "website",
           "TSOP": "artistsort",
           "TSOA": "albumsort",
           "TSOT": "titlesort",
           "TSO2": "albumartistsort",
           "TSOC": "composersort",
           "TMED": "media",
           "TCMP": "compilation",
           # TLAN requires an ISO 639-2 language code, so it is deliberately
           # left out of this table and checked manually when writing.
           #"TLAN": "language"
    }
    # Inverse of IDS: tag key -> ID3 frame ID.
    SDI = dict([(v, k) for k, v in IDS.items()])

    # At various times, information for this came from
    # http://musicbrainz.org/docs/specs/metadata_tags.html
    # http://bugs.musicbrainz.org/ticket/1383
    # http://musicbrainz.org/doc/MusicBrainzTag
    # Maps the description of a TXXX (or COMM) frame to the tag key its
    # value is stored under.
    TXXX_MAP = {
        u"MusicBrainz Release Group Id": "musicbrainz_releasegroupid",
        u"MusicBrainz Release Track Id": "musicbrainz_releasetrackid",
        u"MusicBrainz Artist Id": "musicbrainz_artistid",
        u"MusicBrainz Album Id": "musicbrainz_albumid",
        u"MusicBrainz Album Artist Id": "musicbrainz_albumartistid",
        u"MusicBrainz TRM Id": "musicbrainz_trmid",
        u"MusicIP PUID": "musicip_puid",
        u"MusicMagic Fingerprint": "musicip_fingerprint",
        u"MusicBrainz Album Status": "musicbrainz_albumstatus",
        u"MusicBrainz Album Type": "musicbrainz_albumtype",
        u"MusicBrainz Album Release Country": "releasecountry",
        u"MusicBrainz Disc Id": "musicbrainz_discid",
        u"ASIN": "asin",
        u"ALBUMARTISTSORT": "albumartistsort",
        u"BARCODE": "barcode",
        }
    # Inverse of TXXX_MAP: tag key -> TXXX frame description.
    PAM_XXXT = dict([(v, k) for k, v in TXXX_MAP.items()])

    # Callable that opens a file by name; called as self.Kind(filename).
    Kind = None
98
    def __init__(self, filename):
        """Load `filename` and fill this song with values from its ID3 tag.

        The file is opened through `self.Kind`. If it has no tag, an empty
        one is added to the in-memory object so the frame loop can run.
        Each frame is either fully handled by a special case (genre,
        ratings, replaygain, performers, language, ...) or mapped to a tag
        key, in which case its text is appended, newline separated, to any
        value already stored under that key.
        """
        with translate_errors():
            audio = self.Kind(filename)
        if audio.tags is None:
            audio.add_tags()
        tag = audio.tags

        self._parse_info(audio.info)

        for frame in tag.values():
            # Branches that end in `continue` handle the frame completely.
            # The remaining branches only choose `name` and fall through to
            # the generic text extraction below the chain.
            if frame.FrameID == "APIC" and len(frame.data):
                self.has_images = True
                continue
            elif frame.FrameID == "TCON":
                self["genre"] = "\n".join(frame.genres)
                continue
            elif (frame.FrameID == "UFID" and
                  frame.owner == "http://musicbrainz.org"):
                self["musicbrainz_trackid"] = frame.data.decode("utf-8",
                                                                "replace")
                continue
            elif frame.FrameID == "POPM":
                # Scale the 0..255 frame rating down to 0..1
                rating = frame.rating / 255.0
                if frame.email == const.EMAIL:
                    # Values stored under the default email only fill in
                    # what is still missing (setdefault) ...
                    try:
                        self.setdefault("~#playcount", frame.count)
                    except AttributeError:
                        # this frame has no `count` attribute
                        pass
                    self.setdefault("~#rating", rating)
                elif frame.email == config.get("editing", "save_email"):
                    # ... while values stored under the configured email
                    # overwrite whatever is there.
                    try:
                        self["~#playcount"] = frame.count
                    except AttributeError:
                        pass
                    self["~#rating"] = rating
                continue
            elif frame.FrameID == "COMM" and frame.desc == "":
                name = "comment"
            elif frame.FrameID in ["COMM", "TXXX"]:
                if frame.desc.startswith("QuodLibet::"):
                    # strip the "QuodLibet::" prefix (11 characters)
                    name = frame.desc[11:]
                elif frame.desc in self.TXXX_MAP:
                    name = self.TXXX_MAP[frame.desc]
                else:
                    continue
            elif frame.FrameID == "RVA2":
                self.__process_rg(frame)
                continue
            elif frame.FrameID == "TMCL":
                # Each (role, name) pair becomes a "performer:<role>" key,
                # provided that key name passes validation.
                for role, name in frame.people:
                    key = self.__validate_name("performer:" + role)
                    if key:
                        self.add(key, name)
                continue
            elif frame.FrameID == "TLAN":
                self["language"] = "\n".join(frame.text)
                continue
            elif frame.FrameID == "USLT":
                name = "lyrics"
            else:
                # Unknown frame IDs map to "" which fails validation below
                name = self.IDS.get(frame.FrameID, "").lower()

            name = self.__validate_name(name)
            if not name:
                continue
            name = name.lower()

            # Extract the frame's text according to the frame type
            id3id = frame.FrameID
            if id3id.startswith("T"):
                text = "\n".join(map(str, frame.text))
            elif id3id == "COMM":
                text = "\n".join(frame.text)
            elif id3id == "USLT":
                # lyrics are single string, not list
                text = frame.text
            elif id3id.startswith("W"):
                text = frame.url
                # Mark as encoding 0 so the URL goes through the latin-1
                # re-decoding in __distrust_latin1 below
                frame.encoding = 0
            else:
                continue

            if not text:
                continue
            text = self.__distrust_latin1(text, frame.encoding)
            if text is None:
                continue

            # Multiple frames for the same key are joined with newlines
            if name in self:
                self[name] += "\n" + text
            else:
                self[name] = text
            self[name] = self[name].strip()

            # to catch a missing continue above
            del name

        # foobar2000 writes long dates in a TXXX DATE tag, leaving the TDRC
        # tag out. Read the TXXX DATE, but only if the TDRC tag doesn't exist
        # to avoid reverting or duplicating tags in existing libraries.
        if audio.tags and "date" not in self:
            for frame in tag.getall('TXXX:DATE'):
                self["date"] = "\n".join(map(str, frame.text))

        # Read TXXX replaygain and replace previously read values from RVA2
        for frame in tag.getall("TXXX"):
            k = frame.desc.lower()
            if k in RG_KEYS:
                self[str(k)] = u"\n".join(map(str, frame.text))

        self.sanitize(filename)
209
210    def _parse_info(self, info):
211        """Optionally implement in subclasses"""
212
213        pass
214
215    def __validate_name(self, k):
216        """Returns a ascii string or None if the key isn't supported"""
217
218        if not k or "=" in k or "~" in k:
219            return
220
221        if not (k and "=" not in k and "~" not in k
222                and k.encode("ascii", "replace").decode("ascii") == k):
223            return
224
225        return k
226
227    def __process_rg(self, frame):
228        if frame.channel == 1:
229            if frame.desc == "album":
230                k = "album"
231            elif frame.desc == "track":
232                k = "track"
233            elif "replaygain_track_gain" not in self:
234                k = "track"  # fallback
235            else:
236                return
237            self["replaygain_%s_gain" % k] = "%+f dB" % frame.gain
238            self["replaygain_%s_peak" % k] = str(frame.peak)
239
240    @util.cached_property
241    def CODECS(self):
242        codecs = ["utf-8"]
243        codecs_conf = config.get("editing", "id3encoding")
244        codecs.extend(codecs_conf.strip().split())
245        codecs.append("iso-8859-1")
246        return codecs
247
248    def __distrust_latin1(self, text, encoding):
249        assert isinstance(text, str)
250        if encoding == 0:
251            try:
252                text = text.encode('iso-8859-1')
253            except UnicodeEncodeError:
254                # mutagen might give us text not matching the encoding
255                # https://github.com/quodlibet/mutagen/issues/307
256                return text
257            for codec in self.CODECS:
258                try:
259                    text = text.decode(codec)
260                except (UnicodeError, LookupError):
261                    pass
262                else:
263                    break
264            else:
265                return None
266        return text
267
    def write(self):
        """Write this song's tags into the file's ID3 tag and save it.

        The file is re-opened through `self.Kind`. Frames this class knows
        how to read are removed and then re-created from the current
        values. Keys without a dedicated frame are stored as TXXX frames
        with a "QuodLibet::<key>" description, and "performer:<role>" keys
        go into the TMCL frame. Opening and saving run inside
        `translate_errors()`.
        """
        with translate_errors():
            audio = self.Kind(self['~filename'])

        if audio.tags is None:
            audio.add_tags()
        tag = audio.tags

        # prefill TMCL with the ones we can't read
        mcl = tag.get("TMCL", mutagen.id3.TMCL(encoding=3, people=[]))
        mcl.people = [(r, n) for (r, n) in mcl.people
                      if not self.__validate_name(r)]

        # delete all TXXX/COMM we can read except empty COMM
        for frame in ["COMM:", "TXXX:"]:
            for t in tag.getall(frame + "QuodLibet:"):
                if t.desc and self.__validate_name(t.desc):
                    del tag[t.HashKey]

        # Drop the single-instance frames that get rebuilt further down
        for key in ["UFID:http://musicbrainz.org",
                    "TMCL",
                    "POPM:%s" % const.EMAIL,
                    "POPM:%s" % config.get("editing", "save_email")]:
            if key in tag:
                del(tag[key])

        # Rewrite every frame that has a direct key <-> frame ID mapping
        for key, id3name in self.SDI.items():
            tag.delall(id3name)
            if key not in self:
                continue
            enc = encoding_for(self[key])
            Kind = mutagen.id3.Frames[id3name]
            text = self[key].split("\n")
            if id3name == "WOAR":
                # URL frames take one url each and no encoding
                for t in text:
                    tag.add(Kind(url=t))
            else:
                tag.add(Kind(encoding=enc, text=text))

        # Keys that have their own handling in this method and therefore
        # must not also be written as generic "QuodLibet::" TXXX frames
        dontwrite = ["genre", "comment", "musicbrainz_trackid", "lyrics"] \
            + RG_KEYS + list(self.TXXX_MAP.values())

        if "musicbrainz_trackid" in self.realkeys():
            f = mutagen.id3.UFID(
                owner="http://musicbrainz.org",
                data=self["musicbrainz_trackid"].encode("utf-8"))
            tag.add(f)

        # Issue 439 - Only write valid ISO 639-2 codes to TLAN (else TXXX)
        tag.delall("TLAN")
        if "language" in self:
            langs = self["language"].split("\n")
            if all([lang in ISO_639_2 for lang in langs]):
                # Save value(s) to TLAN tag. Guaranteed to be ASCII here
                tag.add(mutagen.id3.TLAN(encoding=3, text=langs))
                dontwrite.append("language")
            else:
                # "language" stays out of dontwrite, so the generic loop
                # below stores it in a TXXX frame instead
                print_d("Not using invalid language code '%s' in TLAN" %
                        self["language"])

        # Filter out known keys, and ones set not to write [generically].
        keys_to_write = filter(lambda k: not (k in self.SDI or k in dontwrite),
                               self.realkeys())
        for key in keys_to_write:
            enc = encoding_for(self[key])
            if key.startswith("performer:"):
                # "performer:<role>" keys are collected into the TMCL frame
                mcl.people.append([key.split(":", 1)[1], self[key]])
                continue

            f = mutagen.id3.TXXX(
                encoding=enc, text=self[key].split("\n"),
                desc=u"QuodLibet::%s" % key)
            tag.add(f)

        if mcl.people:
            tag.add(mcl)

        if "genre" in self:
            enc = encoding_for(self["genre"])
            t = self["genre"].split("\n")
            tag.add(mutagen.id3.TCON(encoding=enc, text=t))
        else:
            try:
                del(tag["TCON"])
            except KeyError:
                pass

        # The comment lives in the COMM frame with an empty description
        tag.delall("COMM:")
        if "comment" in self:
            enc = encoding_for(self["comment"])
            t = self["comment"].split("\n")
            tag.add(mutagen.id3.COMM(encoding=enc, text=t, desc=u"",
                                     lang="\x00\x00\x00"))

        tag.delall("USLT")
        if "lyrics" in self:
            enc = encoding_for(self["lyrics"])
            # lyrics are single string, not array
            tag.add(mutagen.id3.USLT(encoding=enc, text=self["lyrics"],
                                     desc=u"", lang="\x00\x00\x00"))

        # Delete old foobar replaygain ..
        for frame in tag.getall("TXXX"):
            if frame.desc.lower() in RG_KEYS:
                del tag[frame.HashKey]

        # .. write new one
        for k in RG_KEYS:
            # Add new ones
            if k in self:
                value = self[k]
                tag.add(mutagen.id3.TXXX(encoding=encoding_for(value),
                                         text=value.split("\n"),
                                         desc=k))

        # we shouldn't delete all, but we use unknown ones as fallback, so make
        # sure they don't come back after reloading
        for t in tag.getall("RVA2"):
            if t.channel == 1:
                del tag[t.HashKey]

        # Also store replaygain as RVA2 frames, one per track/album
        for k in ["track", "album"]:
            if ('replaygain_%s_gain' % k) in self:
                # The gain value looks like "<number> dB"; an unparsable
                # gain or peak falls back to 0
                try:
                    gain = float(self["replaygain_%s_gain" % k].split()[0])
                except (ValueError, IndexError):
                    gain = 0
                try:
                    peak = float(self["replaygain_%s_peak" % k])
                except (ValueError, KeyError):
                    peak = 0
                # Clamp both values before building the frame, see
                # https://github.com/quodlibet/quodlibet/issues/1027
                peak = max(min(1.9, peak), 0)
                gain = max(min(63.9, gain), -64)
                f = mutagen.id3.RVA2(desc=k, channel=1, gain=gain, peak=peak)
                tag.add(f)

        # Replace the TXXX frames that have a fixed description mapping
        for key in self.TXXX_MAP:
            try:
                del(tag["TXXX:" + key])
            except KeyError:
                pass
        for key in self.PAM_XXXT:
            if key in self.SDI:
                # we already write it back using non-TXXX frames
                continue
            if key in self:
                value = self[key]
                f = mutagen.id3.TXXX(encoding=encoding_for(value),
                                     text=value.split("\n"),
                                     desc=self.PAM_XXXT[key])
                tag.add(f)

        # Store rating and play count in a POPM frame, but only when saving
        # them to songs is enabled and there is something to store
        if (config.getboolean("editing", "save_to_songs") and
                (self.has_rating or self.get("~#playcount", 0) != 0)):
            email = config.get("editing", "save_email").strip()
            email = email or const.EMAIL
            # The rating is scaled by 255 for the frame
            t = mutagen.id3.POPM(email=email,
                                 rating=int(255 * self("~#rating")),
                                 count=self.get("~#playcount", 0))
            tag.add(t)

        with translate_errors():
            audio.save()
        self.sanitize()
433
    # Flag paired with clear_images() and set_image() below, which modify
    # the embedded images. NOTE(review): how callers use it is not visible
    # here - confirm against AudioFile.
    can_change_images = True
435
436    def clear_images(self):
437        """Delete all embedded images"""
438
439        with translate_errors():
440            audio = self.Kind(self["~filename"])
441
442            if audio.tags is not None:
443                audio.tags.delall("APIC")
444                audio.save()
445
446        self.has_images = False
447
448    def get_images(self):
449        """Returns a list of embedded images"""
450
451        images = []
452
453        try:
454            with translate_errors():
455                audio = self.Kind(self["~filename"])
456        except AudioFileError:
457            return images
458
459        tag = audio.tags
460        if tag is None:
461            return images
462
463        for frame in tag.getall("APIC"):
464            f = get_temp_cover_file(frame.data)
465            images.append(EmbeddedImage(f, frame.mime, type_=frame.type))
466
467        images.sort(key=lambda c: c.sort_key)
468        return images
469
470    def get_primary_image(self):
471        """Returns the primary embedded image"""
472
473        try:
474            with translate_errors():
475                audio = self.Kind(self["~filename"])
476        except AudioFileError:
477            return
478
479        tag = audio.tags
480        if tag is None:
481            return
482
483        # get the APIC frame with type == 3 (cover) or the first one
484        cover = None
485        for frame in tag.getall("APIC"):
486            cover = cover or frame
487            if frame.type == APICType.COVER_FRONT:
488                cover = frame
489                break
490
491        if cover:
492            f = get_temp_cover_file(cover.data)
493            return EmbeddedImage(f, cover.mime, type_=cover.type)
494
495    def set_image(self, image):
496        """Replaces all embedded images by the passed image"""
497
498        with translate_errors():
499            audio = self.Kind(self["~filename"])
500
501        if audio.tags is None:
502            audio.add_tags()
503
504        tag = audio.tags
505
506        try:
507            data = image.read()
508        except EnvironmentError as e:
509            raise AudioFileError(e)
510
511        tag.delall("APIC")
512        frame = mutagen.id3.APIC(
513            encoding=3, mime=image.mime_type, type=APICType.COVER_FRONT,
514            desc=u"", data=data)
515        tag.add(frame)
516
517        with translate_errors():
518            audio.save()
519
520        self.has_images = True
521