# Copyright 2004-2013 Joe Wreschnig, Michael Urman, Niklas Janlert,
#                     Steven Robertson, Nick Boultbee
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

import mutagen.id3

from quodlibet import config, const, print_d
from quodlibet import util
from quodlibet.util.iso639 import ISO_639_2
from quodlibet.util.path import get_temp_cover_file
from quodlibet.util.string import isascii

from ._audio import AudioFile, translate_errors, AudioFileError
from ._image import EmbeddedImage, APICType


def encoding_for(s):
    """Returns ID3 encoding ID best for string `s`

    Returns 3 when `isascii(s)` is true and 1 otherwise. In the ID3v2.4
    text-encoding byte, 3 is UTF-8 and 1 is UTF-16 with BOM.
    """
    return 3 if isascii(s) else 1


# Replaygain keys that are read from / written to TXXX frames
RG_KEYS = [
    "replaygain_track_peak", "replaygain_track_gain",
    "replaygain_album_peak", "replaygain_album_gain",
]


# ID3 is absolutely the worst thing ever.
class ID3File(AudioFile):
    """Audio file whose metadata is stored in ID3 frames.

    Subclasses are expected to set `Kind` to a callable that opens a file
    by name and returns an object exposing `tags`, `info`, `add_tags()`
    and `save()` (presumably a mutagen file type -- confirm in the
    subclasses).
    """

    # http://www.unixgods.org/~tilo/ID3/docs/ID3_comparison.html
    # http://www.id3.org/id3v2.4.0-frames.txt
    # Maps ID3 frame IDs to the tag names used on this object.
    IDS = {
        "TIT1": "grouping",
        "TIT2": "title",
        "TIT3": "version",
        "TPE1": "artist",
        "TPE2": "performer",
        "TPE3": "conductor",
        "TPE4": "arranger",
        "TEXT": "lyricist",
        "TCOM": "composer",
        "TENC": "encodedby",
        "TALB": "album",
        "TRCK": "tracknumber",
        "TPOS": "discnumber",
        "TSRC": "isrc",
        "TCOP": "copyright",
        "TPUB": "organization",
        "TSST": "discsubtitle",
        "TOLY": "author",
        "TMOO": "mood",
        "TBPM": "bpm",
        "TDRC": "date",
        "TDOR": "originaldate",
        "TOAL": "originalalbum",
        "TOPE": "originalartist",
        "WOAR": "website",
        "TSOP": "artistsort",
        "TSOA": "albumsort",
        "TSOT": "titlesort",
        "TSO2": "albumartistsort",
        "TSOC": "composersort",
        "TMED": "media",
        "TCMP": "compilation",
        # TLAN requires an ISO 639-2 language code, check manually
        #"TLAN": "language"
    }
    # Reverse of IDS: tag name -> frame ID
    SDI = {v: k for k, v in IDS.items()}

    # At various times, information for this came from
    # http://musicbrainz.org/docs/specs/metadata_tags.html
    # http://bugs.musicbrainz.org/ticket/1383
    # http://musicbrainz.org/doc/MusicBrainzTag
    # Maps TXXX frame descriptions to the tag names used on this object.
    TXXX_MAP = {
        u"MusicBrainz Release Group Id": "musicbrainz_releasegroupid",
        u"MusicBrainz Release Track Id": "musicbrainz_releasetrackid",
        u"MusicBrainz Artist Id": "musicbrainz_artistid",
        u"MusicBrainz Album Id": "musicbrainz_albumid",
        u"MusicBrainz Album Artist Id": "musicbrainz_albumartistid",
        u"MusicBrainz TRM Id": "musicbrainz_trmid",
        u"MusicIP PUID": "musicip_puid",
        u"MusicMagic Fingerprint": "musicip_fingerprint",
        u"MusicBrainz Album Status": "musicbrainz_albumstatus",
        u"MusicBrainz Album Type": "musicbrainz_albumtype",
        u"MusicBrainz Album Release Country": "releasecountry",
        u"MusicBrainz Disc Id": "musicbrainz_discid",
        u"ASIN": "asin",
        u"ALBUMARTISTSORT": "albumartistsort",
        u"BARCODE": "barcode",
    }
    # Reverse of TXXX_MAP: tag name -> TXXX description
    PAM_XXXT = {v: k for k, v in TXXX_MAP.items()}

    # Set by subclasses; called with a filename to open the file.
    Kind = None

    def __init__(self, filename):
        """Load `filename` through `Kind` and import its ID3 frames.

        Errors raised while opening the file go through
        `translate_errors()`. If the file has no tags an empty tag is
        added in memory so the rest of the method can iterate over it.
        """

        with translate_errors():
            audio = self.Kind(filename)
        if audio.tags is None:
            audio.add_tags()
        tag = audio.tags

        self._parse_info(audio.info)

        for frame in tag.values():
            # Branches ending in `continue` handle the frame completely.
            # The others only choose `name`; the text is extracted below.
            if frame.FrameID == "APIC" and len(frame.data):
                self.has_images = True
                continue
            elif frame.FrameID == "TCON":
                self["genre"] = "\n".join(frame.genres)
                continue
            elif (frame.FrameID == "UFID" and
                    frame.owner == "http://musicbrainz.org"):
                self["musicbrainz_trackid"] = frame.data.decode(
                    "utf-8", "replace")
                continue
            elif frame.FrameID == "POPM":
                rating = frame.rating / 255.0
                if frame.email == const.EMAIL:
                    # Default address: never overrides values already set
                    try:
                        self.setdefault("~#playcount", frame.count)
                    except AttributeError:
                        # frame has no count attribute
                        pass
                    self.setdefault("~#rating", rating)
                elif frame.email == config.get("editing", "save_email"):
                    # Configured address: overrides earlier values
                    try:
                        self["~#playcount"] = frame.count
                    except AttributeError:
                        # frame has no count attribute
                        pass
                    self["~#rating"] = rating
                continue
            elif frame.FrameID == "COMM" and frame.desc == "":
                name = "comment"
            elif frame.FrameID in ["COMM", "TXXX"]:
                if frame.desc.startswith("QuodLibet::"):
                    # strip the "QuodLibet::" prefix (11 characters)
                    name = frame.desc[11:]
                elif frame.desc in self.TXXX_MAP:
                    name = self.TXXX_MAP[frame.desc]
                else:
                    continue
            elif frame.FrameID == "RVA2":
                self.__process_rg(frame)
                continue
            elif frame.FrameID == "TMCL":
                for role, person in frame.people:
                    key = self.__validate_name("performer:" + role)
                    if key:
                        self.add(key, person)
                continue
            elif frame.FrameID == "TLAN":
                self["language"] = "\n".join(frame.text)
                continue
            elif frame.FrameID == "USLT":
                name = "lyrics"
            else:
                name = self.IDS.get(frame.FrameID, "").lower()

            name = self.__validate_name(name)
            if not name:
                continue
            name = name.lower()

            id3id = frame.FrameID
            if id3id.startswith("T"):
                text = "\n".join(map(str, frame.text))
            elif id3id == "COMM":
                text = "\n".join(frame.text)
            elif id3id == "USLT":
                # lyrics are single string, not list
                text = frame.text
            elif id3id.startswith("W"):
                text = frame.url
                # forces the latin-1 re-decoding path below for URLs
                frame.encoding = 0
            else:
                continue

            if not text:
                continue
            text = self.__distrust_latin1(text, frame.encoding)
            if text is None:
                continue

            # Repeated frames for the same name become extra lines
            if name in self:
                self[name] += "\n" + text
            else:
                self[name] = text
            self[name] = self[name].strip()

            # to catch a missing continue above
            del name

        # foobar2000 writes long dates in a TXXX DATE tag, leaving the TDRC
        # tag out. Read the TXXX DATE, but only if the TDRC tag doesn't exist
        # to avoid reverting or duplicating tags in existing libraries.
        if audio.tags and "date" not in self:
            for frame in tag.getall('TXXX:DATE'):
                self["date"] = "\n".join(map(str, frame.text))

        # Read TXXX replaygain and replace previously read values from RVA2
        for frame in tag.getall("TXXX"):
            k = frame.desc.lower()
            if k in RG_KEYS:
                self[str(k)] = u"\n".join(map(str, frame.text))

        self.sanitize(filename)

    def _parse_info(self, info):
        """Optionally implement in subclasses"""

        pass

    def __validate_name(self, k):
        """Returns a ascii string or None if the key isn't supported

        A key is rejected when it is empty, contains "=" or "~", or
        contains any non-ASCII character.
        """

        if not k or "=" in k or "~" in k:
            return None

        # Encoding with "replace" turns non-ASCII characters into "?", so
        # the round trip only equals `k` for pure ASCII keys.
        if k.encode("ascii", "replace").decode("ascii") != k:
            return None

        return k

    def __process_rg(self, frame):
        """Set replaygain gain/peak values from an RVA2 `frame`.

        Only frames with `channel == 1` are used. The description picks
        album or track; any other description is treated as track gain,
        but only while no track gain has been read yet.
        """

        if frame.channel == 1:
            if frame.desc == "album":
                k = "album"
            elif frame.desc == "track":
                k = "track"
            elif "replaygain_track_gain" not in self:
                k = "track"  # fallback
            else:
                return
            self["replaygain_%s_gain" % k] = "%+f dB" % frame.gain
            self["replaygain_%s_peak" % k] = str(frame.peak)

    @util.cached_property
    def CODECS(self):
        """Codec names to try, in order, when re-decoding latin-1 text.

        "utf-8" comes first, then the whitespace separated names from the
        "editing"/"id3encoding" config value, then "iso-8859-1".
        """

        codecs = ["utf-8"]
        codecs_conf = config.get("editing", "id3encoding")
        codecs.extend(codecs_conf.strip().split())
        codecs.append("iso-8859-1")
        return codecs

    def __distrust_latin1(self, text, encoding):
        """Re-decode `text` if its frame claims encoding 0 (latin-1).

        For any other `encoding` the text is returned unchanged. For
        encoding 0 the text is encoded back to iso-8859-1 bytes and
        decoded with the first codec in `CODECS` that accepts it. Returns
        None if no codec succeeds.
        """

        assert isinstance(text, str)
        if encoding == 0:
            try:
                text = text.encode('iso-8859-1')
            except UnicodeEncodeError:
                # mutagen might give us text not matching the encoding
                # https://github.com/quodlibet/mutagen/issues/307
                return text
            for codec in self.CODECS:
                try:
                    text = text.decode(codec)
                except (UnicodeError, LookupError):
                    # wrong guess or unknown codec name, try the next one
                    pass
                else:
                    break
            else:
                # Safeguard only: CODECS always ends with "iso-8859-1",
                # which can decode any byte sequence.
                return None
        return text

    def write(self):
        """Write the current tags back to the file as ID3 frames.

        The file is reopened through `Kind`. Frames this class manages
        are deleted and rebuilt from the current values, then the file
        is saved. Errors from opening and saving go through
        `translate_errors()`.
        """

        with translate_errors():
            audio = self.Kind(self['~filename'])

        if audio.tags is None:
            audio.add_tags()
        tag = audio.tags

        # prefill TMCL with the ones we can't read
        mcl = tag.get("TMCL", mutagen.id3.TMCL(encoding=3, people=[]))
        mcl.people = [(r, n) for (r, n) in mcl.people
                      if not self.__validate_name(r)]

        # delete all TXXX/COMM we can read except empty COMM
        for prefix in ["COMM:", "TXXX:"]:
            for t in tag.getall(prefix + "QuodLibet:"):
                if t.desc and self.__validate_name(t.desc):
                    del tag[t.HashKey]

        for key in ["UFID:http://musicbrainz.org",
                    "TMCL",
                    "POPM:%s" % const.EMAIL,
                    "POPM:%s" % config.get("editing", "save_email")]:
            if key in tag:
                del tag[key]

        # Frames with a direct frame ID <-> tag name mapping
        for key, id3name in self.SDI.items():
            tag.delall(id3name)
            if key not in self:
                continue
            enc = encoding_for(self[key])
            frame_cls = mutagen.id3.Frames[id3name]
            text = self[key].split("\n")
            if id3name == "WOAR":
                # one URL frame per value
                for t in text:
                    tag.add(frame_cls(url=t))
            else:
                tag.add(frame_cls(encoding=enc, text=text))

        # Keys written by dedicated code further down; they must not also
        # be written as generic "QuodLibet::" TXXX frames.
        dontwrite = {"genre", "comment", "musicbrainz_trackid", "lyrics"}
        dontwrite.update(RG_KEYS)
        dontwrite.update(self.TXXX_MAP.values())

        if "musicbrainz_trackid" in self.realkeys():
            f = mutagen.id3.UFID(
                owner="http://musicbrainz.org",
                data=self["musicbrainz_trackid"].encode("utf-8"))
            tag.add(f)

        # Issue 439 - Only write valid ISO 639-2 codes to TLAN (else TXXX)
        tag.delall("TLAN")
        if "language" in self:
            langs = self["language"].split("\n")
            if all(lang in ISO_639_2 for lang in langs):
                # Save value(s) to TLAN tag. Guaranteed to be ASCII here
                tag.add(mutagen.id3.TLAN(encoding=3, text=langs))
                dontwrite.add("language")
            else:
                print_d("Not using invalid language code '%s' in TLAN" %
                        self["language"])

        # Filter out known keys, and ones set not to write [generically].
        keys_to_write = [k for k in self.realkeys()
                         if k not in self.SDI and k not in dontwrite]
        for key in keys_to_write:
            if key.startswith("performer:"):
                # performer roles go into the TMCL frame, not a TXXX frame
                mcl.people.append([key.split(":", 1)[1], self[key]])
                continue

            f = mutagen.id3.TXXX(
                encoding=encoding_for(self[key]),
                text=self[key].split("\n"),
                desc=u"QuodLibet::%s" % key)
            tag.add(f)

        if mcl.people:
            tag.add(mcl)

        if "genre" in self:
            enc = encoding_for(self["genre"])
            t = self["genre"].split("\n")
            tag.add(mutagen.id3.TCON(encoding=enc, text=t))
        else:
            try:
                del tag["TCON"]
            except KeyError:
                # no genre frame to remove
                pass

        tag.delall("COMM:")
        if "comment" in self:
            enc = encoding_for(self["comment"])
            t = self["comment"].split("\n")
            tag.add(mutagen.id3.COMM(encoding=enc, text=t, desc=u"",
                                     lang="\x00\x00\x00"))

        tag.delall("USLT")
        if "lyrics" in self:
            enc = encoding_for(self["lyrics"])
            # lyrics are single string, not array
            tag.add(mutagen.id3.USLT(encoding=enc, text=self["lyrics"],
                                     desc=u"", lang="\x00\x00\x00"))

        # Delete old foobar replaygain ..
        for frame in tag.getall("TXXX"):
            if frame.desc.lower() in RG_KEYS:
                del tag[frame.HashKey]

        # .. write new one
        for k in RG_KEYS:
            # Add new ones
            if k in self:
                value = self[k]
                tag.add(mutagen.id3.TXXX(encoding=encoding_for(value),
                                         text=value.split("\n"),
                                         desc=k))

        # we shouldn't delete all, but we use unknown ones as fallback, so make
        # sure they don't come back after reloading
        for t in tag.getall("RVA2"):
            if t.channel == 1:
                del tag[t.HashKey]

        for k in ["track", "album"]:
            if ('replaygain_%s_gain' % k) in self:
                try:
                    gain = float(self["replaygain_%s_gain" % k].split()[0])
                except (ValueError, IndexError):
                    gain = 0
                try:
                    peak = float(self["replaygain_%s_peak" % k])
                except (ValueError, KeyError):
                    peak = 0
                # https://github.com/quodlibet/quodlibet/issues/1027
                peak = max(min(1.9, peak), 0)
                gain = max(min(63.9, gain), -64)
                f = mutagen.id3.RVA2(desc=k, channel=1, gain=gain, peak=peak)
                tag.add(f)

        for key in self.TXXX_MAP:
            try:
                del tag["TXXX:" + key]
            except KeyError:
                # frame not present
                pass
        for key in self.PAM_XXXT:
            if key in self.SDI:
                # we already write it back using non-TXXX frames
                continue
            if key in self:
                value = self[key]
                f = mutagen.id3.TXXX(encoding=encoding_for(value),
                                     text=value.split("\n"),
                                     desc=self.PAM_XXXT[key])
                tag.add(f)

        if (config.getboolean("editing", "save_to_songs") and
                (self.has_rating or self.get("~#playcount", 0) != 0)):
            # fall back to the default address if none is configured
            email = config.get("editing", "save_email").strip()
            email = email or const.EMAIL
            t = mutagen.id3.POPM(email=email,
                                 rating=int(255 * self("~#rating")),
                                 count=self.get("~#playcount", 0))
            tag.add(t)

        with translate_errors():
            audio.save()
        self.sanitize()

    can_change_images = True

    def clear_images(self):
        """Delete all embedded images"""

        # NOTE(review): the save is kept inside translate_errors() so that
        # write failures are translated as well -- confirm against upstream.
        with translate_errors():
            audio = self.Kind(self["~filename"])

            if audio.tags is not None:
                audio.tags.delall("APIC")
                audio.save()

        self.has_images = False

    def get_images(self):
        """Returns a list of embedded images

        The list has one EmbeddedImage per APIC frame, sorted by each
        image's `sort_key`. It is empty if the file can't be loaded or
        has no tags.
        """

        images = []

        try:
            with translate_errors():
                audio = self.Kind(self["~filename"])
        except AudioFileError:
            return images

        tag = audio.tags
        if tag is None:
            return images

        for frame in tag.getall("APIC"):
            f = get_temp_cover_file(frame.data)
            images.append(EmbeddedImage(f, frame.mime, type_=frame.type))

        images.sort(key=lambda c: c.sort_key)
        return images

    def get_primary_image(self):
        """Returns the primary embedded image

        Returns None if the file can't be loaded, has no tags, or has no
        APIC frame.
        """

        try:
            with translate_errors():
                audio = self.Kind(self["~filename"])
        except AudioFileError:
            return None

        tag = audio.tags
        if tag is None:
            return None

        # get the APIC frame with type == 3 (cover) or the first one
        cover = None
        for frame in tag.getall("APIC"):
            cover = cover or frame
            if frame.type == APICType.COVER_FRONT:
                cover = frame
                break

        if cover:
            f = get_temp_cover_file(cover.data)
            return EmbeddedImage(f, cover.mime, type_=cover.type)

    def set_image(self, image):
        """Replaces all embedded images by the passed image

        `image` must provide `read()` and `mime_type`. Raises
        AudioFileError if reading the image data fails.
        """

        with translate_errors():
            audio = self.Kind(self["~filename"])

        if audio.tags is None:
            audio.add_tags()

        tag = audio.tags

        try:
            data = image.read()
        except EnvironmentError as e:
            raise AudioFileError(e)

        tag.delall("APIC")
        frame = mutagen.id3.APIC(
            encoding=3, mime=image.mime_type, type=APICType.COVER_FRONT,
            desc=u"", data=data)
        tag.add(frame)

        with translate_errors():
            audio.save()

        self.has_images = True