# -*- coding: utf-8 -*-
#
# Picard, the next-generation MusicBrainz tagger
#
# Copyright (C) 2006-2008, 2011 Lukáš Lalinský
# Copyright (C) 2009, 2015, 2018-2020 Philipp Wolfer
# Copyright (C) 2011-2014 Michael Wiencek
# Copyright (C) 2012 Chad Wilson
# Copyright (C) 2012 Johannes Weißl
# Copyright (C) 2012-2014, 2018 Wieland Hoffmann
# Copyright (C) 2013-2014, 2016, 2018-2020 Laurent Monin
# Copyright (C) 2013-2014, 2017 Sophist-UK
# Copyright (C) 2016 Rahul Raturi
# Copyright (C) 2016-2017 Sambhav Kothari
# Copyright (C) 2017-2018 Antonio Larrosa
# Copyright (C) 2018 Vishal Choudhary
# Copyright (C) 2018 Xincognito10
# Copyright (C) 2020 Ray Bouchard
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.


from collections import namedtuple
from collections.abc import (
    Iterable,
    MutableMapping,
)
from functools import partial

from PyQt5.QtCore import QObject

from picard.config import get_config
from picard.mbjson import (
    artist_credit_from_node,
    get_score,
)
from picard.plugin import (
    PluginFunctions,
    PluginPriority,
)
from picard.similarity import similarity2
from picard.util import (
    extract_year_from_date,
    linear_combination_of_weights,
)
from picard.util.imagelist import ImageList
from picard.util.tags import PRESERVED_TAGS


# Separator used when a multi-valued tag is joined into a single string
# (see Metadata.get / Metadata.multi_valued_joiner).
MULTI_VALUED_JOINER = '; '

# lengths difference over this number of milliseconds will give a score of 0.0
# equal lengths will give a score of 1.0
# example
#  a      b      score
#  20000  0      0.333333333333
#  20000  10000  0.666666666667
#  20000  20000  1.0
#  20000  30000  0.666666666667
#  20000  40000  0.333333333333
#  20000  50000  0.0
LENGTH_SCORE_THRES_MS = 30000

# Result records for the similarity comparisons below; `similarity` is a
# float score, the remaining fields carry the matched MusicBrainz nodes.
SimMatchTrack = namedtuple('SimMatchTrack', 'similarity releasegroup release track')
SimMatchRelease = namedtuple('SimMatchRelease', 'similarity release')


def weights_from_release_type_scores(parts, release, release_type_scores,
                                     weight_release_type=1):
    """Append a (score, weight) pair for the release's release-group types.

    This function generates a score that determines how likely this release
    will be selected in a lookup. The score goes from 0 to 1, with 1 being
    the most likely to be chosen and 0 the least likely. The score is based
    on the preferences for the release-types found in this release: it takes
    the scores of the primary type (and secondary types if found) and
    averages them. A type not listed in `release_type_scores` falls back to
    the score of the 'Other' type, or 0.5 if 'Other' does not exist.
    Appends (score, weight_release_type) to the passed `parts` list.
    """
    # if our preference is zero for the release_type, force to never return
    # this recording by using a large zero weight. This means it only gets
    # picked if there are no others at all.
    skip_release = False

    type_scores = dict(release_type_scores)
    score = 0.0
    if 'release-group' in release and 'primary-type' in release['release-group']:
        types_found = [release['release-group']['primary-type']]
        if 'secondary-types' in release['release-group']:
            types_found += release['release-group']['secondary-types']
        other_score = type_scores.get('Other', 0.5)
        for release_type in types_found:
            type_score = type_scores.get(release_type, other_score)
            if type_score == 0:
                skip_release = True
            score += type_score
        score /= len(types_found)

    if skip_release:
        # (0, 9999): an overwhelming zero-score weight that drags the
        # linear combination toward 0 for this release.
        parts.append((0, 9999))
    else:
        parts.append((score, weight_release_type))


def weights_from_preferred_countries(parts, release,
                                     preferred_countries,
                                     weight):
    """Append a (score, weight) pair reflecting the release country's rank
    in `preferred_countries` (earlier in the list scores higher; a country
    not in the list, or a release without a country, scores 0.0).

    Appends nothing when `preferred_countries` is empty.
    """
    total_countries = len(preferred_countries)
    if total_countries:
        score = 0.0
        if "country" in release:
            try:
                i = preferred_countries.index(release['country'])
                score = float(total_countries - i) / float(total_countries)
            except ValueError:
                pass  # country not in the preference list; keep score 0.0
        parts.append((score, weight))


def weights_from_preferred_formats(parts, release, preferred_formats, weight):
    """Append a (score, weight) pair reflecting how well the release's media
    formats rank in `preferred_formats` (averaged over all media that carry a
    "format" key; earlier in the list scores higher).

    Appends nothing when `preferred_formats` is empty or the release has no
    'media' entry.
    """
    total_formats = len(preferred_formats)
    if total_formats and 'media' in release:
        score = 0.0
        subtotal = 0
        for medium in release['media']:
            if "format" in medium:
                try:
                    i = preferred_formats.index(medium['format'])
                    score += float(total_formats - i) / float(total_formats)
                except ValueError:
                    pass  # format not in the preference list; contributes 0
                subtotal += 1
        if subtotal > 0:
            score /= subtotal
        parts.append((score, weight))


class Metadata(MutableMapping):

    """List of metadata items with dict-like access.

    Each tag maps to a *list* of string values; item access joins the values
    with `multi_valued_joiner`, while getall()/rawitems() expose the raw
    lists. Deleting a tag records it in `deleted_tags` so the deletion can be
    propagated (e.g. when saving files). Also tracks cover art (`images`) and
    track length in milliseconds (`length`).
    """

    # (tag name, weight) pairs used by compare() for file/track matching.
    __weights = [
        ('title', 22),
        ('artist', 6),
        ('album', 12),
        ('tracknumber', 6),
        ('totaltracks', 5),
        ('discnumber', 5),
        ('totaldiscs', 4),
    ]

    # Multipliers applied to the "date" weight in compare_to_release_parts(),
    # ordered from best match (identical date) to worst (different year).
    __date_match_factors = {
        'exact': 1.00,
        'year': 0.95,
        'close_year': 0.85,
        'exists_vs_null': 0.65,
        'no_release_date': 0.25,
        'differed': 0.0
    }

    multi_valued_joiner = MULTI_VALUED_JOINER

    def __init__(self, *args, deleted_tags=None, images=None, length=None, **kwargs):
        """Initialize, optionally from dict-like positional/keyword args.

        Keyword-only args:
            deleted_tags: iterable of tag names to mark as deleted
            images: iterable of images appended to self.images
            length: track length; coerced with int()
        """
        self._store = dict()
        self.deleted_tags = set()
        self.length = 0
        self.images = ImageList()
        self.has_common_images = True

        if args or kwargs:
            self.update(*args, **kwargs)
        if images is not None:
            for image in images:
                self.images.append(image)
        if deleted_tags is not None:
            for tag in deleted_tags:
                del self[tag]
        if length is not None:
            self.length = int(length)

    def __bool__(self):
        return bool(len(self))

    def __len__(self):
        # Images count toward truthiness/length as well as tags.
        return len(self._store) + len(self.images)

    @staticmethod
    def length_score(a, b):
        """Score two track lengths (ms): 1.0 for equal, falling linearly to
        0.0 at a difference of LENGTH_SCORE_THRES_MS or more."""
        return (1.0 - min(abs(a - b),
                          LENGTH_SCORE_THRES_MS) / float(LENGTH_SCORE_THRES_MS))

    def compare(self, other, ignored=None):
        """Return a 0..1 similarity between this metadata and `other`,
        as a weighted combination of tag similarities (see __weights) and,
        unless '~length' is ignored, the track-length score.

        Args:
            other: another Metadata object
            ignored: optional list of tag names (and/or '~length') to skip
        """
        parts = []
        if ignored is None:
            ignored = []

        if self.length and other.length and '~length' not in ignored:
            score = self.length_score(self.length, other.length)
            parts.append((score, 8))

        for name, weight in self.__weights:
            if name in ignored:
                continue
            a = self[name]
            b = other[name]
            if a and b:
                if name in ('tracknumber', 'totaltracks', 'discnumber', 'totaldiscs'):
                    # Numeric tags match all-or-nothing; fall back to string
                    # comparison when either side is not an integer.
                    try:
                        ia = int(a)
                        ib = int(b)
                    except ValueError:
                        ia = a
                        ib = b
                    score = 1.0 - (int(ia != ib))
                else:
                    score = similarity2(a, b)
                parts.append((score, weight))
            elif (a and name in other.deleted_tags
                  or b and name in self.deleted_tags):
                # A value on one side that the other side explicitly deleted
                # counts as a mismatch.
                parts.append((0, weight))
        return linear_combination_of_weights(parts)

    def compare_to_release(self, release, weights):
        """
        Compare metadata to a MusicBrainz release. Produces a probability as a
        linear combination of weights that the metadata matches a certain album.
        """
        parts = self.compare_to_release_parts(release, weights)
        sim = linear_combination_of_weights(parts) * get_score(release)
        return SimMatchRelease(similarity=sim, release=release)

    def compare_to_release_parts(self, release, weights):
        """Build the list of (score, weight) parts comparing this metadata to
        a MusicBrainz release node: album title, album artist, track count,
        release date, preferred countries/formats, release-type scores, and a
        bonus for already-loaded albums."""
        parts = []
        if "album" in self:
            b = release['title']
            parts.append((similarity2(self["album"], b), weights["album"]))

        if "albumartist" in self and "albumartist" in weights:
            a = self["albumartist"]
            b = artist_credit_from_node(release['artist-credit'])[0]
            parts.append((similarity2(a, b), weights["albumartist"]))

        try:
            a = int(self["totaltracks"])
            b = release['track-count']
            # Too many tracks for this release is worse than too few.
            score = 0.0 if a > b else 0.3 if a < b else 1.0
            parts.append((score, weights["totaltracks"]))
        except (ValueError, KeyError):
            pass  # no usable totaltracks/track-count; skip this part

        # Date Logic
        date_match_factor = 0.0
        if "date" in release and release['date'] != '':
            release_date = release['date']
            if "date" in self:
                metadata_date = self['date']
                if release_date == metadata_date:
                    # release has a date and it matches what our metadata had exactly.
                    date_match_factor = self.__date_match_factors['exact']
                else:
                    release_year = extract_year_from_date(release_date)
                    if release_year is not None:
                        metadata_year = extract_year_from_date(metadata_date)
                        if metadata_year is not None:
                            if release_year == metadata_year:
                                # release has a date and it matches what our metadata had for year exactly.
                                date_match_factor = self.__date_match_factors['year']
                            elif abs(release_year - metadata_year) <= 2:
                                # release has a date and it matches what our metadata had closely (year +/- 2).
                                date_match_factor = self.__date_match_factors['close_year']
                            else:
                                # release has a date but it does not match ours (all else equal,
                                # its better to have an unknown date than a wrong date, since
                                # the unknown could actually be correct)
                                date_match_factor = self.__date_match_factors['differed']
            else:
                # release has a date but we don't have one (all else equal, we prefer
                # tracks that have non-blank date values)
                date_match_factor = self.__date_match_factors['exists_vs_null']
        else:
            # release has a no date (all else equal, we don't prefer this
            # release since its date is missing)
            date_match_factor = self.__date_match_factors['no_release_date']

        parts.append((date_match_factor, weights['date']))

        config = get_config()
        weights_from_preferred_countries(parts, release,
                                         config.setting["preferred_release_countries"],
                                         weights["releasecountry"])

        weights_from_preferred_formats(parts, release,
                                       config.setting["preferred_release_formats"],
                                       weights["format"])

        if "releasetype" in weights:
            weights_from_release_type_scores(parts, release,
                                             config.setting["release_type_scores"],
                                             weights["releasetype"])

        # Bonus part for releases already loaded in the UI.
        # NOTE(review): QObject.tagger is set elsewhere by the application —
        # presumably the global tagger instance; not visible in this file.
        rg = QObject.tagger.get_release_group_by_id(release['release-group']['id'])
        if release['id'] in rg.loaded_albums:
            parts.append((1.0, 6))

        return parts

    def compare_to_track(self, track, weights):
        """Compare this metadata to a MusicBrainz recording node and return
        the best SimMatchTrack over all of the track's releases (or a
        release-less match if the track has none)."""
        parts = []

        if 'title' in self:
            a = self['title']
            b = track.get('title', '')
            parts.append((similarity2(a, b), weights["title"]))

        if 'artist' in self:
            a = self['artist']
            artist_credits = track.get('artist-credit', [])
            b = artist_credit_from_node(artist_credits)[0]
            parts.append((similarity2(a, b), weights["artist"]))

        a = self.length
        if a > 0 and 'length' in track:
            b = track['length']
            score = self.length_score(a, b)
            parts.append((score, weights["length"]))

        releases = []
        if "releases" in track:
            releases = track['releases']

        search_score = get_score(track)
        if not releases:
            sim = linear_combination_of_weights(parts) * search_score
            return SimMatchTrack(similarity=sim, releasegroup=None, release=None, track=track)

        if 'isvideo' in weights:
            # All-or-nothing score for video-ness agreement.
            metadata_is_video = self['~video'] == '1'
            track_is_video = track.get('video', False)
            score = 1 if metadata_is_video == track_is_video else 0
            parts.append((score, weights['isvideo']))

        # Start below any possible similarity so the first release wins.
        result = SimMatchTrack(similarity=-1, releasegroup=None, release=None, track=None)
        for release in releases:
            release_parts = self.compare_to_release_parts(release, weights)
            sim = linear_combination_of_weights(parts + release_parts) * search_score
            if sim > result.similarity:
                rg = release['release-group'] if "release-group" in release else None
                result = SimMatchTrack(similarity=sim, releasegroup=rg, release=release, track=track)
        return result

    def copy(self, other, copy_images=True):
        """Replace this object's contents with a copy of `other`."""
        self.clear()
        self._update_from_metadata(other, copy_images)

    def update(self, *args, **kwargs):
        """Update tags (and, for Metadata/MultiMetadataProxy sources, deleted
        tags, images and length) from the given mapping, mirroring
        dict.update() semantics."""
        one_arg = len(args) == 1
        if one_arg and (isinstance(args[0], self.__class__) or isinstance(args[0], MultiMetadataProxy)):
            self._update_from_metadata(args[0])
        elif one_arg and isinstance(args[0], MutableMapping):
            # update from MutableMapping (ie. dict)
            for k, v in args[0].items():
                self[k] = v
        elif args or kwargs:
            # update from a dict-like constructor parameters
            for k, v in dict(*args, **kwargs).items():
                self[k] = v
        else:
            # no argument, raise TypeError to mimic dict.update()
            raise TypeError("descriptor 'update' of '%s' object needs an argument" % self.__class__.__name__)

    def diff(self, other):
        """Returns a new Metadata object with only the tags that changed in self compared to other"""
        m = Metadata()
        for tag, values in self.rawitems():
            other_values = other.getall(tag)
            if other_values != values:
                m[tag] = values
        m.deleted_tags = self.deleted_tags - other.deleted_tags
        return m

    def _update_from_metadata(self, other, copy_images=True):
        # Copy raw value lists (sliced to avoid sharing the list objects),
        # apply other's deletions, and take over images/length when present.
        for k, v in other.rawitems():
            self.set(k, v[:])

        for tag in other.deleted_tags:
            del self[tag]

        if copy_images and other.images:
            self.images = other.images.copy()
        if other.length:
            self.length = other.length

    def clear(self):
        """Remove all tags, images, length and deletion markers."""
        self._store.clear()
        self.images = ImageList()
        self.length = 0
        self.clear_deleted()

    def clear_deleted(self):
        """Forget which tags were marked as deleted."""
        self.deleted_tags = set()

    @staticmethod
    def normalize_tag(name):
        # Tag names may carry a trailing ':' (e.g. from "tag:" syntax);
        # strip it so both forms address the same entry.
        return name.rstrip(':')

    def getall(self, name):
        """Return the list of values for `name` ([] when unset)."""
        return self._store.get(self.normalize_tag(name), [])

    def getraw(self, name):
        """Return the list of values for `name`; raises KeyError when unset."""
        return self._store[self.normalize_tag(name)]

    def get(self, key, default=None):
        """Return the values of `key` joined with multi_valued_joiner,
        or `default` when the tag is unset or empty."""
        values = self._store.get(self.normalize_tag(key), None)
        if values:
            return self.multi_valued_joiner.join(values)
        else:
            return default

    def __getitem__(self, name):
        # Missing tags read as '' rather than raising KeyError.
        return self.get(name, '')

    def set(self, name, values):
        """Set tag `name` to `values` (a single value or an iterable).

        Values are coerced to str; falsy values other than 0 are dropped.
        Setting an empty value list deletes an existing tag instead.
        """
        name = self.normalize_tag(name)
        if isinstance(values, str) or not isinstance(values, Iterable):
            values = [values]
        values = [str(value) for value in values if value or value == 0]
        if values:
            self._store[name] = values
            self.deleted_tags.discard(name)
        elif name in self._store:
            del self[name]

    def __setitem__(self, name, values):
        self.set(name, values)

    def __contains__(self, name):
        return self._store.__contains__(self.normalize_tag(name))

    def __delitem__(self, name):
        # Always record the deletion, even when the tag was not set.
        name = self.normalize_tag(name)
        try:
            del self._store[name]
        except KeyError:
            pass
        finally:
            self.deleted_tags.add(name)

    def add(self, name, value):
        """Append `value` (coerced to str) to tag `name`; falsy values other
        than 0 are ignored."""
        if value or value == 0:
            name = self.normalize_tag(name)
            self._store.setdefault(name, []).append(str(value))
            self.deleted_tags.discard(name)

    def add_unique(self, name, value):
        """Append `value` to tag `name` only if not already present."""
        name = self.normalize_tag(name)
        if value not in self.getall(name):
            self.add(name, value)

    def delete(self, name):
        """Deprecated: use del directly"""
        del self[self.normalize_tag(name)]

    def unset(self, name):
        """Removes a tag from the metadata, but does not mark it for deletion.

        Args:
            name: name of the tag to unset
        """
        name = self.normalize_tag(name)
        try:
            del self._store[name]
        except KeyError:
            pass

    def __iter__(self):
        return iter(self._store)

    def items(self):
        """Yield one (name, value) pair per individual value (a multi-valued
        tag yields multiple pairs)."""
        for name, values in self._store.items():
            for value in values:
                yield name, value

    def rawitems(self):
        """Returns the metadata items.

        >>> m.rawitems()
        [("key1", ["value1", "value2"]), ("key2", ["value3"])]
        """
        return self._store.items()

    def apply_func(self, func):
        """Apply `func` to every value of every non-preserved tag."""
        # list() because assignment may delete entries from the store.
        for name, values in list(self.rawitems()):
            if name not in PRESERVED_TAGS:
                self[name] = [func(value) for value in values]

    def strip_whitespace(self):
        """Strip leading/trailing whitespace.

        >>> m = Metadata()
        >>> m["foo"] = " bar "
        >>> m["foo"]
        ' bar '
        >>> m.strip_whitespace()
        >>> m["foo"]
        'bar'
        """
        self.apply_func(str.strip)

    def __repr__(self):
        return "%s(%r, deleted_tags=%r, length=%r, images=%r)" % (self.__class__.__name__, self._store, self.deleted_tags, self.length, self.images)

    def __str__(self):
        return ("store: %r\ndeleted: %r\nimages: %r\nlength: %r" % (self._store, self.deleted_tags, [str(img) for img in self.images], self.length))


class MultiMetadataProxy:
    """
    Wraps a writable Metadata object together with another
    readonly Metadata object.

    Changes are written to the writable object, while values are
    read from both the writable and the readonly object (with the writable
    object taking precedence). The use case is to provide access to Metadata
    values without making them part of the actual Metadata. E.g. allow track
    metadata to use file specific metadata, without making it actually part
    of the track.
    """

    # Metadata method names whose calls mutate state; these are forwarded to
    # BOTH the writable object and the combined view (see __write).
    WRITE_METHODS = [
        'add_unique',
        'add',
        'apply_func',
        'clear_deleted',
        'clear',
        'copy',
        'delete',
        'pop',
        'set',
        'strip_whitespace',
        'unset',
        'update',
    ]

    def __init__(self, metadata, *readonly_metadata):
        """Combine `metadata` (writable) with any number of readonly Metadata
        objects; earlier readonly objects take precedence over later ones,
        and `metadata` takes precedence over all of them."""
        self.metadata = metadata
        self.combined_metadata = Metadata()
        # reversed() so that updates from higher-precedence objects land last
        # and overwrite the lower-precedence values.
        for m in reversed(readonly_metadata):
            self.combined_metadata.update(m)
        self.combined_metadata.update(metadata)

    def __getattr__(self, name):
        # Mutators go through __write (both objects); everything else reads
        # from the combined view only.
        if name in self.WRITE_METHODS:
            return partial(self.__write, name)
        else:
            attribute = self.combined_metadata.__getattribute__(name)
            if callable(attribute):
                return partial(self.__read, name)
            else:
                return attribute

    def __setattr__(self, name, value):
        # The proxy's own two slots are stored normally; any other attribute
        # assignment is mirrored onto both wrapped objects.
        if name in ('metadata', 'combined_metadata'):
            super().__setattr__(name, value)
        else:
            self.metadata.__setattr__(name, value)
            self.combined_metadata.__setattr__(name, value)

    def __write(self, name, *args, **kwargs):
        # Apply the mutation to the writable object AND to the combined view
        # (so subsequent reads see it); the combined call's result is returned.
        func1 = self.metadata.__getattribute__(name)
        func2 = self.combined_metadata.__getattribute__(name)
        func1(*args, **kwargs)
        return func2(*args, **kwargs)

    def __read(self, name, *args, **kwargs):
        func = self.combined_metadata.__getattribute__(name)
        return func(*args, **kwargs)

    def __getitem__(self, name):
        return self.__read('__getitem__', name)

    def __setitem__(self, name, values):
        return self.__write('__setitem__', name, values)

    def __delitem__(self, name):
        return self.__write('__delitem__', name)

    def __iter__(self):
        return self.__read('__iter__')

    def __len__(self):
        return self.__read('__len__')

    def __contains__(self, name):
        return self.__read('__contains__', name)

    def __repr__(self):
        return self.__read('__repr__')


# Plugin hook registries for album- and track-level metadata processors.
_album_metadata_processors = PluginFunctions(label='album_metadata_processors')
_track_metadata_processors = PluginFunctions(label='track_metadata_processors')


def register_album_metadata_processor(function, priority=PluginPriority.NORMAL):
    """Registers new album-level metadata processor."""
    _album_metadata_processors.register(function.__module__, function, priority)


def register_track_metadata_processor(function, priority=PluginPriority.NORMAL):
    """Registers new track-level metadata processor."""
    _track_metadata_processors.register(function.__module__, function, priority)


def run_album_metadata_processors(album_object, metadata, release):
    """Run all registered album-level metadata processors."""
    _album_metadata_processors.run(album_object, metadata, release)


def run_track_metadata_processors(album_object, metadata, track, release=None):
    """Run all registered track-level metadata processors."""
    _track_metadata_processors.run(album_object, metadata, track, release)