1# -*- coding: utf-8 -*-
2#
3# Picard, the next-generation MusicBrainz tagger
4#
5# Copyright (C) 2006-2008, 2011 Lukáš Lalinský
6# Copyright (C) 2009, 2015, 2018-2020 Philipp Wolfer
7# Copyright (C) 2011-2014 Michael Wiencek
8# Copyright (C) 2012 Chad Wilson
9# Copyright (C) 2012 Johannes Weißl
10# Copyright (C) 2012-2014, 2018 Wieland Hoffmann
11# Copyright (C) 2013-2014, 2016, 2018-2020 Laurent Monin
12# Copyright (C) 2013-2014, 2017 Sophist-UK
13# Copyright (C) 2016 Rahul Raturi
14# Copyright (C) 2016-2017 Sambhav Kothari
15# Copyright (C) 2017-2018 Antonio Larrosa
16# Copyright (C) 2018 Vishal Choudhary
17# Copyright (C) 2018 Xincognito10
18# Copyright (C) 2020 Ray Bouchard
19#
20# This program is free software; you can redistribute it and/or
21# modify it under the terms of the GNU General Public License
22# as published by the Free Software Foundation; either version 2
23# of the License, or (at your option) any later version.
24#
25# This program is distributed in the hope that it will be useful,
26# but WITHOUT ANY WARRANTY; without even the implied warranty of
27# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
28# GNU General Public License for more details.
29#
30# You should have received a copy of the GNU General Public License
31# along with this program; if not, write to the Free Software
32# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
33
34
35from collections import namedtuple
36from collections.abc import (
37    Iterable,
38    MutableMapping,
39)
40from functools import partial
41
42from PyQt5.QtCore import QObject
43
44from picard.config import get_config
45from picard.mbjson import (
46    artist_credit_from_node,
47    get_score,
48)
49from picard.plugin import (
50    PluginFunctions,
51    PluginPriority,
52)
53from picard.similarity import similarity2
54from picard.util import (
55    extract_year_from_date,
56    linear_combination_of_weights,
57)
58from picard.util.imagelist import ImageList
59from picard.util.tags import PRESERVED_TAGS
60
61
# Separator inserted between the entries of a multi-valued tag when it is
# rendered as a single string (see Metadata.multi_valued_joiner).
MULTI_VALUED_JOINER = '; '

# lengths difference over this number of milliseconds will give a score of 0.0
# equal lengths will give a score of 1.0
# example
# a     b     score
# 20000 0     0.333333333333
# 20000 10000 0.666666666667
# 20000 20000 1.0
# 20000 30000 0.666666666667
# 20000 40000 0.333333333333
# 20000 50000 0.0
LENGTH_SCORE_THRES_MS = 30000

# Result of Metadata.compare_to_track(): similarity in [0, 1] plus the
# matched release-group, release and track nodes (may be None).
SimMatchTrack = namedtuple('SimMatchTrack', 'similarity releasegroup release track')
# Result of Metadata.compare_to_release(): similarity in [0, 1] plus the
# compared release node.
SimMatchRelease = namedtuple('SimMatchRelease', 'similarity release')
78
79
def weights_from_release_type_scores(parts, release, release_type_scores,
                                     weight_release_type=1):
    """Append a (score, weight) pair for the release's release-group types.

    The score, in [0, 1], reflects the user's release-type preferences: the
    preference values for the primary type (and any secondary types) are
    averaged. Types without an explicit preference fall back to the score
    configured for 'Other', or 0.5 if 'Other' is not configured. Releases
    that do not carry a primary type contribute a score of 0.0.

    If any matched type has a preference of exactly zero, a (0, 9999) pair
    is appended instead: the huge zero-valued weight forces this release to
    the bottom, so it is only ever chosen when no alternative exists.

    Args:
        parts: list of (score, weight) tuples, appended to in place
        release: release node from the MusicBrainz web service
        release_type_scores: iterable of (type_name, score) pairs
        weight_release_type: weight of this part in the final combination
    """
    preferences = dict(release_type_scores)
    zero_preferred = False
    score = 0.0

    if 'release-group' in release and 'primary-type' in release['release-group']:
        group = release['release-group']
        found_types = [group['primary-type']]
        if 'secondary-types' in group:
            found_types += group['secondary-types']
        fallback = preferences.get('Other', 0.5)
        total = 0.0
        for found_type in found_types:
            value = preferences.get(found_type, fallback)
            if value == 0:
                zero_preferred = True
            total += value
        score = total / len(found_types)

    if zero_preferred:
        # Effectively veto this release without dividing by zero elsewhere.
        parts.append((0, 9999))
    else:
        parts.append((score, weight_release_type))
111
112
def weights_from_preferred_countries(parts, release,
                                     preferred_countries,
                                     weight):
    """Append a (score, weight) pair ranking the release's country.

    The score, in [0, 1], is higher the earlier the release's country
    appears in the user's preference list; countries not in the list (or a
    release without a country) score 0.0. If the preference list is empty,
    nothing is appended.

    Args:
        parts: list of (score, weight) tuples, appended to in place
        release: release node from the MusicBrainz web service
        preferred_countries: ordered list of country codes, best first
        weight: weight of this part in the final combination
    """
    count = len(preferred_countries)
    if not count:
        return
    score = 0.0
    if "country" in release and release['country'] in preferred_countries:
        rank = preferred_countries.index(release['country'])
        score = float(count - rank) / float(count)
    parts.append((score, weight))
126
127
def weights_from_preferred_formats(parts, release, preferred_formats, weight):
    """Append a (score, weight) pair ranking the release's media formats.

    Each medium with a known format contributes a score based on that
    format's position in the user's preference list (earlier is better,
    unlisted formats score 0.0); the per-medium scores are averaged. If
    the preference list is empty or the release has no media information,
    nothing is appended.

    Args:
        parts: list of (score, weight) tuples, appended to in place
        release: release node from the MusicBrainz web service
        preferred_formats: ordered list of format names, best first
        weight: weight of this part in the final combination
    """
    count = len(preferred_formats)
    if not count or 'media' not in release:
        return
    score = 0.0
    rated_media = 0
    for medium in release['media']:
        if "format" not in medium:
            continue
        fmt = medium['format']
        if fmt in preferred_formats:
            rank = preferred_formats.index(fmt)
            score += float(count - rank) / float(count)
        rated_media += 1
    if rated_media > 0:
        score /= rated_media
    parts.append((score, weight))
144
145
class Metadata(MutableMapping):

    """List of metadata items with dict-like access.

    Values are stored internally as lists of strings; subscript access
    joins multiple values into one string using ``multi_valued_joiner``
    (use getall()/getraw()/rawitems() for the raw lists). Deleting a tag
    records its name in ``deleted_tags`` so the deletion can later be
    applied to files. Also carries the track length (ms) and the list of
    cover art images.
    """

    # Tag names and their relative weights used by compare().
    __weights = [
        ('title', 22),
        ('artist', 6),
        ('album', 12),
        ('tracknumber', 6),
        ('totaltracks', 5),
        ('discnumber', 5),
        ('totaldiscs', 4),
    ]

    # Match-quality factors for the date comparison performed in
    # compare_to_release_parts(); higher means a better date match.
    __date_match_factors = {
        'exact': 1.00,
        'year': 0.95,
        'close_year': 0.85,
        'exists_vs_null': 0.65,
        'no_release_date': 0.25,
        'differed': 0.0
    }

    # String used to join multiple values of a tag on subscript access.
    multi_valued_joiner = MULTI_VALUED_JOINER

    def __init__(self, *args, deleted_tags=None, images=None, length=None, **kwargs):
        """Initialize the metadata.

        Args:
            *args, **kwargs: initial tag values, forwarded to update()
            deleted_tags: iterable of tag names to mark as deleted
            images: iterable of cover art images to attach
            length: track length in milliseconds
        """
        self._store = dict()
        self.deleted_tags = set()
        self.length = 0
        self.images = ImageList()
        self.has_common_images = True

        if args or kwargs:
            self.update(*args, **kwargs)
        if images is not None:
            for image in images:
                self.images.append(image)
        if deleted_tags is not None:
            for tag in deleted_tags:
                del self[tag]
        if length is not None:
            self.length = int(length)

    def __bool__(self):
        # True if there is at least one tag value or image.
        return bool(len(self))

    def __len__(self):
        # Counts distinct tags plus attached images.
        return len(self._store) + len(self.images)

    @staticmethod
    def length_score(a, b):
        """Score the similarity of two track lengths in milliseconds.

        Returns 1.0 for equal lengths, decreasing linearly to 0.0 when
        the difference reaches LENGTH_SCORE_THRES_MS or more.
        """
        return (1.0 - min(abs(a - b),
                LENGTH_SCORE_THRES_MS) / float(LENGTH_SCORE_THRES_MS))

    def compare(self, other, ignored=None):
        """Return a similarity score in [0, 1] against another Metadata.

        Compares the weighted tags listed in __weights plus the track
        length (weight 8, keyed as '~length' in `ignored`).

        Args:
            other: the Metadata object to compare against
            ignored: optional list of tag names (and '~length') to skip
        """
        parts = []
        if ignored is None:
            ignored = []

        if self.length and other.length and '~length' not in ignored:
            score = self.length_score(self.length, other.length)
            parts.append((score, 8))

        for name, weight in self.__weights:
            if name in ignored:
                continue
            a = self[name]
            b = other[name]
            if a and b:
                if name in ('tracknumber', 'totaltracks', 'discnumber', 'totaldiscs'):
                    # Numeric tags match all-or-nothing; fall back to
                    # comparing the raw strings if they do not parse.
                    try:
                        ia = int(a)
                        ib = int(b)
                    except ValueError:
                        ia = a
                        ib = b
                    score = 1.0 - (int(ia != ib))
                else:
                    score = similarity2(a, b)
                parts.append((score, weight))
            elif (a and name in other.deleted_tags
                  or b and name in self.deleted_tags):
                # A value present on one side but explicitly deleted on
                # the other counts as a full mismatch for this tag.
                parts.append((0, weight))
        return linear_combination_of_weights(parts)

    def compare_to_release(self, release, weights):
        """
        Compare metadata to a MusicBrainz release. Produces a probability as a
        linear combination of weights that the metadata matches a certain album.
        """
        parts = self.compare_to_release_parts(release, weights)
        sim = linear_combination_of_weights(parts) * get_score(release)
        return SimMatchRelease(similarity=sim, release=release)

    def compare_to_release_parts(self, release, weights):
        """Build the (score, weight) parts comparing this metadata to a
        MusicBrainz release node: album title, album artist, track count,
        date, preferred countries/formats, release-type preferences, and a
        bonus for already-loaded albums.
        """
        parts = []
        if "album" in self:
            b = release['title']
            parts.append((similarity2(self["album"], b), weights["album"]))

        if "albumartist" in self and "albumartist" in weights:
            a = self["albumartist"]
            b = artist_credit_from_node(release['artist-credit'])[0]
            parts.append((similarity2(a, b), weights["albumartist"]))

        try:
            a = int(self["totaltracks"])
            b = release['track-count']
            # Exact track count matches best; fewer tracks than the
            # release is partially acceptable, more is not.
            score = 0.0 if a > b else 0.3 if a < b else 1.0
            parts.append((score, weights["totaltracks"]))
        except (ValueError, KeyError):
            pass

        # Date Logic
        date_match_factor = 0.0
        if "date" in release and release['date'] != '':
            release_date = release['date']
            if "date" in self:
                metadata_date = self['date']
                if release_date == metadata_date:
                    # release has a date and it matches what our metadata had exactly.
                    date_match_factor = self.__date_match_factors['exact']
                else:
                    release_year = extract_year_from_date(release_date)
                    if release_year is not None:
                        metadata_year = extract_year_from_date(metadata_date)
                        if metadata_year is not None:
                            if release_year == metadata_year:
                                # release has a date and it matches what our metadata had for year exactly.
                                date_match_factor = self.__date_match_factors['year']
                            elif abs(release_year - metadata_year) <= 2:
                                # release has a date and it matches what our metadata had closely (year +/- 2).
                                date_match_factor = self.__date_match_factors['close_year']
                            else:
                                # release has a date but it does not match ours (all else equal,
                                # its better to have an unknown date than a wrong date, since
                                # the unknown could actually be correct)
                                date_match_factor = self.__date_match_factors['differed']
            else:
                # release has a date but we don't have one (all else equal, we prefer
                # tracks that have non-blank date values)
                date_match_factor = self.__date_match_factors['exists_vs_null']
        else:
            # release has a no date (all else equal, we don't prefer this
            # release since its date is missing)
            date_match_factor = self.__date_match_factors['no_release_date']

        parts.append((date_match_factor, weights['date']))

        config = get_config()
        weights_from_preferred_countries(parts, release,
                                         config.setting["preferred_release_countries"],
                                         weights["releasecountry"])

        weights_from_preferred_formats(parts, release,
                                       config.setting["preferred_release_formats"],
                                       weights["format"])

        if "releasetype" in weights:
            weights_from_release_type_scores(parts, release,
                                             config.setting["release_type_scores"],
                                             weights["releasetype"])

        # Bonus part for releases whose album is already loaded in the
        # tagger (the running tagger instance is attached to QObject).
        rg = QObject.tagger.get_release_group_by_id(release['release-group']['id'])
        if release['id'] in rg.loaded_albums:
            parts.append((1.0, 6))

        return parts

    def compare_to_track(self, track, weights):
        """Compare this metadata to a MusicBrainz recording node and return
        the best SimMatchTrack over all of the track's releases (or a
        release-less match if the track has no releases).
        """
        parts = []

        if 'title' in self:
            a = self['title']
            b = track.get('title', '')
            parts.append((similarity2(a, b), weights["title"]))

        if 'artist' in self:
            a = self['artist']
            artist_credits = track.get('artist-credit', [])
            b = artist_credit_from_node(artist_credits)[0]
            parts.append((similarity2(a, b), weights["artist"]))

        a = self.length
        if a > 0 and 'length' in track:
            b = track['length']
            score = self.length_score(a, b)
            parts.append((score, weights["length"]))

        releases = []
        if "releases" in track:
            releases = track['releases']

        search_score = get_score(track)
        if not releases:
            # Standalone recording: score it on the track parts alone.
            sim = linear_combination_of_weights(parts) * search_score
            return SimMatchTrack(similarity=sim, releasegroup=None, release=None, track=track)

        if 'isvideo' in weights:
            # All-or-nothing match on whether both sides are videos.
            metadata_is_video = self['~video'] == '1'
            track_is_video = track.get('video', False)
            score = 1 if metadata_is_video == track_is_video else 0
            parts.append((score, weights['isvideo']))

        # Keep the release that yields the highest combined similarity.
        result = SimMatchTrack(similarity=-1, releasegroup=None, release=None, track=None)
        for release in releases:
            release_parts = self.compare_to_release_parts(release, weights)
            sim = linear_combination_of_weights(parts + release_parts) * search_score
            if sim > result.similarity:
                rg = release['release-group'] if "release-group" in release else None
                result = SimMatchTrack(similarity=sim, releasegroup=rg, release=release, track=track)
        return result

    def copy(self, other, copy_images=True):
        """Replace this metadata's contents with a copy of `other`."""
        self.clear()
        self._update_from_metadata(other, copy_images)

    def update(self, *args, **kwargs):
        """Update tags from another Metadata, a mapping, or dict-like
        constructor arguments (mimics dict.update())."""
        one_arg = len(args) == 1
        if one_arg and (isinstance(args[0], self.__class__) or isinstance(args[0], MultiMetadataProxy)):
            self._update_from_metadata(args[0])
        elif one_arg and isinstance(args[0], MutableMapping):
            # update from MutableMapping (ie. dict)
            for k, v in args[0].items():
                self[k] = v
        elif args or kwargs:
            # update from a dict-like constructor parameters
            for k, v in dict(*args, **kwargs).items():
                self[k] = v
        else:
            # no argument, raise TypeError to mimic dict.update()
            raise TypeError("descriptor 'update' of '%s' object needs an argument" % self.__class__.__name__)

    def diff(self, other):
        """Returns a new Metadata object with only the tags that changed in self compared to other"""
        m = Metadata()
        for tag, values in self.rawitems():
            other_values = other.getall(tag)
            if other_values != values:
                m[tag] = values
        m.deleted_tags = self.deleted_tags - other.deleted_tags
        return m

    def _update_from_metadata(self, other, copy_images=True):
        # Copy the raw value lists (sliced, so lists are not shared),
        # replay deletions, and optionally take over images and length.
        for k, v in other.rawitems():
            self.set(k, v[:])

        for tag in other.deleted_tags:
            del self[tag]

        if copy_images and other.images:
            self.images = other.images.copy()
        if other.length:
            self.length = other.length

    def clear(self):
        """Remove all tags, images, the length and deletion markers."""
        self._store.clear()
        self.images = ImageList()
        self.length = 0
        self.clear_deleted()

    def clear_deleted(self):
        """Forget all tags marked as deleted."""
        self.deleted_tags = set()

    @staticmethod
    def normalize_tag(name):
        """Normalize a tag name by stripping trailing ':' characters."""
        return name.rstrip(':')

    def getall(self, name):
        """Return the list of values for a tag, or [] if it is unset."""
        return self._store.get(self.normalize_tag(name), [])

    def getraw(self, name):
        """Return the raw value list for a tag; raises KeyError if unset."""
        return self._store[self.normalize_tag(name)]

    def get(self, key, default=None):
        """Return the tag's values joined into one string, or `default`
        if the tag is unset."""
        values = self._store.get(self.normalize_tag(key), None)
        if values:
            return self.multi_valued_joiner.join(values)
        else:
            return default

    def __getitem__(self, name):
        # Unlike dict, a missing tag yields '' instead of raising.
        return self.get(name, '')

    def set(self, name, values):
        """Set a tag to the given value(s).

        Scalars are wrapped in a list; all values are converted to str.
        Falsy values other than 0 are dropped; if nothing remains, an
        already-set tag is deleted instead.
        """
        name = self.normalize_tag(name)
        if isinstance(values, str) or not isinstance(values, Iterable):
            values = [values]
        values = [str(value) for value in values if value or value == 0]
        if values:
            self._store[name] = values
            self.deleted_tags.discard(name)
        elif name in self._store:
            del self[name]

    def __setitem__(self, name, values):
        self.set(name, values)

    def __contains__(self, name):
        return self._store.__contains__(self.normalize_tag(name))

    def __delitem__(self, name):
        # The tag is always marked as deleted, even if it was never set.
        name = self.normalize_tag(name)
        try:
            del self._store[name]
        except KeyError:
            pass
        finally:
            self.deleted_tags.add(name)

    def add(self, name, value):
        """Append a value to a tag (ignored if falsy, except 0)."""
        if value or value == 0:
            name = self.normalize_tag(name)
            self._store.setdefault(name, []).append(str(value))
            self.deleted_tags.discard(name)

    def add_unique(self, name, value):
        """Append a value to a tag only if not already present."""
        name = self.normalize_tag(name)
        if value not in self.getall(name):
            self.add(name, value)

    def delete(self, name):
        """Deprecated: use del directly"""
        del self[self.normalize_tag(name)]

    def unset(self, name):
        """Removes a tag from the metadata, but does not mark it for deletion.

        Args:
            name: name of the tag to unset
        """
        name = self.normalize_tag(name)
        try:
            del self._store[name]
        except KeyError:
            pass

    def __iter__(self):
        return iter(self._store)

    def items(self):
        """Yield (name, value) pairs, one pair per individual value."""
        for name, values in self._store.items():
            for value in values:
                yield name, value

    def rawitems(self):
        """Returns the metadata items.

        >>> m.rawitems()
        [("key1", ["value1", "value2"]), ("key2", ["value3"])]
        """
        return self._store.items()

    def apply_func(self, func):
        """Apply `func` to every value of every non-preserved tag."""
        for name, values in list(self.rawitems()):
            if name not in PRESERVED_TAGS:
                self[name] = [func(value) for value in values]

    def strip_whitespace(self):
        """Strip leading/trailing whitespace.

        >>> m = Metadata()
        >>> m["foo"] = "  bar  "
        >>> m["foo"]
        "  bar  "
        >>> m.strip_whitespace()
        >>> m["foo"]
        "bar"
        """
        self.apply_func(str.strip)

    def __repr__(self):
        return "%s(%r, deleted_tags=%r, length=%r, images=%r)" % (self.__class__.__name__, self._store, self.deleted_tags, self.length, self.images)

    def __str__(self):
        return ("store: %r\ndeleted: %r\nimages: %r\nlength: %r" % (self._store, self.deleted_tags, [str(img) for img in self.images], self.length))
522
523
class MultiMetadataProxy:
    """
    Wraps a writable Metadata object together with another
    readonly Metadata object.

    Changes are written to the writable object, while values are
    read from both the writable and the readonly object (with the writable
    object taking precedence). The use case is to provide access to Metadata
    values without making them part of the actual Metadata. E.g. allow track
    metadata to use file specific metadata, without making it actually part
    of the track.
    """

    # Names of Metadata methods that mutate state. Calls to these are
    # forwarded to BOTH the writable metadata and the combined view so
    # that the two stay consistent.
    WRITE_METHODS = [
        'add_unique',
        'add',
        'apply_func',
        'clear_deleted',
        'clear',
        'copy',
        'delete',
        'pop',
        'set',
        'strip_whitespace',
        'unset',
        'update',
    ]

    def __init__(self, metadata, *readonly_metadata):
        """Create the proxy.

        Args:
            metadata: the writable Metadata object
            *readonly_metadata: read-only Metadata objects; earlier ones
                take precedence over later ones
        """
        self.metadata = metadata
        self.combined_metadata = Metadata()
        # Apply lowest-precedence sources first so each later update()
        # overrides them; the writable metadata is applied last and wins.
        for m in reversed(readonly_metadata):
            self.combined_metadata.update(m)
        self.combined_metadata.update(metadata)

    def __getattr__(self, name):
        # Only called for attributes not found via normal lookup. Write
        # methods are routed through __write (both objects); other
        # callables are read from the combined view; plain attributes are
        # returned directly from the combined view.
        if name in self.WRITE_METHODS:
            return partial(self.__write, name)
        else:
            attribute = self.combined_metadata.__getattribute__(name)
            if callable(attribute):
                return partial(self.__read, name)
            else:
                return attribute

    def __setattr__(self, name, value):
        # The proxy's own two attributes are stored normally; any other
        # attribute assignment is mirrored onto both Metadata objects.
        if name in ('metadata', 'combined_metadata'):
            super().__setattr__(name, value)
        else:
            self.metadata.__setattr__(name, value)
            self.combined_metadata.__setattr__(name, value)

    def __write(self, name, *args, **kwargs):
        # Apply a mutating call to both objects; the combined view's
        # return value is the one reported to the caller.
        func1 = self.metadata.__getattribute__(name)
        func2 = self.combined_metadata.__getattribute__(name)
        func1(*args, **kwargs)
        return func2(*args, **kwargs)

    def __read(self, name, *args, **kwargs):
        # Reads always come from the combined (merged) view.
        func = self.combined_metadata.__getattribute__(name)
        return func(*args, **kwargs)

    # Dunder lookups on instances bypass __getattr__, so the container
    # protocol must be forwarded explicitly below.

    def __getitem__(self, name):
        return self.__read('__getitem__', name)

    def __setitem__(self, name, values):
        return self.__write('__setitem__', name, values)

    def __delitem__(self, name):
        return self.__write('__delitem__', name)

    def __iter__(self):
        return self.__read('__iter__')

    def __len__(self):
        return self.__read('__len__')

    def __contains__(self, name):
        return self.__read('__contains__', name)

    def __repr__(self):
        return self.__read('__repr__')
606
607
# Plugin hook registries for album- and track-level metadata processors,
# filled by the register_*_metadata_processor() functions below.
_album_metadata_processors = PluginFunctions(label='album_metadata_processors')
_track_metadata_processors = PluginFunctions(label='track_metadata_processors')
610
611
def register_album_metadata_processor(function, priority=PluginPriority.NORMAL):
    """Register a new album-level metadata processor.

    The processor is recorded under the module it was defined in.
    """
    module_name = function.__module__
    _album_metadata_processors.register(module_name, function, priority)
615
616
def register_track_metadata_processor(function, priority=PluginPriority.NORMAL):
    """Register a new track-level metadata processor.

    The processor is recorded under the module it was defined in.
    """
    module_name = function.__module__
    _track_metadata_processors.register(module_name, function, priority)
620
621
def run_album_metadata_processors(album_object, metadata, release):
    """Run every registered album-level metadata processor."""
    processors = _album_metadata_processors
    processors.run(album_object, metadata, release)
624
625
def run_track_metadata_processors(album_object, metadata, track, release=None):
    """Run every registered track-level metadata processor."""
    processors = _track_metadata_processors
    processors.run(album_object, metadata, track, release)
628