1# -*- coding: utf-8 -*-
2# This file is part of beets.
3# Copyright 2016, Adrian Sampson.
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15
16"""Searches for albums in the MusicBrainz database.
17"""
18from __future__ import division, absolute_import, print_function
19
20import musicbrainzngs
21import re
22import traceback
23from six.moves.urllib.parse import urljoin
24
25from beets import logging
26import beets.autotag.hooks
27import beets
28from beets import util
29from beets import config
30import six
31
# MBID of the artist entity used for "Various Artists" releases.
VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377'

# Generated links use HTTPS only when `util` reports SNI support.
BASE_URL = ('https://musicbrainz.org/' if util.SNI_SUPPORTED
            else 'http://musicbrainz.org/')

# Recording titles that are dropped when building an album's track list.
SKIPPED_TRACKS = ['[data track]']

# Identify this client to the MusicBrainz web service.
musicbrainzngs.set_useragent('beets', beets.__version__, 'http://beets.io/')
43
44
class MusicBrainzAPIError(util.HumanReadableException):
    """Raised when a request to MusicBrainz fails.

    The `query` attribute holds the argument that was passed to the
    failing action; it may be of any type.
    """
    def __init__(self, reason, verb, query, tb=None):
        # NOTE(review): `query` is stored before the base initializer
        # runs because the base class may call get_message(), which
        # reads it -- confirm against HumanReadableException.
        self.query = query
        # Web-service failures are reported with one generic message
        # instead of the underlying exception.
        if isinstance(reason, musicbrainzngs.WebServiceError):
            reason = u'MusicBrainz not reachable'
        super(MusicBrainzAPIError, self).__init__(reason, verb, tb)

    def get_message(self):
        """Return a one-line description of the failure, including the
        action that was attempted and the query it was given.
        """
        template = u'{0} in {1} with query {2}'
        return template.format(self._reasonstr(), self.verb,
                               repr(self.query))
59
log = logging.getLogger('beets')

# Sub-resources requested when a release is fetched by ID.
RELEASE_INCLUDES = [
    'artists', 'media', 'recordings', 'release-groups',
    'labels', 'artist-credits', 'aliases',
    'recording-level-rels', 'work-rels',
    'work-level-rels', 'artist-rels',
]

# Sub-resources requested when a recording is fetched by ID.
TRACK_INCLUDES = ['artists', 'aliases']
# Relationship data is requested only when the installed musicbrainzngs
# lists 'work-level-rels' as a valid include for recordings.
if 'work-level-rels' in musicbrainzngs.VALID_INCLUDES['recording']:
    TRACK_INCLUDES.extend(['work-level-rels', 'artist-rels'])
69
70
def track_url(trackid):
    """Return the MusicBrainz URL of the recording with ID `trackid`."""
    path = 'recording/' + trackid
    return urljoin(BASE_URL, path)
73
74
def album_url(albumid):
    """Return the MusicBrainz URL of the release with ID `albumid`."""
    path = 'release/' + albumid
    return urljoin(BASE_URL, path)
77
78
def configure():
    """Apply the beets ``musicbrainz`` configuration section to the
    python-musicbrainz-ngs module: the server hostname and the request
    rate limit. This should be called at startup.
    """
    mb_config = config['musicbrainz']

    musicbrainzngs.set_hostname(mb_config['host'].as_str())

    interval = mb_config['ratelimit_interval'].as_number()
    max_requests = mb_config['ratelimit'].get(int)
    musicbrainzngs.set_rate_limit(interval, max_requests)
89
90
91def _preferred_alias(aliases):
92    """Given an list of alias structures for an artist credit, select
93    and return the user's preferred alias alias or None if no matching
94    alias is found.
95    """
96    if not aliases:
97        return
98
99    # Only consider aliases that have locales set.
100    aliases = [a for a in aliases if 'locale' in a]
101
102    # Search configured locales in order.
103    for locale in config['import']['languages'].as_str_seq():
104        # Find matching primary aliases for this locale.
105        matches = [a for a in aliases
106                   if a['locale'] == locale and 'primary' in a]
107        # Skip to the next locale if we have no matches
108        if not matches:
109            continue
110
111        return matches[0]
112
113
def _preferred_release_event(release):
    """Pick the release event that best matches the user's preferred
    countries and return it as a ``(country, release_date)`` tuple.

    The countries from ``match.preferred.countries`` are tried in
    order. When none of them has a usable event, the release's own
    country and date are returned instead.
    """
    preferred = config['match']['preferred']['countries'].as_str_seq()
    events = release.get('release-event-list', {})

    for country in preferred:
        for event in events:
            try:
                codes = event['area']['iso-3166-1-code-list']
                if country in codes:
                    return country, event['date']
            except KeyError:
                # Events lacking area or date information are skipped.
                continue

    return release.get('country'), release.get('date')
130
131
def _flatten_artist_credit(credit):
    """Collapse an ``artist-credit`` list into three joined strings:
    the canonical artist name, the sort name, and the credited name.

    String elements of `credit` are join phrases; every other element
    is treated as an artist entry.
    """
    names = []
    sort_names = []
    credited_names = []

    for element in credit:
        if isinstance(element, six.string_types):
            # A join phrase is copied verbatim into all three results.
            for parts in (names, sort_names, credited_names):
                parts.append(element)
            continue

        artist = element['artist']
        alias = _preferred_alias(artist.get('alias-list', ()))

        # Canonical name: the preferred alias wins over the artist name.
        name = alias['alias'] if alias else artist['name']
        names.append(name)

        # Sort name: alias first, then the artist's own, then the name.
        if alias:
            sort_name = alias['sort-name']
        else:
            sort_name = artist.get('sort-name', name)
        sort_names.append(sort_name)

        # Credited name: as given on the credit itself, if present.
        credited_names.append(element.get('name', name))

    return (
        ''.join(names),
        ''.join(sort_names),
        ''.join(credited_names),
    )
176
177
def track_info(recording, index=None, medium=None, medium_index=None,
               medium_total=None):
    """Translates a MusicBrainz recording result dictionary into a beets
    ``TrackInfo`` object. Four parameters are optional and are used
    only for tracks that appear on releases (non-singletons): ``index``,
    the overall track number; ``medium``, the disc number;
    ``medium_index``, the track's index on its medium; ``medium_total``,
    the number of tracks on the medium. Each number is a 1-based index.
    """
    title = recording['title']
    recording_id = recording['id']
    info = beets.autotag.hooks.TrackInfo(
        title,
        recording_id,
        index=index,
        medium=medium,
        medium_index=medium_index,
        medium_total=medium_total,
        data_source=u'MusicBrainz',
        data_url=track_url(recording_id),
    )

    credit = recording.get('artist-credit')
    if credit:
        # Joined artist names, plus the ID of the first credited artist.
        info.artist, info.artist_sort, info.artist_credit = \
            _flatten_artist_credit(credit)
        info.artist_id = credit[0]['artist']['id']

    length = recording.get('length')
    if length:
        # The length is divided by 1000 before being stored.
        info.length = int(length) / (1000.0)

    # Lyricists and composers are read from the artist relations of the
    # works this recording is a performance of.
    lyricists = []
    composers = []
    composer_sorts = []
    for work_rel in recording.get('work-relation-list', ()):
        if work_rel['type'] != 'performance':
            continue
        work_artists = work_rel['work'].get('artist-relation-list', ())
        for artist_rel in work_artists:
            role = artist_rel.get('type')
            if role == 'lyricist':
                lyricists.append(artist_rel['artist']['name'])
            elif role == 'composer':
                composer = artist_rel['artist']
                composers.append(composer['name'])
                composer_sorts.append(composer['sort-name'])
    if lyricists:
        info.lyricist = u', '.join(lyricists)
    if composers:
        info.composer = u', '.join(composers)
        info.composer_sort = u', '.join(composer_sorts)

    # Arrangers are read from the recording's own artist relations.
    arrangers = [
        artist_rel['artist']['name']
        for artist_rel in recording.get('artist-relation-list', ())
        if artist_rel.get('type') == 'arranger'
    ]
    if arrangers:
        info.arranger = u', '.join(arrangers)

    info.decode()
    return info
243
244
245def _set_date_str(info, date_str, original=False):
246    """Given a (possibly partial) YYYY-MM-DD string and an AlbumInfo
247    object, set the object's release date fields appropriately. If
248    `original`, then set the original_year, etc., fields.
249    """
250    if date_str:
251        date_parts = date_str.split('-')
252        for key in ('year', 'month', 'day'):
253            if date_parts:
254                date_part = date_parts.pop(0)
255                try:
256                    date_num = int(date_part)
257                except ValueError:
258                    continue
259
260                if original:
261                    key = 'original_' + key
262                setattr(info, key, date_num)
263
264
def album_info(release):
    """Takes a MusicBrainz release result dictionary and returns a beets
    AlbumInfo object containing the interesting data about that release.

    Media whose format is listed in ``match.ignored_media`` contribute
    no tracks. Data tracks and video tracks are dropped according to
    ``match.ignore_data_tracks`` and ``match.ignore_video_tracks``. The
    track lists inside `release` are not modified.
    """
    # Get artist name using join phrases.
    artist_name, artist_sort_name, artist_credit_name = \
        _flatten_artist_credit(release['artist-credit'])

    # Loop-invariant configuration, looked up once.
    ignored_media = config['match']['ignored_media'].as_str_seq()

    # Basic info.
    track_infos = []
    index = 0
    for medium in release['medium-list']:
        disctitle = medium.get('title')
        media_format = medium.get('format')

        if media_format in ignored_media:
            continue

        # Work on a copy so that adding data tracks and the pregap track
        # below does not alter the caller's release dictionary.
        all_tracks = list(medium['track-list'])
        if ('data-track-list' in medium
                and not config['match']['ignore_data_tracks']):
            all_tracks.extend(medium['data-track-list'])
        # Taken before the pregap track is added, so the pregap is not
        # included in the medium's track total.
        track_count = len(all_tracks)

        if 'pregap' in medium:
            all_tracks.insert(0, medium['pregap'])

        for track in all_tracks:
            recording = track['recording']

            if recording.get('title') in SKIPPED_TRACKS:
                continue

            if (recording.get('video') == 'true' and
                    config['match']['ignore_video_tracks']):
                continue

            # Basic information from the recording.
            index += 1
            ti = track_info(
                recording,
                index,
                int(medium['position']),
                int(track['position']),
                track_count,
            )
            ti.release_track_id = track['id']
            ti.disctitle = disctitle
            ti.media = media_format
            ti.track_alt = track['number']

            # Prefer track data, where present, over recording data.
            if track.get('title'):
                ti.title = track['title']
            if track.get('artist-credit'):
                # Get the artist names.
                ti.artist, ti.artist_sort, ti.artist_credit = \
                    _flatten_artist_credit(track['artist-credit'])
                ti.artist_id = track['artist-credit'][0]['artist']['id']
            if track.get('length'):
                ti.length = int(track['length']) / (1000.0)

            track_infos.append(ti)

    info = beets.autotag.hooks.AlbumInfo(
        release['title'],
        release['id'],
        artist_name,
        release['artist-credit'][0]['artist']['id'],
        track_infos,
        # Counts every medium of the release, ignored ones included.
        mediums=len(release['medium-list']),
        artist_sort=artist_sort_name,
        artist_credit=artist_credit_name,
        data_source=u'MusicBrainz',
        data_url=album_url(release['id']),
    )
    info.va = info.artist_id == VARIOUS_ARTISTS_ID
    if info.va:
        info.artist = config['va_name'].as_str()
    info.asin = release.get('asin')

    release_group = release['release-group']
    info.releasegroup_id = release_group['id']
    info.albumstatus = release.get('status')

    # Get the disambiguation strings at the release and release group level.
    if release_group.get('disambiguation'):
        info.releasegroupdisambig = release_group.get('disambiguation')
    if release.get('disambiguation'):
        info.albumdisambig = release.get('disambiguation')

    # Get the "classic" Release type. This data comes from a legacy API
    # feature before MusicBrainz supported multiple release types.
    reltype = release_group.get('type')
    if reltype:
        info.albumtype = reltype.lower()

    # Log the new-style "primary" and "secondary" release types.
    # Eventually, we'd like to actually store this data, but we just log
    # it for now to help understand the differences. The values are
    # passed as format arguments, like the other log calls in this file,
    # so braces in the data cannot be mistaken for placeholders.
    rel_primarytype = release_group.get('primary-type')
    if rel_primarytype:
        log.debug(u'primary MB release type: {0}', rel_primarytype.lower())
    secondary_types = release_group.get('secondary-type-list')
    if secondary_types:
        log.debug(u'secondary MB release type(s): {0}', ', '.join(
            [secondarytype.lower() for secondarytype in secondary_types]))

    # Release events.
    info.country, release_date = _preferred_release_event(release)
    release_group_date = release_group.get('first-release-date')
    if not release_date:
        # Fall back if release-specific date is not available.
        release_date = release_group_date
    _set_date_str(info, release_date, False)
    _set_date_str(info, release_group_date, True)

    # Label name.
    if release.get('label-info-list'):
        label_info = release['label-info-list'][0]
        if label_info.get('label'):
            label = label_info['label']['name']
            if label != '[no label]':
                info.label = label
        info.catalognum = label_info.get('catalog-number')

    # Text representation data.
    if release.get('text-representation'):
        rep = release['text-representation']
        info.script = rep.get('script')
        info.language = rep.get('language')

    # Media (format).
    if release['medium-list']:
        first_medium = release['medium-list'][0]
        info.media = first_medium.get('format')

    info.decode()
    return info
407
408
def match_album(artist, album, tracks=None):
    """Search MusicBrainz for an album (a "release" in MusicBrainz
    parlance) and yield an AlbumInfo object for each result that can be
    fetched. May raise a MusicBrainzAPIError.

    The query is built from the album name, the artist name (a
    Various Artists search is done when `artist` is None) and,
    optionally, the number of tracks on the album.
    """
    # Build search criteria.
    criteria = {'release': album.lower().strip()}
    if artist is None:
        # Various Artists search.
        criteria['arid'] = VARIOUS_ARTISTS_ID
    else:
        criteria['artist'] = artist.lower().strip()
    if tracks is not None:
        criteria['tracks'] = six.text_type(tracks)

    # Abort if we have no search terms.
    if not any(criteria.values()):
        return

    try:
        log.debug(u'Searching for MusicBrainz releases with: {!r}', criteria)
        limit = config['musicbrainz']['searchlimit'].get(int)
        res = musicbrainzngs.search_releases(limit=limit, **criteria)
    except musicbrainzngs.MusicBrainzError as exc:
        raise MusicBrainzAPIError(exc, 'release search', criteria,
                                  traceback.format_exc())
    else:
        # The search result is missing some data (namely, the tracks),
        # so only the ID is used and the full release is fetched.
        for found in res['release-list']:
            albuminfo = album_for_id(found['id'])
            if albuminfo is not None:
                yield albuminfo
444
445
def match_track(artist, title):
    """Search MusicBrainz for a single track and yield a TrackInfo
    object for each recording found. May raise a MusicBrainzAPIError.
    """
    criteria = {
        'artist': artist.lower().strip(),
        'recording': title.lower().strip(),
    }

    # Nothing to search for when both terms are empty.
    if not any(criteria.values()):
        return

    try:
        limit = config['musicbrainz']['searchlimit'].get(int)
        res = musicbrainzngs.search_recordings(limit=limit, **criteria)
    except musicbrainzngs.MusicBrainzError as exc:
        raise MusicBrainzAPIError(exc, 'recording search', criteria,
                                  traceback.format_exc())
    else:
        for found in res['recording-list']:
            yield track_info(found)
466
467
468def _parse_id(s):
469    """Search for a MusicBrainz ID in the given string and return it. If
470    no ID can be found, return None.
471    """
472    # Find the first thing that looks like a UUID/MBID.
473    match = re.search(u'[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}', s)
474    if match:
475        return match.group()
476
477
def album_for_id(releaseid):
    """Fetch an album by its MusicBrainz ID and return an AlbumInfo
    object, or None if `releaseid` contains no valid ID or the album is
    not found. May raise a MusicBrainzAPIError.
    """
    log.debug(u'Requesting MusicBrainz release {}', releaseid)
    albumid = _parse_id(releaseid)
    if not albumid:
        log.debug(u'Invalid MBID ({0}).', releaseid)
        return

    try:
        response = musicbrainzngs.get_release_by_id(albumid,
                                                    RELEASE_INCLUDES)
    except musicbrainzngs.ResponseError:
        # A response error is treated as "no such album", not a failure.
        log.debug(u'Album ID match failed.')
        return None
    except musicbrainzngs.MusicBrainzError as exc:
        raise MusicBrainzAPIError(exc, u'get release by ID', albumid,
                                  traceback.format_exc())
    else:
        return album_info(response['release'])
498
499
def track_for_id(releaseid):
    """Fetch a track by its MusicBrainz ID and return a TrackInfo
    object, or None if `releaseid` contains no valid ID or the track is
    not found. May raise a MusicBrainzAPIError.
    """
    trackid = _parse_id(releaseid)
    if not trackid:
        log.debug(u'Invalid MBID ({0}).', releaseid)
        return

    try:
        response = musicbrainzngs.get_recording_by_id(trackid,
                                                      TRACK_INCLUDES)
    except musicbrainzngs.ResponseError:
        # A response error is treated as "no such track", not a failure.
        log.debug(u'Track ID match failed.')
        return None
    except musicbrainzngs.MusicBrainzError as exc:
        raise MusicBrainzAPIError(exc, u'get recording by ID', trackid,
                                  traceback.format_exc())
    else:
        return track_info(response['recording'])
517