1# -*- coding: utf-8 -*- 2# This file is part of beets. 3# Copyright 2016, Adrian Sampson. 4# 5# Permission is hereby granted, free of charge, to any person obtaining 6# a copy of this software and associated documentation files (the 7# "Software"), to deal in the Software without restriction, including 8# without limitation the rights to use, copy, modify, merge, publish, 9# distribute, sublicense, and/or sell copies of the Software, and to 10# permit persons to whom the Software is furnished to do so, subject to 11# the following conditions: 12# 13# The above copyright notice and this permission notice shall be 14# included in all copies or substantial portions of the Software. 15 16"""Searches for albums in the MusicBrainz database. 17""" 18from __future__ import division, absolute_import, print_function 19 20import musicbrainzngs 21import re 22import traceback 23from six.moves.urllib.parse import urljoin 24 25from beets import logging 26import beets.autotag.hooks 27import beets 28from beets import util 29from beets import config 30import six 31 32VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377' 33 34if util.SNI_SUPPORTED: 35 BASE_URL = 'https://musicbrainz.org/' 36else: 37 BASE_URL = 'http://musicbrainz.org/' 38 39SKIPPED_TRACKS = ['[data track]'] 40 41musicbrainzngs.set_useragent('beets', beets.__version__, 42 'http://beets.io/') 43 44 45class MusicBrainzAPIError(util.HumanReadableException): 46 """An error while talking to MusicBrainz. The `query` field is the 47 parameter to the action and may have any type. 48 """ 49 def __init__(self, reason, verb, query, tb=None): 50 self.query = query 51 if isinstance(reason, musicbrainzngs.WebServiceError): 52 reason = u'MusicBrainz not reachable' 53 super(MusicBrainzAPIError, self).__init__(reason, verb, tb) 54 55 def get_message(self): 56 return u'{0} in {1} with query {2}'.format( 57 self._reasonstr(), self.verb, repr(self.query) 58 ) 59 60log = logging.getLogger('beets') 61 62RELEASE_INCLUDES = ['artists', 'media', 'recordings', 'release-groups', 63 'labels', 'artist-credits', 'aliases', 64 'recording-level-rels', 'work-rels', 65 'work-level-rels', 'artist-rels'] 66TRACK_INCLUDES = ['artists', 'aliases'] 67if 'work-level-rels' in musicbrainzngs.VALID_INCLUDES['recording']: 68 TRACK_INCLUDES += ['work-level-rels', 'artist-rels'] 69 70 71def track_url(trackid): 72 return urljoin(BASE_URL, 'recording/' + trackid) 73 74 75def album_url(albumid): 76 return urljoin(BASE_URL, 'release/' + albumid) 77 78 79def configure(): 80 """Set up the python-musicbrainz-ngs module according to settings 81 from the beets configuration. This should be called at startup. 82 """ 83 hostname = config['musicbrainz']['host'].as_str() 84 musicbrainzngs.set_hostname(hostname) 85 musicbrainzngs.set_rate_limit( 86 config['musicbrainz']['ratelimit_interval'].as_number(), 87 config['musicbrainz']['ratelimit'].get(int), 88 ) 89 90 91def _preferred_alias(aliases): 92 """Given an list of alias structures for an artist credit, select 93 and return the user's preferred alias alias or None if no matching 94 alias is found. 95 """ 96 if not aliases: 97 return 98 99 # Only consider aliases that have locales set. 100 aliases = [a for a in aliases if 'locale' in a] 101 102 # Search configured locales in order. 103 for locale in config['import']['languages'].as_str_seq(): 104 # Find matching primary aliases for this locale. 105 matches = [a for a in aliases 106 if a['locale'] == locale and 'primary' in a] 107 # Skip to the next locale if we have no matches 108 if not matches: 109 continue 110 111 return matches[0] 112 113 114def _preferred_release_event(release): 115 """Given a release, select and return the user's preferred release 116 event as a tuple of (country, release_date). Fall back to the 117 default release event if a preferred event is not found. 118 """ 119 countries = config['match']['preferred']['countries'].as_str_seq() 120 121 for country in countries: 122 for event in release.get('release-event-list', {}): 123 try: 124 if country in event['area']['iso-3166-1-code-list']: 125 return country, event['date'] 126 except KeyError: 127 pass 128 129 return release.get('country'), release.get('date') 130 131 132def _flatten_artist_credit(credit): 133 """Given a list representing an ``artist-credit`` block, flatten the 134 data into a triple of joined artist name strings: canonical, sort, and 135 credit. 136 """ 137 artist_parts = [] 138 artist_sort_parts = [] 139 artist_credit_parts = [] 140 for el in credit: 141 if isinstance(el, six.string_types): 142 # Join phrase. 143 artist_parts.append(el) 144 artist_credit_parts.append(el) 145 artist_sort_parts.append(el) 146 147 else: 148 alias = _preferred_alias(el['artist'].get('alias-list', ())) 149 150 # An artist. 151 if alias: 152 cur_artist_name = alias['alias'] 153 else: 154 cur_artist_name = el['artist']['name'] 155 artist_parts.append(cur_artist_name) 156 157 # Artist sort name. 158 if alias: 159 artist_sort_parts.append(alias['sort-name']) 160 elif 'sort-name' in el['artist']: 161 artist_sort_parts.append(el['artist']['sort-name']) 162 else: 163 artist_sort_parts.append(cur_artist_name) 164 165 # Artist credit. 166 if 'name' in el: 167 artist_credit_parts.append(el['name']) 168 else: 169 artist_credit_parts.append(cur_artist_name) 170 171 return ( 172 ''.join(artist_parts), 173 ''.join(artist_sort_parts), 174 ''.join(artist_credit_parts), 175 ) 176 177 178def track_info(recording, index=None, medium=None, medium_index=None, 179 medium_total=None): 180 """Translates a MusicBrainz recording result dictionary into a beets 181 ``TrackInfo`` object. Three parameters are optional and are used 182 only for tracks that appear on releases (non-singletons): ``index``, 183 the overall track number; ``medium``, the disc number; 184 ``medium_index``, the track's index on its medium; ``medium_total``, 185 the number of tracks on the medium. Each number is a 1-based index. 186 """ 187 info = beets.autotag.hooks.TrackInfo( 188 recording['title'], 189 recording['id'], 190 index=index, 191 medium=medium, 192 medium_index=medium_index, 193 medium_total=medium_total, 194 data_source=u'MusicBrainz', 195 data_url=track_url(recording['id']), 196 ) 197 198 if recording.get('artist-credit'): 199 # Get the artist names. 200 info.artist, info.artist_sort, info.artist_credit = \ 201 _flatten_artist_credit(recording['artist-credit']) 202 203 # Get the ID and sort name of the first artist. 204 artist = recording['artist-credit'][0]['artist'] 205 info.artist_id = artist['id'] 206 207 if recording.get('length'): 208 info.length = int(recording['length']) / (1000.0) 209 210 lyricist = [] 211 composer = [] 212 composer_sort = [] 213 for work_relation in recording.get('work-relation-list', ()): 214 if work_relation['type'] != 'performance': 215 continue 216 for artist_relation in work_relation['work'].get( 217 'artist-relation-list', ()): 218 if 'type' in artist_relation: 219 type = artist_relation['type'] 220 if type == 'lyricist': 221 lyricist.append(artist_relation['artist']['name']) 222 elif type == 'composer': 223 composer.append(artist_relation['artist']['name']) 224 composer_sort.append( 225 artist_relation['artist']['sort-name']) 226 if lyricist: 227 info.lyricist = u', '.join(lyricist) 228 if composer: 229 info.composer = u', '.join(composer) 230 info.composer_sort = u', '.join(composer_sort) 231 232 arranger = [] 233 for artist_relation in recording.get('artist-relation-list', ()): 234 if 'type' in artist_relation: 235 type = artist_relation['type'] 236 if type == 'arranger': 237 arranger.append(artist_relation['artist']['name']) 238 if arranger: 239 info.arranger = u', '.join(arranger) 240 241 info.decode() 242 return info 243 244 245def _set_date_str(info, date_str, original=False): 246 """Given a (possibly partial) YYYY-MM-DD string and an AlbumInfo 247 object, set the object's release date fields appropriately. If 248 `original`, then set the original_year, etc., fields. 249 """ 250 if date_str: 251 date_parts = date_str.split('-') 252 for key in ('year', 'month', 'day'): 253 if date_parts: 254 date_part = date_parts.pop(0) 255 try: 256 date_num = int(date_part) 257 except ValueError: 258 continue 259 260 if original: 261 key = 'original_' + key 262 setattr(info, key, date_num) 263 264 265def album_info(release): 266 """Takes a MusicBrainz release result dictionary and returns a beets 267 AlbumInfo object containing the interesting data about that release. 268 """ 269 # Get artist name using join phrases. 270 artist_name, artist_sort_name, artist_credit_name = \ 271 _flatten_artist_credit(release['artist-credit']) 272 273 # Basic info. 274 track_infos = [] 275 index = 0 276 for medium in release['medium-list']: 277 disctitle = medium.get('title') 278 format = medium.get('format') 279 280 if format in config['match']['ignored_media'].as_str_seq(): 281 continue 282 283 all_tracks = medium['track-list'] 284 if ('data-track-list' in medium 285 and not config['match']['ignore_data_tracks']): 286 all_tracks += medium['data-track-list'] 287 track_count = len(all_tracks) 288 289 if 'pregap' in medium: 290 all_tracks.insert(0, medium['pregap']) 291 292 for track in all_tracks: 293 294 if ('title' in track['recording'] and 295 track['recording']['title'] in SKIPPED_TRACKS): 296 continue 297 298 if ('video' in track['recording'] and 299 track['recording']['video'] == 'true' and 300 config['match']['ignore_video_tracks']): 301 continue 302 303 # Basic information from the recording. 304 index += 1 305 ti = track_info( 306 track['recording'], 307 index, 308 int(medium['position']), 309 int(track['position']), 310 track_count, 311 ) 312 ti.release_track_id = track['id'] 313 ti.disctitle = disctitle 314 ti.media = format 315 ti.track_alt = track['number'] 316 317 # Prefer track data, where present, over recording data. 318 if track.get('title'): 319 ti.title = track['title'] 320 if track.get('artist-credit'): 321 # Get the artist names. 322 ti.artist, ti.artist_sort, ti.artist_credit = \ 323 _flatten_artist_credit(track['artist-credit']) 324 ti.artist_id = track['artist-credit'][0]['artist']['id'] 325 if track.get('length'): 326 ti.length = int(track['length']) / (1000.0) 327 328 track_infos.append(ti) 329 330 info = beets.autotag.hooks.AlbumInfo( 331 release['title'], 332 release['id'], 333 artist_name, 334 release['artist-credit'][0]['artist']['id'], 335 track_infos, 336 mediums=len(release['medium-list']), 337 artist_sort=artist_sort_name, 338 artist_credit=artist_credit_name, 339 data_source=u'MusicBrainz', 340 data_url=album_url(release['id']), 341 ) 342 info.va = info.artist_id == VARIOUS_ARTISTS_ID 343 if info.va: 344 info.artist = config['va_name'].as_str() 345 info.asin = release.get('asin') 346 info.releasegroup_id = release['release-group']['id'] 347 info.albumstatus = release.get('status') 348 349 # Get the disambiguation strings at the release and release group level. 350 if release['release-group'].get('disambiguation'): 351 info.releasegroupdisambig = \ 352 release['release-group'].get('disambiguation') 353 if release.get('disambiguation'): 354 info.albumdisambig = release.get('disambiguation') 355 356 # Get the "classic" Release type. This data comes from a legacy API 357 # feature before MusicBrainz supported multiple release types. 358 if 'type' in release['release-group']: 359 reltype = release['release-group']['type'] 360 if reltype: 361 info.albumtype = reltype.lower() 362 363 # Log the new-style "primary" and "secondary" release types. 364 # Eventually, we'd like to actually store this data, but we just log 365 # it for now to help understand the differences. 366 if 'primary-type' in release['release-group']: 367 rel_primarytype = release['release-group']['primary-type'] 368 if rel_primarytype: 369 log.debug('primary MB release type: ' + rel_primarytype.lower()) 370 if 'secondary-type-list' in release['release-group']: 371 if release['release-group']['secondary-type-list']: 372 log.debug('secondary MB release type(s): ' + ', '.join( 373 [secondarytype.lower() for secondarytype in 374 release['release-group']['secondary-type-list']])) 375 376 # Release events. 377 info.country, release_date = _preferred_release_event(release) 378 release_group_date = release['release-group'].get('first-release-date') 379 if not release_date: 380 # Fall back if release-specific date is not available. 381 release_date = release_group_date 382 _set_date_str(info, release_date, False) 383 _set_date_str(info, release_group_date, True) 384 385 # Label name. 386 if release.get('label-info-list'): 387 label_info = release['label-info-list'][0] 388 if label_info.get('label'): 389 label = label_info['label']['name'] 390 if label != '[no label]': 391 info.label = label 392 info.catalognum = label_info.get('catalog-number') 393 394 # Text representation data. 395 if release.get('text-representation'): 396 rep = release['text-representation'] 397 info.script = rep.get('script') 398 info.language = rep.get('language') 399 400 # Media (format). 401 if release['medium-list']: 402 first_medium = release['medium-list'][0] 403 info.media = first_medium.get('format') 404 405 info.decode() 406 return info 407 408 409def match_album(artist, album, tracks=None): 410 """Searches for a single album ("release" in MusicBrainz parlance) 411 and returns an iterator over AlbumInfo objects. May raise a 412 MusicBrainzAPIError. 413 414 The query consists of an artist name, an album name, and, 415 optionally, a number of tracks on the album. 416 """ 417 # Build search criteria. 418 criteria = {'release': album.lower().strip()} 419 if artist is not None: 420 criteria['artist'] = artist.lower().strip() 421 else: 422 # Various Artists search. 423 criteria['arid'] = VARIOUS_ARTISTS_ID 424 if tracks is not None: 425 criteria['tracks'] = six.text_type(tracks) 426 427 # Abort if we have no search terms. 428 if not any(criteria.values()): 429 return 430 431 try: 432 log.debug(u'Searching for MusicBrainz releases with: {!r}', criteria) 433 res = musicbrainzngs.search_releases( 434 limit=config['musicbrainz']['searchlimit'].get(int), **criteria) 435 except musicbrainzngs.MusicBrainzError as exc: 436 raise MusicBrainzAPIError(exc, 'release search', criteria, 437 traceback.format_exc()) 438 for release in res['release-list']: 439 # The search result is missing some data (namely, the tracks), 440 # so we just use the ID and fetch the rest of the information. 441 albuminfo = album_for_id(release['id']) 442 if albuminfo is not None: 443 yield albuminfo 444 445 446def match_track(artist, title): 447 """Searches for a single track and returns an iterable of TrackInfo 448 objects. May raise a MusicBrainzAPIError. 449 """ 450 criteria = { 451 'artist': artist.lower().strip(), 452 'recording': title.lower().strip(), 453 } 454 455 if not any(criteria.values()): 456 return 457 458 try: 459 res = musicbrainzngs.search_recordings( 460 limit=config['musicbrainz']['searchlimit'].get(int), **criteria) 461 except musicbrainzngs.MusicBrainzError as exc: 462 raise MusicBrainzAPIError(exc, 'recording search', criteria, 463 traceback.format_exc()) 464 for recording in res['recording-list']: 465 yield track_info(recording) 466 467 468def _parse_id(s): 469 """Search for a MusicBrainz ID in the given string and return it. If 470 no ID can be found, return None. 471 """ 472 # Find the first thing that looks like a UUID/MBID. 473 match = re.search(u'[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}', s) 474 if match: 475 return match.group() 476 477 478def album_for_id(releaseid): 479 """Fetches an album by its MusicBrainz ID and returns an AlbumInfo 480 object or None if the album is not found. May raise a 481 MusicBrainzAPIError. 482 """ 483 log.debug(u'Requesting MusicBrainz release {}', releaseid) 484 albumid = _parse_id(releaseid) 485 if not albumid: 486 log.debug(u'Invalid MBID ({0}).', releaseid) 487 return 488 try: 489 res = musicbrainzngs.get_release_by_id(albumid, 490 RELEASE_INCLUDES) 491 except musicbrainzngs.ResponseError: 492 log.debug(u'Album ID match failed.') 493 return None 494 except musicbrainzngs.MusicBrainzError as exc: 495 raise MusicBrainzAPIError(exc, u'get release by ID', albumid, 496 traceback.format_exc()) 497 return album_info(res['release']) 498 499 500def track_for_id(releaseid): 501 """Fetches a track by its MusicBrainz ID. Returns a TrackInfo object 502 or None if no track is found. May raise a MusicBrainzAPIError. 503 """ 504 trackid = _parse_id(releaseid) 505 if not trackid: 506 log.debug(u'Invalid MBID ({0}).', releaseid) 507 return 508 try: 509 res = musicbrainzngs.get_recording_by_id(trackid, TRACK_INCLUDES) 510 except musicbrainzngs.ResponseError: 511 log.debug(u'Track ID match failed.') 512 return None 513 except musicbrainzngs.MusicBrainzError as exc: 514 raise MusicBrainzAPIError(exc, u'get recording by ID', trackid, 515 traceback.format_exc()) 516 return track_info(res['recording']) 517