1# -*- coding: utf-8 -*- 2# 3# Copyright (C) 2018 Mark Evens 4# 5# This program is free software; you can redistribute it and/or 6# modify it under the terms of the GNU General Public License 7# as published by the Free Software Foundation; either version 2 8# of the License, or (at your option) any later version. 9# 10# This program is distributed in the hope that it will be useful, 11# but WITHOUT ANY WARRANTY; without even the implied warranty of 12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13# GNU General Public License for more details. 14# 15# You should have received a copy of the GNU General Public License 16# along with this program; if not, write to the Free Software 17# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 18# 02110-1301, USA. 19 20PLUGIN_NAME = u'Classical Extras' 21PLUGIN_AUTHOR = u'Mark Evens' 22PLUGIN_DESCRIPTION = u"""Classical Extras provides tagging enhancements for Picard and, in particular, 23utilises MusicBrainz’s hierarchy of works to provide work/movement tags. All options are set through a 24user interface in Picard options->plugins. This interface provides separate sections 25to enhance artist/performer tags, works and parts, genres and also allows for a generalised 26"tag mapping" (simple scripting). 27While it is designed to cater for the complexities of classical music tagging, 28it may also be useful for other music which has more than just basic song/artist/album data. 29<br /><br /> 30The options screen provides five tabs for users to control the tags produced: 31<br /><br /> 321. Artists: Options as to whether artist tags will contain standard MB names, aliases or as-credited names. 33Ability to include and annotate names for specialist roles (chorus master, arranger, lyricist etc.). 34Ability to read lyrics tags on the file which has been loaded and assign them to track and album levels if required. 35(Note: Picard will not normally process incoming file tags). 36<br /><br /> 372. 
Works and parts: The plugin will build a hierarchy of works and parts (e.g. Work -> Part -> Movement or 38Opera -> Act -> Number) based on the works in MusicBrainz's database. These can then be displayed in tags in a variety 39of ways according to user preferences. Furthermore partial recordings, medleys, arrangements and collections of works 40are all handled according to user choices. There is a processing overhead for this at present because MusicBrainz limits 41look-ups to one per second. 42<br /><br /> 433. Genres etc.: Options are available to customise the source and display of information relating to genres, 44instruments, keys, work dates and periods. Additional capabilities are provided for users of Muso (or others who 45provide the relevant XML files) to use pre-existing databases of classical genres, classical composers and classical 46periods. 47<br /><br /> 484. Tag mapping: in some ways, this is a simple substitute for some of Picard's scripting capability. The main advantage 49 is that the plugin will remember what tag mapping you use for each release (or even track). 50<br /><br /> 515. Advanced: Various options to control the detailed processing of the above. 52<br /><br /> 53All user options can be saved on a per-album (or even per-track) basis so that tweaks can be used to deal with 54inconsistencies in the MusicBrainz data (e.g. include English titles from the track listing where the MusicBrainz works 55are in the composer's language and/or script). 56Also existing file tags can be processed (not possible in native Picard). 57<br /><br /> 58See the readme file <a href="https://github.com/MetaTunes/picard-plugins/tree/metabrainz/2.0/plugins/classical_extras"> 59on GitHub here</a> for full details. 60""" 61 62######################## 63# DEVELOPERS NOTES: #### 64######################## 65# This plugin contains 3 classes: 66# 67# I. ("EXTRA ARTISTS") Create sorted fields for all performers. 
Creates a number of variables with alternative values 68# for "artists" and "artist". 69# Creates an ensemble variable for all ensemble-type performers. 70# Also creates matching sort fields for artist and artists. 71# Additionally create tags for artist types which are not normally created in Picard - particularly for classical music 72# (notably instrument arrangers). 73# 74# II. ("PART LEVELS" [aka Work Parts]) Create tags for the hierarchy of works which contain a given track recording 75# - particularly for classical music' 76# Variables provided for each work level, with implied part names 77# Mixed metadata provided including work and title elements 78# 79# III. ("OPTIONS") Allows the user to set various options including what tags will be written 80# (otherwise the classes above will just write outputs to "hidden variables") 81# 82# The main control routine is at the end of the module 83 84PLUGIN_VERSION = '2.0.12' 85PLUGIN_API_VERSIONS = ["2.0", "2.1", "2.2", "2.3", "2.4"] 86PLUGIN_LICENSE = "GPL-2.0" 87PLUGIN_LICENSE_URL = "https://www.gnu.org/licenses/gpl-2.0.html" 88 89from picard.ui.options import register_options_page, OptionsPage 90from picard.plugins.classical_extras.ui_options_classical_extras import Ui_ClassicalExtrasOptionsPage 91import picard.plugins.classical_extras.suffixtree 92from picard import config, log 93from picard.config import ConfigSection, BoolOption, IntOption, TextOption 94from picard.util import LockableObject, uniqify 95 96# note that in 2.0 picard.webservice changed to picard.util.xml 97from picard.util.xml import XmlNode 98from picard.util import translate_from_sortname 99from picard.metadata import register_track_metadata_processor, Metadata 100from functools import partial 101from datetime import datetime 102import collections 103import re 104import unicodedata 105import json 106import copy 107import os 108from PyQt5.QtCore import QXmlStreamReader 109from picard.const import USER_DIR 110import operator 111import ast 
##########################
# MODULE-WIDE COMPONENTS #
##########################
# CONSTANTS
# N.B. Constants with long definitions are set in const.py
DATE_SEP = '-'

# COMMONLY USED REGEX
# ROMAN_NUMERALS captures (group 1) a Roman numeral that starts at a word
# boundary and is followed by '.', '-', ':', ';', ',', whitespace or
# end-of-string. The look-ahead requires at least one numeral letter, so the
# otherwise all-optional numeral body cannot match an empty string.
ROMAN_NUMERALS = r'\b((?=[MDCLXVI])(M{0,4}(CM|CD|D?)?C{0,3}(XC|XL|L?)?X{0,3}(IX|IV|V?)?I{0,3}))(?:\.|\-|:|;|,|\s|$)'
# Same pattern, anchored to the start of the string (leading non-word
# characters are skipped).
ROMAN_NUMERALS_AT_START = r'^\W*' + ROMAN_NUMERALS
# Both compiled forms are case-insensitive.
RE_ROMANS = re.compile(ROMAN_NUMERALS, re.IGNORECASE)
RE_ROMANS_AT_START = re.compile(ROMAN_NUMERALS_AT_START, re.IGNORECASE)
# KEYS
# NOTE: RE_NOTES, RE_ACCENTS and RE_SCALES are pattern *strings*; only RE_KEYS
# is a compiled regex. RE_KEYS groups: 1 = note letter, 2 = accidental (or the
# separator that follows the note), 3 = optional 'major'/'minor'.
RE_NOTES = r'(\b[ABCDEFG])'
RE_ACCENTS = r'(\-sharp(?:\s+|\b)|\-flat(?:\s+|\b)|\ssharp(?:\s+|\b)|\sflat(?:\s+|\b)|\u266F(?:\s+|\b)|\u266D(?:\s+|\b)|(?:[:,.]?\s+|$|\-))'
RE_SCALES = r'(major|minor)?(?:\b|$)'
RE_KEYS = re.compile(
    RE_NOTES + RE_ACCENTS + RE_SCALES,
    re.UNICODE | re.IGNORECASE)

# LOGGING

# If logging occurs before any album is loaded, the startup log file will
# be written
# log_files maps a release id (or 'session') to its open log file object.
log_files = collections.defaultdict(dict)
# entries are release-ids: to keep track of which log files are open
# release_status is a defaultdict, so merely indexing it with a new release id
# creates an (empty) entry for that release.
release_status = collections.defaultdict(dict)
# release_status[release_id]['works'] = True indicates that we are still processing works for release_id
# & similarly for 'artists'
# release_status[release_id]['start'] holds start time of release processing
# release_status[release_id]['name'] holds the album name
# release_status[release_id]['lookups'] holds number of lookups for this release
# release_status[release_id]['file_objects'] holds a cumulative list of file objects (tagger seems a bit unreliable)
# release_status[release_id]['file_found'] = False indicates that "No file
# with matching trackid" has (yet) been found
def _remember_status_message(release_id, bucket, text):
    """Store *text* in release_status[release_id][bucket] for the session summary."""
    # The first message creates the list explicitly; later ones are merged
    # through add_list_uniquely, exactly as the original inline code did.
    if release_id in release_status and bucket in release_status[release_id]:
        add_list_uniquely(release_status[release_id][bucket], text)
    else:
        release_status[release_id][bucket] = [text]


def write_log(release_id, log_type, message, *args):
    """
    Custom logging function - if log_info is set, all messages will be written to a custom file in a
    'Classical_Extras' subdirectory of Picard's USER_DIR. A different file is used for each album,
    to aid in debugging - the log file is release_id.log. Any startup messages (i.e. before a release
    has been loaded) are written to session.log. Messages of type 'basic' (the per-release summaries)
    are written even if log_info is not set.
    Debug, warning and error messages are additionally passed to the main Picard log (prefixed with the
    plugin name) and remembered in release_status when the matching log_debug / log_warning / log_error
    option is set.
    :param release_id: name for log file - usually =musicbrainz_albumid
            unless called outside metadata processor
    :param log_type: 'error', 'warning', 'debug', 'info' or 'basic'
    :param message: string, e.g. 'error message for workid: %s'; a non-string is logged via repr()
    :param args: arguments for parameters in string, e.g. if workId then str(workId) will replace %s in the above
    :return: None
    """
    options = config.setting
    # Non-string messages (e.g. a Metadata object) are logged via their repr.
    template = message if isinstance(message, str) else repr(message)
    msg = template
    fmt_args = args
    if args:
        try:
            msg = template % args
        except (TypeError, ValueError):
            # A template/argument mismatch must not abort tagging: log the raw
            # pieces instead and stop passing args on to the Picard logger.
            msg = template + ' ' + repr(args)
            fmt_args = ()

    if options["log_info"] or log_type == "basic":
        # if log_info is True, all log messages will be written to the custom log, regardless of other
        # log_... settings
        # basic session log will always be written (summary of releases and
        # processing times)
        filename = release_id + ".log"
        log_dir = os.path.join(USER_DIR, "Classical_Extras")
        if release_id not in log_files:
            try:
                os.makedirs(log_dir, exist_ok=True)
                if release_id == 'session':
                    # buffering=1 so that session log (low volume) is up to
                    # date even if not closed
                    log_file = open(
                        os.path.join(log_dir, filename),
                        'w', encoding='utf8', buffering=1)
                else:
                    # default buffering for speed
                    log_file = open(
                        os.path.join(log_dir, filename),
                        'w', encoding='utf8')
                log_files[release_id] = log_file
                log_file.write(
                    PLUGIN_NAME + ' Version:' + PLUGIN_VERSION + '\n')
                if release_id == 'session':
                    log_file.write('session' + '\n')
                else:
                    log_file.write('Release id: ' + release_id + '\n')
                    if release_id in release_status and 'name' in release_status[release_id]:
                        log_file.write(
                            'Album name: ' + release_status[release_id]['name'] + '\n')
            except IOError:
                log.error('Unable to open file %s for writing log', filename)
                return
        else:
            log_file = log_files[release_id]
        try:
            log_file.write(log_type[:1].upper() + ': ')
            log_file.write(str(datetime.now()) + ' : ')
            log_file.write(msg)
            log_file.write("\n")
        except IOError:
            log.error('Unable to write to log file %s', filename)
            return

    # Only debug, warning and error messages will be written to the main
    # Picard log, if those options have been set
    channels = {
        'debug': ('log_debug', 'debug', log.debug),
        'warning': ('log_warning', 'warnings', log.warning),
        'error': ('log_error', 'errors', log.error),
    }
    channel = channels.get(log_type)
    if channel is None:
        return
    option_name, bucket, emit = channel
    if options[option_name]:
        _remember_status_message(release_id, bucket, msg)
        # With no args the logger does no %-formatting, so the already
        # formatted text is passed when formatting had to fall back.
        picard_template = template if fmt_args else msg
        emit(PLUGIN_NAME + ': ' + picard_template, *fmt_args)
def close_log(release_id, caller):
    """
    Close the custom log file for a release once both processors have finished, and write the
    release summary (errors, warnings, debug messages and timings) to the session log.
    :param release_id: id of the release whose log is to be closed ('session' is never closed)
    :param caller: 'works' or 'artists' marks that processor as finished; if the other one is still
            flagged as running, the function returns without closing. Any other value closes at once.
    :return: None
    Missing timing entries in release_status (e.g. a repeated call after the entry has already been
    deleted) are reported with the default values rather than raising KeyError.
    """
    if release_id == 'session':  # shouldn't happen but, just in case, don't close the session log
        return
    status = release_status[release_id]
    if caller in ('works', 'artists'):
        status[caller] = False
        other = 'artists' if caller == 'works' else 'works'
        # .get(): the other processor may never have registered itself
        if status.get(other):
            return

    duration = 'N/A'
    artists_time = 0
    works_time = 0
    lookup_time = 0
    album_process_time = 0
    # pop() both reads and removes the timing entries
    start = status.pop('start', None)
    lookups = status.pop('lookups', 'N/A')
    done_lookups = status.pop('done-lookups', None)
    artists_done = status.pop('artists-done', None)
    works_done = status.pop('works-done', None)
    if start is not None:
        duration = datetime.now() - start
        if done_lookups is not None:
            lookup_time = done_lookups - start
            album_process_time = duration - lookup_time
        if artists_done is not None:
            artists_time = artists_done - start
        if works_done is not None:
            works_time = works_done - start

    if release_id in log_files:
        write_log(
            release_id, 'info',
            'Duration = %s. Number of lookups = %s.', duration, lookups)
        write_log(release_id, 'info', 'Closing log file for %s', release_id)
        log_files[release_id].close()
        del log_files[release_id]

    if 'session' in log_files:
        write_log(
            'session', 'basic',
            '\n Completed processing release id %s. Details below:-',
            release_id)
        if 'name' in status:
            write_log('session', 'basic', 'Album name %s', status['name'])
        summary_sections = (
            ('errors', '-------------------- Errors --------------------'),
            ('warnings', '-------------------- Warnings --------------------'),
            ('debug', '-------------------- Debug log --------------------'),
        )
        for bucket, heading in summary_sections:
            if bucket in status:
                write_log('session', 'basic', heading)
                for line in status[bucket]:
                    write_log('session', 'basic', line)
                del status[bucket]
        write_log(
            'session', 'basic',
            'Duration = %s. Artists time = %s. Works time = %s. Of which: Lookup time = %s. '
            'Album-process time = %s. Number of lookups = %s.',
            duration, artists_time, works_time, lookup_time,
            album_process_time, lookups)
    release_status.pop(release_id, None)
Number of lookups = %s.', 340 duration, 341 artists_time, 342 works_time, 343 lookup_time, 344 album_process_time, 345 lookups) 346 if release_id in release_status: 347 del release_status[release_id] 348 349 350# FILE READING AND OBJECT PARSING 351 352_node_name_re = re.compile('[^a-zA-Z0-9]') 353 354 355def _node_name(n): 356 return _node_name_re.sub('_', str(n)) 357 358 359def _read_xml(stream): 360 document = XmlNode() 361 current_node = document 362 path = [] 363 while not stream.atEnd(): 364 stream.readNext() 365 if stream.isStartElement(): 366 node = XmlNode() 367 attrs = stream.attributes() 368 for i in range(attrs.count()): 369 attr = attrs.at(i) 370 node.attribs[_node_name(attr.name())] = str(attr.value()) 371 current_node.append_child(_node_name(stream.name()), node) 372 path.append(current_node) 373 current_node = node 374 elif stream.isEndElement(): 375 current_node = path.pop() 376 elif stream.isCharacters(): 377 current_node.text += str(stream.text()) 378 return document 379 380 381def parse_data(release_id, obj, response_list, *match): 382 """ 383 This function takes any XmlNode object, or list thereof, or a JSON object 384 and extracts a list of all objects exactly matching the hierarchy listed in match. 385 match should contain list of each node in hierarchical sequence, with no gaps in the sequence 386 of nodes, to lowest level required. 387 :param release_id: name for log file - usually =musicbrainz_albumid 388 unless called outside metadata processor 389 :param obj: an XmlNode or JSON object, list or dictionary containing nodes 390 :param response_list: working memory for recursive calls 391 :param match: list of items to search for in node (see detailed notes below) 392 :return: a list of matching items (always a list, even if only one item) 393 394 Insert attribs.attribname:attribvalue in the list to select only branches where attribname 395 is attribvalue. 
def parse_data(release_id, obj, response_list, *match):
    """
    This function takes any XmlNode object, or list thereof, or a JSON object
    and extracts a list of all objects exactly matching the hierarchy listed in match.
    match should contain list of each node in hierarchical sequence, with no gaps in the sequence
    of nodes, to lowest level required.
    :param release_id: name for log file - usually =musicbrainz_albumid
            unless called outside metadata processor
    :param obj: an XmlNode or JSON object, list or dictionary containing nodes
    :param response_list: working memory for recursive calls (results are appended to it);
            include '!log' in it to switch on the (VERY wordy) logging
    :param match: list of items to search for in node (see detailed notes below)
    :return: a list of matching items (always a list, even if only one item) - this is response_list

    Insert attribs.attribname:attribvalue in the list to select only branches where attribname
    is attribvalue. (Omit the attribs prefix if the obj is JSON)
    Insert childname.text:childtext in the list to select only branches where
    a sibling with childname has text childtext.
    (Note: childname can be a dot-list if the text is more than one level down - e.g. child1.child2
    # TODO - Check this works fully )
    The text after the FIRST colon is the value, so values may themselves contain colons.
    The values 'True' and 'False' also match booleans, as JSON stores those as booleans, not strings.
    An empty match returns response_list unchanged.
    """
    # Normally logging options are off as these can be VERY wordy
    # They can be turned on by using !log in the call
    verbose = '!log' in response_list
    if not match:
        return response_list

    # XmlNode instances are not iterable, so need to convert to dict
    if isinstance(obj, XmlNode):
        obj = obj.__dict__
    if verbose:
        write_log(release_id, 'debug', 'Parsing data - looking for %s', match)
        write_log(release_id, 'info', 'Looking in object: %s', obj)

    if isinstance(obj, list):
        total = len(obj)
        for position, item in enumerate(obj, start=1):
            if isinstance(item, XmlNode):
                item = item.__dict__
            if verbose:
                write_log(
                    release_id, 'info',
                    'Getting response for list item no.%s of %s - object is: %s',
                    position, total, item)
            parse_data(release_id, item, response_list, *match)
            if verbose:
                write_log(
                    release_id, 'info',
                    'response_list for list item no.%s of %s is %s',
                    position, total, response_list)
        return response_list

    if not isinstance(obj, dict):
        if verbose:
            write_log(
                release_id, 'info',
                'response_list (obj is not a list or dict): %s',
                response_list)
        return response_list

    head, rest = match[0], match[1:]
    if head in obj:
        if not rest:
            value = obj[head]
            if value is not None:  # To prevent adding NoneTypes to list
                response_list.append(value)
            if verbose:
                write_log(
                    release_id, 'info',
                    'response_list (last match item): %s', response_list)
        else:
            parse_data(release_id, obj[head], response_list, *rest)
            if verbose:
                write_log(
                    release_id, 'info',
                    'response_list (passing up): %s', response_list)
        return response_list

    if ':' in head:
        # "path:value" selector - keep this branch only if the value is found at path
        path_text, _, wanted = head.partition(':')
        found = parse_data(release_id, obj, [], *path_text.split('.'))
        if verbose:
            write_log(
                release_id, 'info',
                'Value comparison - looking in %s for value %s',
                found, wanted)
        matched = wanted in found
        if not matched and wanted in ('True', 'False'):
            # Booleans are stored as such, not as strings, in JSON.
            # Compared by identity so 0/1 or unrelated values never match.
            flag = wanted == 'True'
            matched = any(value is flag for value in found)
        if matched:
            if not rest:
                response_list.append(obj)
            else:
                parse_data(release_id, obj, response_list, *rest)
        if verbose:
            write_log(
                release_id, 'info',
                'response_list (from value look-up): %s', response_list)
        return response_list

    if 'children' in obj:
        parse_data(release_id, obj['children'], response_list, *match)
    if verbose:
        write_log(
            release_id, 'info',
            'response_list (from children): %s', response_list)
    return response_list
def create_dict_from_ref_list(options, release_id, ref_list, keys, tags):
    """
    Turn parsed reference nodes into a list of dictionaries.
    :param options: not used by this function (kept for the callers' signature)
    :param release_id: name of log file
    :param ref_list: list of lists of reference nodes, as returned by parse_data
    :param keys: dictionary keys to use, in the same order as tags
    :param tags: node names whose 'text' is looked up (via parse_data) for each key
    :return: one dict per reference node; each value is the list returned by parse_data
    """
    return [
        dict(zip(keys, [parse_data(release_id, ref, [], tag, 'text') for tag in tags]))
        for refs in ref_list
        for ref in refs
    ]


def get_references_from_file(release_id, path, filename):
    """
    Lookup Muso Reference.xml or similar
    :param release_id: name of log file
    :param path: Reference file path
    :param filename: Reference file name
    :return: dict with keys 'composers', 'periods' and 'genres', each a list of dicts
        (empty lists for anything not read). If the file cannot be read or decoded, an error is
        logged only when one of the cwp_muso_* options is set.
    """
    options = config.setting
    full_path = os.path.join(path, filename)
    references = {'composers': [], 'periods': [], 'genres': []}
    # (result key, XML element under ReferenceDB, dict keys, child node names)
    layout = (
        ('composers', 'Composer',
         ['name', 'sort', 'birth', 'death', 'country', 'core'],
         ['Name', 'Sort', 'Birth', 'Death', 'CountryCode', 'Core']),
        ('periods', 'ClassicalPeriod',
         ['name', 'start', 'end'],
         ['Name', 'Start_x0020_Date', 'End_x0020_Date']),
        ('genres', 'ClassicalGenre',
         ['name'],
         ['Name']),
    )
    try:
        # the context manager closes the file on every path
        with open(full_path, encoding="utf8") as xml_file:
            reply = xml_file.read()
        document = _read_xml(QXmlStreamReader(reply))
        for result_key, element, keys, tags in layout:
            nodes = parse_data(
                release_id, document, [], 'ReferenceDB', element)
            references[result_key] = create_dict_from_ref_list(
                options, release_id, nodes, keys, tags)
    except (OSError, UnicodeDecodeError):
        # OSError covers IOError and FileNotFoundError
        muso_options = ('cwp_muso_genres', 'cwp_muso_classical',
                        'cwp_muso_dates', 'cwp_muso_periods')
        if any(options[name] for name in muso_options):
            write_log(
                release_id,
                'error',
                'File %s does not exist or is corrupted',
                full_path)
    return references
# OPTIONS


def get_preserved_tags():
    """
    Return the Picard "preserved_tags" setting.
    :return: if the setting is a comma-separated string, a list of its stripped items;
        otherwise the setting value itself, unchanged
    """
    raw = config.setting["preserved_tags"]
    if not isinstance(raw, str):
        return raw
    return [piece.strip() for piece in raw.split(',')]
def get_options(release_id, album, track):
    """
    Get the saved options from a release and use them according to flags set on the "advanced" tab
    :param release_id: name for log file - usually =musicbrainz_albumid
            unless called outside metadata processor
    :param album: current release
    :param track: current track
    :return: None (result is passed via tm)
    A common function for both Artist and Workparts, so that the first class to process a track will execute
    this function so that the results are available to both (via a track metadata item)
    Outputs: tm['~ce_options'] (str() of the options used) and tm['~ce_file'] (the matched file name, or
    None); also '_OLD' mirror tags of the file's original metadata, any 'keep' tags and, if cea_clear_tags
    is set, options['delete_tags'].
    """
    release_status[release_id]['done'] = False
    main_sections = ['artists', 'workparts']
    all_sections = ['artists', 'tag', 'workparts', 'genres']
    parent_sections = {
        'artists': 'artists',
        'tag': 'artists',
        'workparts': 'workparts',
        'genres': 'workparts'}
    # The above needs to be done for legacy reasons - there are only two tags which store options - artists and workparts
    # This dates from when there were only two sections
    # To split these now will create compatibility issues
    override = {
        'artists': 'cea_override',
        'tag': 'ce_tagmap_override',
        'workparts': 'cwp_override',
        'genres': 'ce_genres_override'}
    sect_text = {'artists': 'Artists', 'workparts': 'Works'}
    prefix = {'artists': 'cea', 'workparts': 'cwp'}

    ui_settings = album.tagger.config.setting
    if ui_settings['ce_options_overwrite'] and all(
            ui_settings[override[sect]] for sect in main_sections):
        options = ui_settings  # mutable - overrides are written back to the UI settings
    else:
        options = option_settings(ui_settings)  # make a copy
    if options["log_info"]:
        write_log(
            release_id, 'info',
            'Default (i.e. per UI) options for track %s are %r',
            track, options)

    # As we use some of the main Picard options and may over-write them, save them here
    # set_options[track]['translate_artist_names'] = config.setting['translate_artist_names']
    # set_options[track]['standardize_artists'] = config.setting['standardize_artists']
    # (not sure this is needed - TODO reconsider)

    tm = track.metadata
    new_metadata = None
    orig_metadata = None
    # Only look up files if needed
    file_options = {}
    music_file = ''
    music_file_found = None
    release_status[release_id]['file_found'] = False
    start = datetime.now()
    if options["log_info"]:
        write_log(release_id, 'info', 'Clock start at %s', start)
    trackno = tm['tracknumber']
    discno = tm['discnumber']

    album_filenames = album.tagger.get_files_from_objects([album])
    if options["log_info"]:
        write_log(
            release_id, 'info',
            'No. of album files found = %s', len(album_filenames))
    # Note that sometimes Picard fails to get all the file objects, even if they are there (network issues)
    # so we will cache whatever we can get!
    if release_id in release_status and 'file_objects' in release_status[release_id]:
        add_list_uniquely(
            release_status[release_id]['file_objects'], album_filenames)
    else:
        release_status[release_id]['file_objects'] = album_filenames
    if options["log_info"]:
        write_log(release_id, 'info', 'No. of album files cached = %s',
                  len(release_status[release_id]['file_objects']))

    # First try: match the file on track and disc number
    track_file = None
    for album_file in release_status[release_id]['file_objects']:
        if options["log_info"]:
            write_log(
                release_id, 'info',
                'Track file = %s, tracknumber = %s, discnumber = %s. Metadata trackno = %s, discno = %s',
                album_file.filename,
                str(album_file.tracknumber),
                str(album_file.discnumber),
                trackno,
                discno)
        if str(album_file.tracknumber) == trackno and str(album_file.discnumber) == discno:
            if options["log_info"]:
                write_log(
                    release_id, 'info',
                    'Track file found = %r', album_file.filename)
            track_file = album_file.filename
            break

    # Note: It would have been nice to do a rough check beforehand of total tracks,
    # but ~totalalbumtracks is not yet populated
    if not track_file:
        # Second try: match on musicbrainz_trackid
        album_fullnames = [
            x.filename for x in release_status[release_id]['file_objects']]
        if options["log_info"]:
            write_log(
                release_id, 'info',
                'Album files found = %r', album_fullnames)
        for music_file in album_fullnames:
            new_metadata = album.tagger.files[music_file].metadata

            if 'musicbrainz_trackid' in new_metadata and 'musicbrainz_trackid' in tm:
                if new_metadata['musicbrainz_trackid'] == tm['musicbrainz_trackid']:
                    track_file = music_file
                    break
        # Nothing found...
        if new_metadata and 'musicbrainz_trackid' not in new_metadata:
            if options['log_warning']:
                write_log(
                    release_id, 'warning',
                    'No trackid in file %s', music_file)
        if 'musicbrainz_trackid' not in tm:
            if options['log_warning']:
                write_log(
                    release_id, 'warning',
                    'No trackid in track %s', track)
    #
    # Note that, on initial load, new_metadata == orig_metadata; but, after refresh, new_metadata will have
    # the same track metadata as tm (plus the file metadata as per orig_metadata), so a trackid match
    # is then possible for files that do not have musicbrainz_trackid in orig_metadata. That is why
    # new_metadata is used in the above test, rather than orig_metadata, but orig_metadata is then used below
    # to get the saved options.
    #

    # Find the tag with the options:-
    if track_file:
        orig_metadata = album.tagger.files[track_file].orig_metadata
        music_file_found = track_file
        if options['log_info']:
            # track_file (not music_file) - music_file is still '' when the
            # file was matched on track/disc number above
            write_log(
                release_id, 'info',
                'orig_metadata for file %s is', track_file)
            write_log(release_id, 'info', orig_metadata)
        for child_section in all_sections:
            section = parent_sections[child_section]
            if options[override[child_section]]:
                options_tag = options[prefix[section] + '_options_tag']
                section_tag = options_tag + ':' + section + '_options'
                if section_tag in orig_metadata:
                    file_options[section] = interpret(
                        orig_metadata[section_tag])
                elif options_tag in orig_metadata:
                    # both sections' options are combined in one tag
                    options_tag_contents = orig_metadata[options_tag]
                    if isinstance(options_tag_contents, list):
                        options_tag_contents = options_tag_contents[0]
                    combined_options = ''.join(options_tag_contents.split(
                        '(workparts_options)')).split('(artists_options)')
                    for piece in combined_options:
                        parsed = interpret(piece.lstrip('; '))
                        if isinstance(parsed, dict) and 'Classical Extras' in parsed:
                            if sect_text[section] + ' options' in parsed['Classical Extras']:
                                file_options[section] = parsed
                else:
                    for om in orig_metadata:
                        if ':' + section + '_options' in om:
                            file_options[section] = interpret(
                                orig_metadata[om])
                if section not in file_options or not file_options[section]:
                    if options['log_error']:
                        write_log(
                            release_id, 'error',
                            'Saved ' + section +
                            ' options cannot be read for file %s. Using current settings',
                            track_file)
                    append_tag(
                        release_id, tm,
                        '~' + prefix[section] + '_error',
                        '1. Saved ' + section +
                        ' options cannot be read. Using current settings')

        release_status[release_id]['file_found'] = True

    end = datetime.now()
    if options['log_info']:
        write_log(release_id, 'info', 'Clock end at %s', end)
        write_log(release_id, 'info', 'Duration = %s', end - start)

    if not release_status[release_id]['file_found']:
        if options['log_warning']:
            write_log(
                release_id, 'warning',
                "No file with matching trackid for track %s. IF THERE SHOULD BE ONE, TRY 'REFRESH'",
                track)
        append_tag(
            release_id, tm,
            "002_important_warning",
            "No file with matching trackid - IF THERE SHOULD BE ONE, TRY 'REFRESH' - "
            "(unable to process any saved options, lyrics or 'keep' tags)")
        # Nothing else is done with this info as yet - ideally we need to refresh and re-run
        # for all releases where, say, release_status[release_id]['file_prob']
        # == True TODO?

    else:
        if options['log_info']:
            write_log(
                release_id, 'info',
                'Found music file: %r', music_file_found)
        for section in all_sections:
            if not options[override[section]]:
                continue
            parent_section = parent_sections[section]
            if not (parent_section in file_options and file_options[parent_section]):
                continue
            try:
                options_dict = file_options[parent_section]['Classical Extras'][
                    sect_text[parent_section] + ' options']
            except (TypeError, KeyError) as err:
                # KeyError: saved tag parsed to a dict without the expected keys
                if options['log_error']:
                    write_log(
                        release_id, 'error',
                        'Error: %s. Saved ' + section +
                        ' options cannot be read for file %s. Using current settings',
                        err, music_file_found)
                append_tag(
                    release_id, tm,
                    '~' + prefix[parent_section] + '_error',
                    '1. Saved ' + parent_section +
                    ' options cannot be read. Using current settings')
                break
            # Candidate option definitions do not depend on the option being
            # processed, so they are looked up once per section.
            addn = []
            if section == 'artists':
                addn = plugin_options('picard')
            if section == 'tag':
                addn = plugin_options('tag_detail')
            candidates = plugin_options(section) + addn
            for opt in options_dict:
                if isinstance(options_dict[opt], dict) and options[override['tag']]:  # for tag line options
                    # **NB tag mapping lines are the only entries of type dict**
                    opt_list = [
                        {opt + '_' + opt_item: options_dict[opt][opt_item]}
                        for opt_item in options_dict[opt]]
                else:
                    opt_list = [{opt: options_dict[opt]}]
                for opt_dict in opt_list:
                    for opt_det in opt_dict:
                        opt_value = opt_dict[opt_det]
                        for ea_opt in candidates:
                            displayed_option = options[ea_opt['option']]
                            if ea_opt['name'] != opt_det:
                                continue
                            if 'value' in ea_opt:
                                # radio-button style option: True only for the saved value
                                options[ea_opt['option']] = (ea_opt['value'] == opt_value)
                            else:
                                options[ea_opt['option']] = opt_value
                            if options[ea_opt['option']] != displayed_option:
                                if options['log_debug'] or options['log_info']:
                                    write_log(
                                        release_id, 'info',
                                        'Options overridden for option %s = %s',
                                        ea_opt['option'], opt_value)

                                opt_text = str(opt_value)
                                append_tag(
                                    release_id, tm,
                                    '003_information:options_overridden',
                                    str(ea_opt['name']) + ' = ' + opt_text)

        if orig_metadata:
            keep_list = options['cea_keep'].split(",")
            if options['cea_split_lyrics'] and options['cea_lyrics_tag']:
                keep_list.append(options['cea_lyrics_tag'])
            if options['cwp_genres_use_file']:
                if 'genre' in orig_metadata:
                    append_tag(
                        release_id, tm,
                        '~cwp_candidate_genres', orig_metadata['genre'])
                if options['cwp_genre_tag'] and options['cwp_genre_tag'] in orig_metadata:
                    keep_list.append(options['cwp_genre_tag'])
            really_keep_list = get_preserved_tags()[:]
            really_keep_list.append(
                options['cwp_options_tag'] + ':workparts_options')
            really_keep_list.append(
                options['cea_options_tag'] + ':artists_options')
            for tagx in keep_list:
                tag = tagx.strip()
                really_keep_list.append(tag)
                if tag in orig_metadata:
                    append_tag(release_id, tm, tag, orig_metadata[tag])
            if options['cea_clear_tags']:
                # the second condition is to ensure that (hidden) file variables are not deleted,
                # as these are in orig_metadata, not track_metadata
                delete_list = [
                    tag_item for tag_item in orig_metadata
                    if tag_item not in really_keep_list and tag_item[0] != '~']
                # this will be used in map_tags to delete unwanted tags
                options['delete_tags'] = delete_list
            # Create a "mirror" tag with the old data, for comparison purposes
            mirror_tags = []
            for tag_item in orig_metadata:
                mirror_name = tag_item + '_OLD'
                if mirror_name[0] == '~':
                    # str.replace returns a new string - the result must be kept
                    mirror_name = mirror_name.replace('~', '_')
                mirror_name = '~' + mirror_name
                mirror_tags.append((mirror_name, tag_item))
                append_tag(release_id, tm, mirror_name, orig_metadata[tag_item])
            append_tag(release_id, tm, '~ce_mirror_tags', mirror_tags)

    if not isinstance(options, dict):
        options_dict = option_settings(config.setting)
        write_log(
            'session', 'info',
            'Using option_settings(config.setting): %s', options_dict)
    else:
        options_dict = options
        write_log(
            'session', 'info',
            'Using options: %s', options_dict)
    tm['~ce_options'] = str(options_dict)
    tm['~ce_file'] = music_file_found
':workparts_options') 931 really_keep_list.append( 932 options['cea_options_tag'] + 933 ':artists_options') 934 for tagx in keep_list: 935 tag = tagx.strip() 936 really_keep_list.append(tag) 937 if tag in orig_metadata: 938 append_tag(release_id, tm, tag, orig_metadata[tag]) 939 if options['cea_clear_tags']: 940 delete_list = [] 941 for tag_item in orig_metadata: 942 if tag_item not in really_keep_list and tag_item[0] != '~': 943 # the second condition is to ensure that (hidden) file variables are not deleted, 944 # as these are in orig_metadata, not track_metadata 945 delete_list.append(tag_item) 946 # this will be used in map_tags to delete unwanted tags 947 options['delete_tags'] = delete_list 948 ## Create a "mirror" tag with the old data, for comparison purposes 949 mirror_tags = [] 950 for tag_item in orig_metadata: 951 mirror_name = tag_item + '_OLD' 952 if mirror_name[0] == '~' : 953 mirror_name.replace('~', '_') 954 mirror_name = '~' + mirror_name 955 mirror_tags.append((mirror_name, tag_item)) 956 append_tag(release_id, tm, mirror_name, orig_metadata[tag_item]) 957 append_tag(release_id, tm, '~ce_mirror_tags', mirror_tags) 958 959 if not isinstance(options, dict): 960 options_dict = option_settings(config.setting) 961 write_log( 962 'session', 963 'info', 964 'Using option_settings(config.setting): %s', 965 options_dict) 966 else: 967 options_dict = options 968 write_log( 969 'session', 970 'info', 971 'Using options: %s', 972 options_dict) 973 tm['~ce_options'] = str(options_dict) 974 tm['~ce_file'] = music_file_found 975 976 977def plugin_options(option_type): 978 """ 979 :param option_type: artists, tag, workparts, genres or other 980 :return: the relevant dictionary for the type 981 This function contains all the options data in one place - to prevent multiple repetitions elsewhere 982 """ 983 if option_type == 'artists': 984 return const.ARTISTS_OPTIONS 985 elif option_type == 'tag': 986 return const.TAG_OPTIONS 987 elif option_type == 'tag_detail': 
988 return const.TAG_DETAIL_OPTIONS 989 elif option_type == 'workparts': 990 return const.WORKPARTS_OPTIONS 991 elif option_type == 'genres': 992 return const.GENRE_OPTIONS 993 elif option_type == 'picard': 994 return const.PICARD_OPTIONS 995 elif option_type == 'other': 996 return const.OTHER_OPTIONS 997 else: 998 return None 999 1000def option_settings(config_settings): 1001 """ 1002 :param config_settings: options from UI 1003 :return: a (deep) copy of the Classical Extras options 1004 """ 1005 options = {} 1006 for option in plugin_options('artists') + plugin_options('tag') + plugin_options('tag_detail') + plugin_options( 1007 'workparts') + plugin_options('genres') + plugin_options('picard') + plugin_options('other'): 1008 options[option['option']] = copy.deepcopy( 1009 config_settings[option['option']]) 1010 return options 1011 1012 1013def get_aliases(self, release_id, album, options, releaseXmlNode): 1014 """ 1015 :param release_id: name for log file - usually =musicbrainz_albumid 1016 unless called outside metadata processor 1017 :param self: 1018 :param album: 1019 :param options: 1020 :param releaseXmlNode: all the metadata for the release 1021 :return: Data is returned via self.artist_aliases and self.artist_credits[album] 1022 1023 Note regarding aliases and credited-as names: 1024 In a MB release, an artist can appear in one of seven contexts. Each of these is accessible in releaseXmlNode 1025 and the track and recording contexts are also accessible in trackXmlNode. 1026 The seven contexts are: 1027 Recording: credited-as and alias 1028 Release-group: credited-as and alias 1029 Release: credited-as and alias 1030 Release relationship: credited-as and (not reliably?) alias 1031 Recording relationship (direct): credited-as and (not reliably?) alias 1032 Recording relationship (via work): credited-as and (not reliably?) alias 1033 Track: credited-as and alias 1034 (The above are applied in sequence - e.g. 
track artist credit will over-ride release artist credit. "Recording" gets 1035 the lowest priority as it is more generic than the release data {may apply to multiple releases}) 1036 This function collects all the available aliases and as-credited names once (on processing the first track). 1037 N.B. if more than one release is loaded in Picard, any available alias names loaded so far will be available 1038 and used. However, as-credited names will only be used from the current release.""" 1039 1040 if 'artist_locale' in config.setting and options['cea_aliases'] or options['cea_aliases_composer']: 1041 locale = config.setting["artist_locale"] 1042 lang = locale.split("_")[0] # NB this is the Picard code in /util 1043 1044 # Track and recording aliases/credits are gathered by parsing the 1045 # media, track and recording nodes 1046 # Do the recording relationship first as it may apply to multiple releases, so release and track data 1047 # is more specific. 1048 media = parse_data(release_id, releaseXmlNode, [], 'media') 1049 for m in media: 1050 # disc_num = int(parse_data(options, m, [], 'position', 'text')[0]) 1051 # not currently used 1052 tracks = parse_data(release_id, m, [], 'tracks') 1053 for track in tracks: 1054 for t in track: 1055 # track_num = int(parse_data(options, t, [], 'number', 1056 # 'text')[0]) # not currently used 1057 1058 # Recording artists 1059 obj = parse_data(release_id, t, [], 'recording') 1060 get_aliases_and_credits( 1061 self, 1062 options, 1063 release_id, 1064 album, 1065 obj, 1066 lang, 1067 options['cea_recording_credited']) 1068 1069 # Get the release data before the recording relationshiops and track data 1070 # Release group artists 1071 obj = parse_data(release_id, releaseXmlNode, [], 'release-group') 1072 get_aliases_and_credits( 1073 self, 1074 options, 1075 release_id, 1076 album, 1077 obj, 1078 lang, 1079 options['cea_group_credited']) 1080 1081 # Release artists 1082 get_aliases_and_credits( 1083 self, 1084 options, 1085 
release_id, 1086 album, 1087 releaseXmlNode, 1088 lang, 1089 options['cea_credited']) 1090 # Next bit needed to identify artists who are album artists 1091 self.release_artists_sort[album] = parse_data( 1092 release_id, releaseXmlNode, [], 'artist-credit', 'artist', 'sort-name') 1093 # Release relationship artists 1094 get_relation_credits( 1095 self, 1096 options, 1097 release_id, 1098 album, 1099 releaseXmlNode, 1100 lang, 1101 options['cea_release_relationship_credited']) 1102 1103 # Now get the rest: 1104 for m in media: 1105 tracks = parse_data(release_id, m, [], 'tracks') 1106 for track in tracks: 1107 for t in track: 1108 # Recording relationship artists 1109 obj = parse_data(release_id, t, [], 'recording') 1110 get_relation_credits( 1111 self, 1112 options, 1113 release_id, 1114 album, 1115 obj, 1116 lang, 1117 options['cea_recording_relationship_credited']) 1118 # Track artists 1119 get_aliases_and_credits( 1120 self, 1121 options, 1122 release_id, 1123 album, 1124 t, 1125 lang, 1126 options['cea_track_credited']) 1127 1128 if options['log_info']: 1129 write_log(release_id, 'info', 'Alias and credits info for %s', self) 1130 write_log(release_id, 'info', 'Aliases :%s', self.artist_aliases) 1131 write_log( 1132 release_id, 1133 'info', 1134 'Credits :%s', 1135 self.artist_credits[album]) 1136 1137 1138def get_artists(options, release_id, tm, relations, relation_type): 1139 """ 1140 Get artist info from XML lookup 1141 :param release_id: name for log file - usually =musicbrainz_albumid 1142 unless called outside metadata processor 1143 :param options: 1144 :param tm: 1145 :param relations: 1146 :param relation_type: 'release', 'recording' or 'work' (NB 'work' does not pass a param for tm) 1147 :return: 1148 """ 1149 if options['log_debug'] or options['log_info']: 1150 write_log( 1151 release_id, 1152 'debug', 1153 'In get_artists. 
relation_type: %s, relations: %s', 1154 relation_type, 1155 relations) 1156 log_options = { 1157 'log_debug': options['log_debug'], 1158 'log_info': options['log_info']} 1159 artists = [] 1160 instruments = [] 1161 artist_types = const.RELATION_TYPES[relation_type] 1162 for artist_type in artist_types: 1163 artists, instruments = create_artist_data(release_id, options, log_options, tm, relations, 1164 relation_type, artist_type, artists, instruments) 1165 artist_dict = {'artists': artists, 'instruments': instruments} 1166 return artist_dict 1167 1168 1169def create_artist_data(release_id, options, log_options, tm, relations, 1170 relation_type, artist_type, artists, instruments): 1171 """ 1172 Update the artists and instruments 1173 :param release_id: the current album id 1174 :param options: 1175 :param log_options: 1176 :param tm: track metadata 1177 :param relations: 1178 :param relation_type: release', 'recording' or 'work' (NB 'work' does not pass a param for tm) 1179 :param artist_type: from const.RELATION_TYPES[relation_type] 1180 :param artists: current artist list - updated with each call 1181 :param instruments: current instruments list - updated with each call 1182 :return: artists, instruments 1183 """ 1184 type_list = parse_data( 1185 release_id, 1186 relations, 1187 [], 1188 'target-type:artist', 1189 'type:' + 1190 artist_type) 1191 for type_item in type_list: 1192 artist_name_list = parse_data( 1193 release_id, type_item, [], 'artist', 'name') 1194 artist_sort_name_list = parse_data( 1195 release_id, type_item, [], 'artist', 'sort-name') 1196 if artist_type not in [ 1197 'instrument', 1198 'vocal', 1199 'instrument arranger', 1200 'vocal arranger']: 1201 instrument_list = None 1202 credited_inst_list = None 1203 else: 1204 instrument_list_list = parse_data( 1205 release_id, type_item, [], 'attributes') 1206 if instrument_list_list: 1207 instrument_list = instrument_list_list[0] 1208 else: 1209 instrument_list = [] 1210 credited_inst_list = 
instrument_list[:] 1211 credited_inst_dict_list = parse_data( 1212 release_id, type_item, [], 'attribute-credits') # keyed to insts 1213 if credited_inst_dict_list: 1214 credited_inst_dict = credited_inst_dict_list[0] 1215 else: 1216 credited_inst_dict = {} 1217 for i, inst in enumerate(instrument_list): 1218 if inst in credited_inst_dict: 1219 credited_inst_list[i] = credited_inst_dict[inst] 1220 1221 if artist_type == 'vocal': 1222 if not instrument_list: 1223 instrument_list = ['vocals'] 1224 elif not any('vocals' in x for x in instrument_list): 1225 instrument_list.append('vocals') 1226 credited_inst_list.append('vocals') 1227 # fill the hidden vars before we choose to use the as-credited 1228 # version 1229 if relation_type != 'work': 1230 inst_tag = [] 1231 cred_tag = [] 1232 if instrument_list: 1233 inst_tag = list(set(instrument_list)) 1234 if credited_inst_list: 1235 cred_tag = list(set(credited_inst_list)) 1236 for attrib in ['solo', 'guest', 'additional']: 1237 if attrib in inst_tag: 1238 inst_tag.remove(attrib) 1239 if attrib in cred_tag: 1240 cred_tag.remove(attrib) 1241 if inst_tag: 1242 if tm['~cea_instruments']: 1243 tm['~cea_instruments'] = add_list_uniquely( 1244 tm['~cea_instruments'], inst_tag) 1245 else: 1246 tm['~cea_instruments'] = inst_tag 1247 if cred_tag: 1248 if tm['~cea_instruments_credited']: 1249 tm['~cea_instruments_credited'] = add_list_uniquely( 1250 tm['~cea_instruments_credited'], cred_tag) 1251 else: 1252 tm['~cea_instruments_credited'] = cred_tag 1253 if inst_tag or cred_tag: 1254 if tm['~cea_instruments_all']: 1255 tm['~cea_instruments_all'] = add_list_uniquely( 1256 tm['~cea_instruments_all'], list(set(inst_tag + cred_tag))) 1257 else: 1258 tm['~cea_instruments_all'] = list( 1259 set(inst_tag + cred_tag)) 1260 if '~cea_instruments' in tm and '~cea_instruments_credited' in tm and '~cea_instruments_all' in tm: 1261 instruments = [ 1262 tm['~cea_instruments'], 1263 tm['~cea_instruments_credited'], 1264 tm['~cea_instruments_all']] 
1265 if options['cea_inst_credit'] and credited_inst_list: 1266 instrument_list = credited_inst_list 1267 if instrument_list: 1268 instrument_sort = 3 1269 s_key = { 1270 'lead vocals': 1, 1271 'solo': 2, 1272 'guest': 4, 1273 'additional': 5} 1274 for inst in s_key: 1275 if inst in instrument_list: 1276 instrument_sort = s_key[inst] 1277 else: 1278 instrument_sort = 0 1279 1280 if artist_type in const.ARTIST_TYPE_ORDER: 1281 type_sort = const.ARTIST_TYPE_ORDER[artist_type] 1282 else: 1283 type_sort = 99 1284 if log_options['log_error']: 1285 write_log( 1286 release_id, 1287 'error', 1288 "Error in artist type. Type '%s' is not in ARTIST_TYPE_ORDER dictionary", 1289 artist_type) 1290 1291 artist = ( 1292 artist_type, 1293 instrument_list, 1294 artist_name_list, 1295 artist_sort_name_list, 1296 instrument_sort, 1297 type_sort) 1298 artists.append(artist) 1299 # Sorted by sort name then instrument_sort then artist type 1300 artists = sorted(artists, key=lambda x: (x[5], x[3], x[4], x[1])) 1301 if log_options['log_info']: 1302 write_log(release_id, 'info', 'sorted artists = %s', artists) 1303 return artists, instruments 1304 1305 1306def get_series(options, release_id, relations): 1307 """ 1308 Get series info (depends on lookup having used inc=series-rel) 1309 :param options: 1310 :param release_id: 1311 :param relations: 1312 :return: 1313 """ 1314 # if options['log_debug'] or options['log_info']: 1315 # write_log( 1316 # release_id, 1317 # 'debug', 1318 # 'In get_series. 
relations: %s', 1319 # relations) 1320 # series_name_list =[] 1321 # series_id_list = [] 1322 # for series_rels in relations: 1323 # series_rel = parse_data( 1324 # release_id, 1325 # series_rels, 1326 # [], 1327 # 'target-type:series', 1328 # 'type:part-of') 1329 # if options['log_debug'] or options['log_info']: 1330 # write_log( 1331 # release_id, 1332 # 'debug', 1333 # 'series_rel = %s', 1334 # series_rel) 1335 # series_name_list.extend( 1336 # parse_data(release_id, series_rel, [], 'series', 'name') 1337 # ) 1338 # series_id_list.extend( 1339 # parse_data(release_id, series_rel, [], 'series', 'id') 1340 # ) 1341 type_list = parse_data( 1342 release_id, 1343 relations, 1344 [], 1345 'target-type:series', 1346 'type:part of') 1347 if type_list: 1348 series_name_list = [] 1349 series_id_list = [] 1350 series_number_list = [] 1351 for type_item in type_list: 1352 series_name_list = parse_data( 1353 release_id, type_item, [], 'series', 'name') 1354 series_id_list = parse_data( 1355 release_id, type_item, [], 'series', 'id') 1356 series_number_list = parse_data( 1357 release_id, type_item, [], 'attribute-values', 'number') 1358 return {'name_list': series_name_list, 'id_list': series_id_list, 'number_list': series_number_list} 1359 else: 1360 return None 1361 1362 1363 1364def apply_artist_style( 1365 options, 1366 release_id, 1367 lang, 1368 a_list, 1369 name_style, 1370 name_tag, 1371 sort_tag, 1372 names_tag, 1373 names_sort_tag): 1374 # Get artist and apply style 1375 for a_item in a_list: 1376 for acs in a_item: 1377 artistlist = parse_data(release_id, acs, [], 'name') 1378 sortlist = parse_data(release_id, acs, [], 'artist', 'sort-name') 1379 names = {} 1380 if lang: 1381 names['alias'] = parse_data( 1382 release_id, 1383 acs, 1384 [], 1385 'artist', 1386 'aliases', 1387 'locale:' + lang, 1388 'primary:True', 1389 'name') 1390 else: 1391 names['alias'] = [] 1392 names['credit'] = parse_data(release_id, acs, [], 'name') 1393 pairslist = list(zip(artistlist, 
sortlist)) 1394 names['sort'] = [ 1395 translate_from_sortname( 1396 *pair) for pair in pairslist] 1397 for style in name_style: 1398 if names[style]: 1399 artistlist = names[style] 1400 break 1401 joinlist = parse_data(release_id, acs, [], 'joinphrase') 1402 1403 if artistlist: 1404 name_tag.append(artistlist[0]) 1405 sort_tag.append(sortlist[0]) 1406 names_tag.append(artistlist[0]) 1407 names_sort_tag.append(sortlist[0]) 1408 1409 if joinlist: 1410 name_tag.append(joinlist[0]) 1411 sort_tag.append(joinlist[0]) 1412 1413 name_tag_str = ''.join(name_tag) 1414 sort_tag_str = ''.join(sort_tag) 1415 1416 return { 1417 'artists': names_tag, 1418 'artists_sort': names_sort_tag, 1419 'artist': name_tag_str, 1420 'artistsort': sort_tag_str} 1421 1422 1423def set_work_artists(self, release_id, album, track, writerList, tm, count): 1424 """ 1425 :param release_id: 1426 :param self is the calling object from Artists or WorkParts 1427 :param album: the current album 1428 :param track: the current track 1429 :param writerList: format [(artist_type, [instrument_list], [name list],[sort_name list]),(.....etc] 1430 :param tm: track metadata 1431 :param count: depth count of recursion in process_work_artists (should equate to part level) 1432 :return: 1433 """ 1434 1435 options = self.options[track] 1436 if not options['classical_work_parts']: 1437 caller = 'ExtraArtists' 1438 pre = '~cea' 1439 else: 1440 caller = 'PartLevels' 1441 pre = '~cwp' 1442 write_log( 1443 release_id, 1444 'debug', 1445 'Class: %s: in set_work_artists for track %s. Count (level) is %s. 
Writer list is %s', 1446 caller, 1447 track, 1448 count, 1449 writerList) 1450 # tag strings are a tuple (Picard tag, cwp tag, Picard sort tag, cwp sort 1451 # tag) (NB this is modelled on set_performer) 1452 tag_strings = const.tag_strings(pre) 1453 # insertions lists artist types where names in the main Picard tags may be 1454 # updated for annotations 1455 insertions = const.INSERTIONS 1456 no_more_lyricists = False 1457 if caller == 'PartLevels' and self.lyricist_filled[track]: 1458 no_more_lyricists = True 1459 1460 for writer in writerList: 1461 writer_type = writer[0] 1462 if writer_type not in tag_strings: 1463 break 1464 if no_more_lyricists and ( 1465 writer_type == 'lyricist' or writer_type == 'librettist'): 1466 break 1467 if writer[1]: 1468 inst_list = writer[1][:] 1469 # take a copy of the list in case (because of list 1470 # mutability) we need the old one 1471 instrument = ", ".join(inst_list) 1472 else: 1473 instrument = None 1474 sub_strings = { # 'instrument arranger': instrument, 'vocal arranger': instrument 1475 } 1476 if options['cea_arranger']: 1477 if instrument: 1478 arr_inst = options['cea_arranger'] + ' ' + instrument 1479 else: 1480 arr_inst = options['cea_arranger'] 1481 else: 1482 arr_inst = instrument 1483 annotations = {'writer': options['cea_writer'], 1484 'lyricist': options['cea_lyricist'], 1485 'librettist': options['cea_librettist'], 1486 'revised by': options['cea_revised'], 1487 'translator': options['cea_translator'], 1488 'arranger': options['cea_arranger'], 1489 'reconstructed by': options['cea_reconstructed'], 1490 'orchestrator': options['cea_orchestrator'], 1491 'instrument arranger': arr_inst, 1492 'vocal arranger': arr_inst} 1493 tag = tag_strings[writer_type][0] 1494 sort_tag = tag_strings[writer_type][2] 1495 cwp_tag = tag_strings[writer_type][1] 1496 cwp_sort_tag = tag_strings[writer_type][3] 1497 cwp_names_tag = cwp_tag[:-1] + '_names' 1498 cwp_instrumented_tag = cwp_names_tag + '_instrumented' 1499 if writer_type 
in sub_strings: 1500 if sub_strings[writer_type]: 1501 tag += sub_strings[writer_type] 1502 if tag: 1503 if '~ce_tag_cleared_' + \ 1504 tag not in tm or not tm['~ce_tag_cleared_' + tag] == "Y": 1505 if tag in tm: 1506 if options['log_info']: 1507 write_log(release_id, 'info', 'delete tag %s', tag) 1508 del tm[tag] 1509 tm['~ce_tag_cleared_' + tag] = "Y" 1510 if sort_tag: 1511 if '~ce_tag_cleared_' + \ 1512 sort_tag not in tm or not tm['~ce_tag_cleared_' + sort_tag] == "Y": 1513 if sort_tag in tm: 1514 del tm[sort_tag] 1515 tm['~ce_tag_cleared_' + sort_tag] = "Y" 1516 1517 name_list = writer[2] 1518 for ind, name in enumerate(name_list): 1519 sort_name = writer[3][ind] 1520 no_credit = True 1521 write_log( 1522 release_id, 1523 'info', 1524 'In set_work_artists. Name before changes = %s', 1525 name) 1526 # change name to as-credited 1527 if options['cea_composer_credited']: 1528 if album in self.artist_credits and sort_name in self.artist_credits[album]: 1529 no_credit = False 1530 name = self.artist_credits[album][sort_name] 1531 # over-ride with aliases if appropriate 1532 if (options['cea_aliases'] or options['cea_aliases_composer']) and ( 1533 no_credit or options['cea_alias_overrides']): 1534 if sort_name in self.artist_aliases: 1535 name = self.artist_aliases[sort_name] 1536 # fix cyrillic names if not already fixed 1537 if options['cea_cyrillic']: 1538 if not only_roman_chars(name): 1539 name = remove_middle(unsort(sort_name)) 1540 # Only remove middle name where the existing 1541 # performer is in non-latin script 1542 annotated_name = name 1543 write_log( 1544 release_id, 1545 'info', 1546 'In set_work_artists. 
Name after changes = %s', 1547 name) 1548 # add annotations and write performer tags 1549 if writer_type in annotations: 1550 if annotations[writer_type]: 1551 annotated_name += ' (' + annotations[writer_type] + ')' 1552 if instrument: 1553 instrumented_name = name + ' (' + instrument + ')' 1554 else: 1555 instrumented_name = name 1556 1557 if writer_type in insertions and options['cea_arrangers']: 1558 self.append_tag(release_id, tm, tag, annotated_name) 1559 else: 1560 if options['cea_arrangers'] or writer_type == tag: 1561 self.append_tag(release_id, tm, tag, name) 1562 1563 if options['cea_arrangers'] or writer_type == tag: 1564 if sort_tag: 1565 self.append_tag(release_id, tm, sort_tag, sort_name) 1566 if options['cea_tag_sort'] and '~' in sort_tag: 1567 explicit_sort_tag = sort_tag.replace('~', '') 1568 self.append_tag( 1569 release_id, tm, explicit_sort_tag, sort_name) 1570 self.append_tag(release_id, tm, cwp_tag, annotated_name) 1571 self.append_tag(release_id, tm, cwp_names_tag, name) 1572 if instrumented_name != name: 1573 self.append_tag( 1574 release_id, 1575 tm, 1576 cwp_instrumented_tag, 1577 instrumented_name) 1578 1579 if cwp_sort_tag: 1580 self.append_tag(release_id, tm, cwp_sort_tag, sort_name) 1581 1582 if caller == 'PartLevels' and ( 1583 writer_type == 'lyricist' or writer_type == 'librettist'): 1584 self.lyricist_filled[track] = True 1585 write_log( 1586 release_id, 1587 'info', 1588 'Filled lyricist for track %s. 
Not looking further', 1589 track) 1590 1591 if writer_type == 'composer': 1592 composerlast = sort_name.split(",")[0] 1593 write_log( 1594 release_id, 1595 'info', 1596 'composerlast = %s', 1597 composerlast) 1598 self.append_tag( 1599 release_id, 1600 tm, 1601 pre + 1602 '_composer_lastnames', 1603 composerlast) 1604 if sort_name in self.release_artists_sort[album]: 1605 self.append_tag( 1606 release_id, tm, '~cea_album_composers', name) 1607 self.append_tag( 1608 release_id, tm, '~cea_album_composers_sort', sort_name) 1609 self.append_tag( 1610 release_id, 1611 tm, 1612 '~cea_album_track_composer_lastnames', 1613 composerlast) 1614 composer_last_names(self, release_id, tm, album) 1615 1616 1617# Non-Latin character processing 1618latin_letters = {} 1619 1620def is_latin(uchr): 1621 """Test whether character is in Latin script""" 1622 try: 1623 return latin_letters[uchr] 1624 except KeyError: 1625 return latin_letters.setdefault( 1626 uchr, 'LATIN' in unicodedata.name(uchr)) 1627 1628 1629def only_roman_chars(unistr): 1630 """Test whether string is in Latin script""" 1631 return all(is_latin(uchr) 1632 for uchr in unistr 1633 if uchr.isalpha()) 1634 1635 1636def get_roman(string): 1637 """Transliterate cyrillic script to Latin script""" 1638 translit_string = "" 1639 for index, char in enumerate(string): 1640 if char in const.CYRILLIC_LOWER.keys(): 1641 char = const.CYRILLIC_LOWER[char] 1642 elif char in const.CYRILLIC_UPPER.keys(): 1643 char = const.CYRILLIC_UPPER[char] 1644 if string[index + 1] not in const.CYRILLIC_LOWER.keys(): 1645 char = char.upper() 1646 translit_string += char 1647 # fix multi-chars 1648 translit_string = translit_string.replace('ks', 'x').replace('iy ', 'i ') 1649 return translit_string 1650 1651 1652def remove_middle(performer): 1653 """To remove middle names of Russian composers""" 1654 plist = performer.split() 1655 if len(plist) == 3: 1656 return plist[0] + ' ' + plist[2] 1657 else: 1658 return performer 1659 1660 1661# Sorting etc. 
1662 1663def unsort(performer): 1664 """ 1665 To take a sort field and recreate the name 1666 Only now used for last-ditch cyrillic translation - superseded by 'translate_from_sortname' 1667 """ 1668 sorted_list = performer.split(', ') 1669 sorted_list.reverse() 1670 for i, item in enumerate(sorted_list): 1671 if item[-1] != "'": 1672 sorted_list[i] += ' ' 1673 return ''.join(sorted_list).strip() 1674 1675 1676def _reverse_sortname(sortname): 1677 """ 1678 Reverse sortnames. 1679 Code is from picard/util/__init__.py 1680 """ 1681 1682 chunks = [a.strip() for a in sortname.split(",")] 1683 chunk_len = len(chunks) 1684 if chunk_len == 2: 1685 return "%s %s" % (chunks[1], chunks[0]) 1686 elif chunk_len == 3: 1687 return "%s %s %s" % (chunks[2], chunks[1], chunks[0]) 1688 elif chunk_len == 4: 1689 return "%s %s, %s %s" % (chunks[1], chunks[0], chunks[3], chunks[2]) 1690 else: 1691 return sortname.strip() 1692 1693 1694def stripsir(performer): 1695 """ 1696 Remove honorifics from names 1697 Also standardize hyphens and apostrophes in names 1698 """ 1699 performer = performer.replace(u'\u2010', u'-').replace(u'\u2019', u"'") 1700 sir = re.compile(r'(.*)\b(Sir|Maestro|Dame)\b\s*(.*)', re.IGNORECASE) 1701 match = sir.search(performer) 1702 if match: 1703 return match.group(1) + match.group(3) 1704 else: 1705 return performer 1706 1707 1708# def swap_prefix(performer): 1709# """NOT CURRENTLY USED. 
#     Create sort fields for ensembles etc., by placing the prefix (see constants) at the end"""
#     prefix = '|'.join(prefixes)
#     swap = re.compile(r'^(' + prefix + r')\b\s*(.*)', re.IGNORECASE)
#     match = swap.search(performer)
#     if match:
#         return match.group(2) + ", " + match.group(1)
#     else:
#         return performer


def replace_roman_numerals(s):
    """
    Replaces roman numerals include in s, where followed by certain punctuation, by digits

    Candidates come from RE_ROMANS (defined elsewhere in this module); the first
    element of each findall() result is taken to be the numeral text.
    :param s: text to process
    :return: s with each numeral found replaced (as a whole word) by its decimal value
    """
    romans = RE_ROMANS.findall(s)
    for roman in romans:
        if roman[0]:
            numerals = str(roman[0])
            digits = str(from_roman(numerals))
            # whole-word replacement, so letters inside longer words are untouched
            to_replace = r'\b' + roman[0] + r'\b'
            s = re.sub(to_replace, digits, s)
    return s


def from_roman(s):
    """
    Convert a roman numeral to an integer.

    Upper-case forms are consumed first, then lower-case forms, always from the
    current position in s. There is no validation: parsing simply stops
    contributing at the first text that matches no numeral, and whatever has
    been accumulated up to that point is returned (0 for an empty string).
    :param s: roman numeral, all upper case or all lower case
    :return: integer value
    """
    upper_map = (('M', 1000),
                 ('CM', 900),
                 ('D', 500),
                 ('CD', 400),
                 ('C', 100),
                 ('XC', 90),
                 ('L', 50),
                 ('XL', 40),
                 ('X', 10),
                 ('IX', 9),
                 ('V', 5),
                 ('IV', 4),
                 ('I', 1))
    # same table again in lower case, tried after all the upper-case entries
    roman_numeral_map = upper_map + tuple(
        (numeral.lower(), integer) for numeral, integer in upper_map)
    result = 0
    index = 0
    for numeral, integer in roman_numeral_map:
        while s[index:index + len(numeral)] == numeral:
            result += integer
            index += len(numeral)
    return result


def turbo_lcs(release_id, multi_list):
    """
    Picks the best longest common string method to use
    Works with a list of lists or a list of strings
    :param release_id: name for log file
    :param multi_list: a list of strings or a list of lists
    :return: longest common substring/list;
        None if multi_list is not a list, [] if it is empty,
        and its only item if it has exactly one
    """
    write_log(release_id, 'debug', 'In turbo_lcs')
    if not isinstance(multi_list, list):
        return None
    list_sum = sum([len(x) for x in multi_list])
    list_len = len(multi_list)
    if list_len < 2:
        if list_len == 1:
            return multi_list[0]  # Nothing to do!
        else:
            return []
    # for big matches, use the generalised suffix tree method
    if ((list_sum / list_len) ** 2) * list_len > 1000:
        # heuristic: may need to tweak the 1000 in the light of results
        lcs_dict = suffixtree.multi_lcs(multi_list)
        # NB suffixtree may be shown as an unresolved reference in the IDE,
        # but it should work provided it is included in the package
        if "error" not in lcs_dict:
            if "response" in lcs_dict:
                write_log(
                    release_id,
                    'info',
                    'Longest common string was returned from suffix tree algo')
                return lcs_dict['response']

            ## If suffix tree fails, write errors to log before proceeding with alternative
            else:
                write_log(
                    release_id,
                    'error',
                    'Suffix tree failure for release %s. Error unknown. Using standard lcs algo instead',
                    release_id)
        else:
            write_log(
                release_id,
                'error',
                'Suffix tree failure for release %s. Error message: %s. Using standard lcs algo instead',
                release_id,
                lcs_dict['error'])
    # otherwise, or if gst fails, use the standard algorithm:
    # fold the pairwise lcs over the items (there are at least two at this point)
    common = multi_list[0]
    for item in multi_list[1:]:
        common = longest_common_substring(item, common)['string']
    write_log(release_id, 'debug', 'LCS returned from standard algo')
    return common


def longest_common_substring(s1, s2):
    """
    Standard lcs algo for short strings, or if suffix tree does not work
    :param s1: substring 1
    :param s2: substring 2
    :return: {'string': the longest common substring,
        'start': the start position in s1,
        'length': the length of the common substring}
    NB this also works on list arguments - i.e. it will find the longest common sub-list
    If several common substrings share the maximum length, the one found first in s1 is returned.
    """
    # m[x][y] = length of the common run ending at s1[x - 1] and s2[y - 1]
    m = [[0] * (1 + len(s2)) for _ in range(1 + len(s1))]
    longest, x_longest = 0, 0
    for x in range(1, 1 + len(s1)):
        for y in range(1, 1 + len(s2)):
            if s1[x - 1] == s2[y - 1]:
                m[x][y] = m[x - 1][y - 1] + 1
                if m[x][y] > longest:
                    longest = m[x][y]
                    x_longest = x
            else:
                m[x][y] = 0
    return {'string': s1[x_longest - longest: x_longest],
            'start': x_longest - longest, 'length': longest}


def longest_common_sequence(list1, list2, minstart=0, maxstart=0):
    """
    :param list1: list 1
    :param list2: list 2
    :param minstart: the earliest point to start looking for a match
    :param maxstart: the latest point to start looking for a match
    :return: {'sequence': the common subsequence, 'length': length of subsequence}
        ({'sequence': None, 'length': 0} if nothing matches or if maxstart < minstart)
    maxstart must be >= minstart. If they are equal then the start point is fixed.
    Note that this only finds subsequences starting at the same position
    Use longest_common_substring for the more general problem
    """
    if maxstart < minstart:
        # same dict shape as every other return path, so callers can always index the result
        return {'sequence': None, 'length': 0}
    min_len = min(len(list1), len(list2))
    longest = 0
    seq = None
    maxstart = min(maxstart, min_len) + 1
    for k in range(minstart, maxstart):
        for i in range(k, min_len + 1):
            if list1[k:i] == list2[k:i] and i - k > longest:
                longest = i - k
                seq = list1[k:i]
    return {'sequence': seq, 'length': longest}


def substart_finder(mylist, pattern):
    """
    Find where pattern first occurs as a contiguous run within mylist
    :param mylist: list to search
    :param pattern: list to look for
    :return: index of the first occurrence, or len(mylist) if nothing found
        (an empty list pattern is found at index 0)
    """
    width = len(pattern)
    # comparing the slice directly (no pattern[0] pre-check) keeps an empty pattern
    # from raising IndexError
    for i in range(len(mylist)):
        if mylist[i:i + width] == pattern:
            return i
    return len(mylist)  # if nothing found


def get_ui_tags():
    """
    Determine tags for display in ui

    Reads config.setting['ce_ui_tags'], which is split into columns on '/'.
    Within a column all '(' are removed and ')' / spaces are stripped from both ends;
    a column is then read as 'heading: tag, tag, ...'. Columns without ':' are ignored
    and only the text between the first and second ':' is used for the tag names.
    :return: dict of {heading: tuple of tag names}
    """
    options = config.setting
    ui_tags_raw = options['ce_ui_tags']
    ui_tags = {}
    ui_tags_split = [x.replace('(', '').strip(') ') for x in ui_tags_raw.split('/')]
    for ui_column in ui_tags_split:
        if ':' in ui_column:
            ui_col_parts = [x.strip() for x in ui_column.split(':')]
            heading = ui_col_parts[0]
            tag_names = ui_col_parts[1].split(',')
            tag_names = [x.strip() for x in tag_names]
            ui_tags[heading] = tuple(tag_names)
    return ui_tags


def map_tags(options, release_id, album, tm):
    """
    Do the common tag processing - including for the genres and tag-mapping sections
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param options: options passed from either Artists or Workparts
    :param album:
    :param tm: track metadata
    :return: None - action is through setting tm contents
    This is a common function for Artists and Workparts which should only run after both sections have completed for
    a given track. If, say, Artists calls it and Workparts is not done,
    then it will not execute until Workparts calls it (and vice versa).
    """

    write_log(release_id, 'debug', 'In map_tags, checking readiness...')
    if (options['classical_extra_artists'] and '~cea_artists_complete' not in tm) or (
            options['classical_work_parts'] and '~cea_works_complete' not in tm):
        write_log(release_id, 'info', '...not ready')
        return
    write_log(release_id, 'debug', '... processing tag mapping')
processing tag mapping') 1921 1922 # blank tags 1923 blank_tags = options['cea_blank_tag'].split( 1924 ",") + options['cea_blank_tag_2'].split(",") 1925 if 'artists_sort' in [x.strip() for x in blank_tags]: 1926 blank_tags.append('~artists_sort') 1927 for tag in blank_tags: 1928 if tag.strip() in tm: 1929 # place blanked tags into hidden variables available for 1930 # re-use 1931 tm['~cea_' + tag.strip()] = tm[tag.strip()] 1932 del tm[tag.strip()] 1933 1934 # album 1935 if tm['~cea_album_composer_lastnames']: 1936 last_names = str_to_list(tm['~cea_album_composer_lastnames']) 1937 if options['cea_composer_album']: 1938 # save it as a list to prevent splitting when appending tag 1939 tm['~cea_release'] = [tm['album']] 1940 new_last_names = [] 1941 for last_name in last_names: 1942 last_name = last_name.strip() 1943 new_last_names.append(last_name) 1944 if len(new_last_names) > 0: 1945 tm['album'] = "; ".join(new_last_names) + ": " + tm['album'] 1946 1947 # remove lyricists if no vocals, according to option set 1948 if options['cea_no_lyricists'] and not any( 1949 [x for x in str_to_list(tm['~cea_performers']) if 'vocals' in x]): 1950 if 'lyricist' in tm: 1951 del tm['lyricist'] 1952 for lyricist_tag in ['lyricists', 'librettists', 'translators']: 1953 if '~cwp_' + lyricist_tag in tm: 1954 del tm['~cwp_' + lyricist_tag] 1955 1956 # genres 1957 if config.setting['folksonomy_tags'] and 'genre' in tm: 1958 candidate_genres = str_to_list(tm['genre']) 1959 append_tag(release_id, tm, '~cea_candidate_genres', candidate_genres) 1960 # to avoid confusion as it will contain unmatched folksonomy tags 1961 del tm['genre'] 1962 else: 1963 candidate_genres = [] 1964 is_classical = False 1965 composers_not_found = [] 1966 composer_found = False 1967 composer_born_list = [] 1968 composer_died_list = [] 1969 arrangers_not_found = [] 1970 arranger_found = False 1971 arranger_born_list = [] 1972 arranger_died_list = [] 1973 no_composer_in_metadata = False 1974 if 
options['cwp_use_muso_refdb'] and options['cwp_muso_classical'] or options['cwp_muso_dates']: 1975 if COMPOSER_DICT: 1976 composersort_list = [] 1977 if '~cwp_composer_names' in tm: 1978 composer_list = str_to_list(tm['~cwp_composer_names']) 1979 else: 1980 # maybe there were no works linked, 1981 # but it might still a classical track (based on composer name) 1982 no_composer_in_metadata = True 1983 composer_list = str_to_list(tm['artists']) 1984 composersort_list = str_to_list(tm['~artists_sort']) 1985 write_log(release_id, 'info', "No composer metadata for track %s. Using artists %r", tm['title'], 1986 composer_list) 1987 lc_composer_list = [c.lower() for c in composer_list] 1988 for ind, composer in enumerate(lc_composer_list): 1989 for classical_composer in COMPOSER_DICT: 1990 if composer in classical_composer['lc_name']: 1991 if options['cwp_muso_classical']: 1992 candidate_genres.append('Classical') 1993 is_classical = True 1994 if options['cwp_muso_dates']: 1995 composer_born_list = classical_composer['birth'] 1996 composer_died_list = classical_composer['death'] 1997 composer_found = True 1998 if no_composer_in_metadata: 1999 composersort = composersort_list[ind] 2000 append_tag(release_id, tm, 'composer', composer_list[ind]) 2001 append_tag(release_id, tm, '~cwp_composer_names', composer_list[ind]) 2002 append_tag(release_id, tm, 'composersort', composersort) 2003 append_tag(release_id, tm, '~cwp_composers_sort', composersort) 2004 append_tag(release_id, tm, '~cwp_composer_lastnames', composersort.split(', ')[0]) 2005 break 2006 if not composer_found: 2007 composer_index = lc_composer_list.index(composer) 2008 orig_composer = composer_list[composer_index] 2009 composers_not_found.append(orig_composer) 2010 append_tag( 2011 release_id, 2012 tm, 2013 '~cwp_unrostered_composers', 2014 orig_composer) 2015 if composers_not_found: 2016 append_tag( 2017 release_id, 2018 tm, 2019 '003_information:composers', 2020 'Composer(s) ' + 2021 
list_to_str(composers_not_found) + 2022 ' not found in reference database of classical composers') 2023 2024 # do the same for arrangers, if required 2025 if options['cwp_genres_arranger_as_composer'] or options['cwp_periods_arranger_as_composer']: 2026 arranger_list = str_to_list( 2027 tm['~cea_arranger_names']) + str_to_list(tm['~cwp_arranger_names']) 2028 lc_arranger_list = [c.lower() for c in arranger_list] 2029 for arranger in lc_arranger_list: 2030 for classical_arranger in COMPOSER_DICT: 2031 if arranger in classical_arranger['lc_name']: 2032 if options['cwp_muso_classical'] and options['cwp_genres_arranger_as_composer']: 2033 candidate_genres.append('Classical') 2034 is_classical = True 2035 if options['cwp_muso_dates'] and options['cwp_periods_arranger_as_composer']: 2036 arranger_born_list = classical_arranger['birth'] 2037 arranger_died_list = classical_arranger['death'] 2038 arranger_found = True 2039 break 2040 if not arranger_found: 2041 arranger_index = lc_arranger_list.index(arranger) 2042 orig_arranger = arranger_list[arranger_index] 2043 arrangers_not_found.append(orig_arranger) 2044 append_tag( 2045 release_id, 2046 tm, 2047 '~cwp_unrostered_arrangers', 2048 orig_arranger) 2049 if arrangers_not_found: 2050 append_tag( 2051 release_id, 2052 tm, 2053 '003_information:arrangers', 2054 'Arranger(s) ' + 2055 list_to_str(arrangers_not_found) + 2056 ' not found in reference database of classical composers') 2057 2058 else: 2059 append_tag( 2060 release_id, 2061 tm, 2062 '001_errors:8', 2063 '8. No composer reference file. 
Check log for error messages re path name.') 2064 2065 if options['cwp_use_muso_refdb'] and options['cwp_muso_genres'] and GENRE_DICT: 2066 main_classical_genres_list = [list_to_str( 2067 mg['name']).strip() for mg in GENRE_DICT] 2068 else: 2069 main_classical_genres_list = [ 2070 sg.strip() for sg in options['cwp_genres_classical_main'].split(',')] 2071 sub_classical_genres_list = [ 2072 sg.strip() for sg in options['cwp_genres_classical_sub'].split(',')] 2073 main_other_genres_list = [ 2074 sg.strip() for sg in options['cwp_genres_other_main'].split(',')] 2075 sub_other_genres_list = [sg.strip() 2076 for sg in options['cwp_genres_other_sub'].split(',')] 2077 main_classical_genres = [] 2078 sub_classical_genres = [] 2079 main_other_genres = [] 2080 sub_other_genres = [] 2081 if '~cea_work_type' in tm: 2082 candidate_genres += str_to_list(tm['~cea_work_type']) 2083 if '~cwp_candidate_genres' in tm: 2084 candidate_genres += str_to_list(tm['~cwp_candidate_genres']) 2085 write_log(release_id, 'info', "Candidate genres: %r", candidate_genres) 2086 untagged_genres = [] 2087 if candidate_genres: 2088 main_classical_genres = [ 2089 val for val in main_classical_genres_list if val.lower() in [ 2090 genre.lower() for genre in candidate_genres]] 2091 sub_classical_genres = [ 2092 val for val in sub_classical_genres_list if val.lower() in [ 2093 genre.lower() for genre in candidate_genres]] 2094 2095 if main_classical_genres or sub_classical_genres or options['cwp_genres_classical_all']: 2096 is_classical = True 2097 main_classical_genres.append('Classical') 2098 candidate_genres.append('Classical') 2099 write_log(release_id, 'info', "Main classical genres for track %s: %r", tm['title'], main_classical_genres) 2100 candidate_genres += str_to_list(tm['~cea_work_type_if_classical']) 2101 # next two are repeated statements, but a separate fn would be 2102 # clumsy too! 
2103 main_classical_genres = [ 2104 val for val in main_classical_genres_list if val.lower() in [ 2105 genre.lower() for genre in candidate_genres]] 2106 sub_classical_genres = [ 2107 val for val in sub_classical_genres_list if val.lower() in [ 2108 genre.lower() for genre in candidate_genres]] 2109 if options['cwp_genres_classical_exclude']: 2110 main_classical_genres = [ 2111 g for g in main_classical_genres if g.lower() != 'classical'] 2112 2113 main_other_genres = [ 2114 val for val in main_other_genres_list if val.lower() in [ 2115 genre.lower() for genre in candidate_genres]] 2116 sub_other_genres = [ 2117 val for val in sub_other_genres_list if val.lower() in [ 2118 genre.lower() for genre in candidate_genres]] 2119 all_genres = main_classical_genres + sub_classical_genres + \ 2120 main_other_genres + sub_other_genres 2121 untagged_genres = [ 2122 un for un in candidate_genres if un.lower() not in [ 2123 genre.lower() for genre in all_genres]] 2124 2125 if options['cwp_genre_tag']: 2126 if not options['cwp_genres_filter']: 2127 append_tag( 2128 release_id, 2129 tm, 2130 options['cwp_genre_tag'], 2131 candidate_genres) 2132 else: 2133 append_tag( 2134 release_id, 2135 tm, 2136 options['cwp_genre_tag'], 2137 main_classical_genres + 2138 main_other_genres) 2139 if options['cwp_subgenre_tag'] and options['cwp_genres_filter']: 2140 append_tag( 2141 release_id, 2142 tm, 2143 options['cwp_subgenre_tag'], 2144 sub_classical_genres + 2145 sub_other_genres) 2146 if is_classical and options['cwp_genres_flag_text'] and options['cwp_genres_flag_tag']: 2147 tm[options['cwp_genres_flag_tag']] = options['cwp_genres_flag_text'] 2148 if not ( 2149 main_classical_genres + 2150 main_other_genres)and options['cwp_genres_filter']: 2151 if options['cwp_genres_default']: 2152 append_tag( 2153 release_id, 2154 tm, 2155 options['cwp_genre_tag'], 2156 options['cwp_genres_default']) 2157 else: 2158 if options['cwp_genre_tag'] in tm: 2159 del tm[options['cwp_genre_tag']] 2160 if 
untagged_genres and options['cwp_genres_filter']: 2161 append_tag( 2162 release_id, 2163 tm, 2164 '003_information:genres', 2165 'Candidate genres found but not matched: ' + 2166 list_to_str(untagged_genres)) 2167 append_tag(release_id, tm, '~cwp_untagged_genres', untagged_genres) 2168 2169 # instruments and keys 2170 if options['cwp_instruments_MB_names'] and options['cwp_instruments_credited_names'] and tm['~cea_instruments_all']: 2171 instruments = str_to_list(tm['~cea_instruments_all']) 2172 elif options['cwp_instruments_MB_names'] and tm['~cea_instruments']: 2173 instruments = str_to_list(tm['~cea_instruments']) 2174 elif options['cwp_instruments_credited_names'] and tm['~cea_instruments_credited']: 2175 instruments = str_to_list(tm['~cea_instruments_credited']) 2176 else: 2177 instruments = None 2178 if instruments and options['cwp_instruments_tag']: 2179 append_tag(release_id, tm, options['cwp_instruments_tag'], instruments) 2180 # need to append rather than over-write as it may be the same as 2181 # another tag (e.g. 
genre) 2182 if tm['~cwp_keys'] and options['cwp_key_tag']: 2183 append_tag(release_id, tm, options['cwp_key_tag'], tm['~cwp_keys']) 2184 # dates 2185 if options['cwp_workdate_annotate']: 2186 comp = ' (composed)' 2187 publ = ' (published)' 2188 prem = ' (premiered)' 2189 else: 2190 comp = '' 2191 publ = '' 2192 prem = '' 2193 tm[options['cwp_workdate_tag']] = '' 2194 earliest_date = 9999 2195 latest_date = -9999 2196 found = False 2197 if tm['~cwp_composed_dates']: 2198 composed_dates_list = str_to_list(tm['~cwp_composed_dates']) 2199 if len(composed_dates_list) > 1: 2200 composed_dates_list = str_to_list( 2201 composed_dates_list[0]) # use dates of lowest-level work 2202 earliest_date = min([int(dates.split(DATE_SEP)[0].strip()) 2203 for dates in composed_dates_list]) 2204 append_tag( 2205 release_id, 2206 tm, 2207 options['cwp_workdate_tag'], 2208 list_to_str(composed_dates_list) + 2209 comp) 2210 found = True 2211 if tm['~cwp_published_dates'] and ( 2212 not found or options['cwp_workdate_use_all']): 2213 if not found: 2214 published_dates_list = str_to_list(tm['~cwp_published_dates']) 2215 if len(published_dates_list) > 1: 2216 published_dates_list = str_to_list( 2217 published_dates_list[0]) # use dates of lowest-level work 2218 earliest_date = min([int(dates.split(DATE_SEP)[0].strip()) 2219 for dates in published_dates_list]) 2220 append_tag( 2221 release_id, 2222 tm, 2223 options['cwp_workdate_tag'], 2224 list_to_str(published_dates_list) + 2225 publ) 2226 found = True 2227 if tm['~cwp_premiered_dates'] and ( 2228 not found or options['cwp_workdate_use_all']): 2229 if not found: 2230 premiered_dates_list = str_to_list(tm['~cwp_premiered_dates']) 2231 if len(premiered_dates_list) > 1: 2232 premiered_dates_list = str_to_list( 2233 premiered_dates_list[0]) # use dates of lowest-level work 2234 earliest_date = min([int(dates.split(DATE_SEP)[0].strip()) 2235 for dates in premiered_dates_list]) 2236 append_tag( 2237 release_id, 2238 tm, 2239 
options['cwp_workdate_tag'], 2240 list_to_str(premiered_dates_list) + 2241 prem) 2242 2243 # periods 2244 PERIODS = {} 2245 if options['cwp_period_map']: 2246 if options['cwp_use_muso_refdb'] and options['cwp_muso_periods'] and PERIOD_DICT: 2247 for p_item in PERIOD_DICT: 2248 if 'start' not in p_item or p_item['start'] == []: 2249 p_item['start'] = [u'-9999'] 2250 if 'end' not in p_item or p_item['end'] == []: 2251 p_item['end'] = [u'2525'] 2252 if 'name' not in p_item or p_item['name'] == []: 2253 p_item['name'] = ['NOT SPECIFIED'] 2254 PERIODS = {list_to_str(mp['name']).strip(): ( 2255 list_to_str(mp['start']), 2256 list_to_str(mp['end'])) 2257 for mp in PERIOD_DICT} 2258 for period in PERIODS: 2259 if PERIODS[period][0].lstrip( 2260 '-').isdigit() and PERIODS[period][1].lstrip('-').isdigit(): 2261 PERIODS[period] = (int(PERIODS[period][0]), 2262 int(PERIODS[period][1])) 2263 else: 2264 PERIODS[period] = ( 2265 9999, 2266 'ERROR - start and/or end of ' + 2267 period + 2268 ' are not integers') 2269 2270 else: 2271 periods = [p.strip() for p in options['cwp_period_map'].split(';')] 2272 for p in periods: 2273 p = p.split(',') 2274 if len(p) == 3: 2275 period = p[0].strip() 2276 start = p[1].strip() 2277 end = p[2].strip() 2278 if start.lstrip( 2279 '-').isdigit() and end.lstrip('-').isdigit(): 2280 PERIODS[period] = (int(start), int(end)) 2281 else: 2282 PERIODS[period] = ( 2283 9999, 2284 'ERROR - start and/or end of ' + 2285 period + 2286 ' are not integers') 2287 else: 2288 PERIODS[p[0]] = ( 2289 9999, 'ERROR in period map - each item must contain 3 elements') 2290 if options['cwp_period_tag'] and PERIODS: 2291 if earliest_date == 9999: # i.e. 
no work date found 2292 if options['cwp_use_muso_refdb'] and options['cwp_muso_dates']: 2293 for composer_born in composer_born_list + arranger_born_list: 2294 if composer_born and composer_born.isdigit(): 2295 birthdate = int(composer_born) 2296 # productive age is taken as 20->death as per Muso 2297 earliest_date = min(earliest_date, birthdate + 20) 2298 for composer_died in composer_died_list + arranger_died_list: 2299 if composer_died and composer_died.isdigit(): 2300 deathdate = int(composer_died) 2301 latest_date = max(latest_date, deathdate) 2302 else: 2303 latest_date = datetime.now().year 2304 # sort into start date order before writing tags 2305 sorted_periods = collections.OrderedDict( 2306 sorted(PERIODS.items(), key=lambda t: t[1])) 2307 for period in sorted_periods: 2308 if isinstance( 2309 sorted_periods[period][1], 2310 str) and 'ERROR' in sorted_periods[period][1]: 2311 tm[options['cwp_period_tag']] = '' 2312 append_tag( 2313 release_id, 2314 tm, 2315 '001_errors:9', 2316 '9. 
' + 2317 sorted_periods[period]) 2318 break 2319 if earliest_date < 9999: 2320 if sorted_periods[period][0] <= earliest_date <= sorted_periods[period][1]: 2321 append_tag( 2322 release_id, 2323 tm, 2324 options['cwp_period_tag'], 2325 period) 2326 if latest_date > -9999: 2327 if sorted_periods[period][0] <= latest_date <= sorted_periods[period][1]: 2328 append_tag( 2329 release_id, 2330 tm, 2331 options['cwp_period_tag'], 2332 period) 2333 2334 # generic tag mapping 2335 sort_tags = options['cea_tag_sort'] 2336 if sort_tags: 2337 tm['artists_sort'] = str_to_list(tm['~artists_sort']) 2338 for i in range(0, 16): 2339 tagline = options['cea_tag_' + str(i + 1)].split(",") 2340 source_group = options['cea_source_' + str(i + 1)].split(",") 2341 conditional = options['cea_cond_' + str(i + 1)] 2342 for item, tagx in enumerate(tagline): 2343 tag = tagx.strip() 2344 sort = sort_suffix(tag) 2345 if not conditional or tm[tag] == "": 2346 for source_memberx in source_group: 2347 source_member = source_memberx.strip() 2348 sourceline = source_member.split("+") 2349 if len(sourceline) > 1: 2350 source = "\\" 2351 for source_itemx in sourceline: 2352 source_item = source_itemx.strip() 2353 source_itema = source_itemx.lstrip() 2354 write_log( 2355 release_id, 'info', "Source_item: %s", source_item) 2356 if "~cea_" + source_item in tm: 2357 si = tm['~cea_' + source_item] 2358 elif "~cwp_" + source_item in tm: 2359 si = tm['~cwp_' + source_item] 2360 elif source_item in tm: 2361 si = tm[source_item] 2362 elif len(source_itema) > 0 and source_itema[0] == "\\": 2363 si = source_itema[1:] 2364 else: 2365 si = "" 2366 if si != "" and source != "": 2367 source = source + si 2368 else: 2369 source = "" 2370 else: 2371 source = sourceline[0] 2372 no_names_source = re.sub('(_names)$', 's', source) 2373 source_sort = sort_suffix(source) 2374 write_log( 2375 release_id, 2376 'info', 2377 "Tag mapping: Line: %s, Source: %s, Tag: %s, no_names_source: %s, sort: %s, item %s", 2378 i + 2379 1, 2380 
source, 2381 tag, 2382 no_names_source, 2383 sort, 2384 item) 2385 if '~cea_' + source in tm or '~cwp_' + source in tm: 2386 for prefix in ['~cea_', '~cwp_']: 2387 if prefix + source in tm: 2388 write_log(release_id, 'info', prefix) 2389 append_tag(release_id, tm, tag, 2390 tm[prefix + source], ['; ']) 2391 if sort_tags: 2392 if prefix + no_names_source + source_sort in tm: 2393 write_log( 2394 release_id, 'info', prefix + " sort") 2395 append_tag(release_id, tm, tag + sort, 2396 tm[prefix + no_names_source + source_sort], ['; ']) 2397 elif source in tm or '~' + source in tm: 2398 write_log(release_id, 'info', "Picard") 2399 for p in ['', '~']: 2400 if p + source in tm: 2401 append_tag(release_id, tm, tag, 2402 tm[p + source], ['; ', '/ ']) 2403 if sort_tags: 2404 if "~" + source + source_sort in tm: 2405 source = "~" + source 2406 if source + source_sort in tm: 2407 write_log( 2408 release_id, 'info', "Picard sort") 2409 append_tag(release_id, tm, tag + sort, 2410 tm[source + source_sort], ['; ', '/ ']) 2411 elif len(source) > 0 and source[0] == "\\": 2412 append_tag(release_id, tm, tag, 2413 source[1:], ['; ', '/ ']) 2414 else: 2415 pass 2416 2417 # write error messages to tags 2418 if options['log_error'] and "~cea_error" in tm: 2419 for error in str_to_list(tm['~cea_error']): 2420 ecode = error[0] 2421 append_tag(release_id, tm, '001_errors:' + ecode, error) 2422 if options['log_warning'] and "~cea_warning" in tm: 2423 for warning in str_to_list(tm['~cea_warning']): 2424 wcode = warning[0] 2425 append_tag(release_id, tm, '002_warnings:' + wcode, warning) 2426 2427 # delete unwanted tags 2428 if not options['log_debug']: 2429 if '~cea_works_complete' in tm: 2430 del tm['~cea_works_complete'] 2431 if '~cea_artists_complete' in tm: 2432 del tm['~cea_artists_complete'] 2433 del_list = [] 2434 for t in tm: 2435 if 'ce_tag_cleared' in t: 2436 del_list.append(t) 2437 for t in del_list: 2438 del tm[t] 2439 2440 # create hidden tags to flag differences 2441 if 
options['ce_show_ui_tags'] and options['ce_ui_tags']: 2442 for heading_name, tag_tuple in UI_TAGS: # UI_TAGS is already iterated in main routine, so no need for .items() method here 2443 heading_tag = '~' + heading_name + '_VAL' 2444 for tag in tag_tuple: 2445 if tag[-5:] != '_DIFF': 2446 append_tag(release_id, tm, heading_tag, tm[tag]) 2447 else: 2448 tag = '~' + tag 2449 mirror_tags = str_to_list((tm['~ce_mirror_tags'])) 2450 for mirror_tag in mirror_tags: 2451 mt = interpret(mirror_tag) 2452 st = str_to_list(mt) 2453 (old_tag, new_tag) = tuple(st) 2454 diff_name = old_tag.replace('OLD', 'DIFF') 2455 if diff_name == tag and tm[old_tag] != tm[new_tag]: 2456 tm[diff_name] = '*****' 2457 append_tag(release_id, tm, heading_tag, '*****') 2458 break 2459 2460 # if options over-write enabled, remove it after processing one album 2461 options['ce_options_overwrite'] = False 2462 config.setting['ce_options_overwrite'] = False 2463 # so that options are not retained (in case of refresh with different 2464 # options) 2465 if '~ce_options' in tm: 2466 del tm['~ce_options'] 2467 2468 # remove any unwanted file tags 2469 if '~ce_file' in tm and tm['~ce_file'] != "None": 2470 music_file = tm['~ce_file'] 2471 orig_metadata = album.tagger.files[music_file].orig_metadata 2472 if 'delete_tags' in options and options['delete_tags']: 2473 warn = [] 2474 for delete_item in options['delete_tags']: 2475 if delete_item not in tm: # keep the original for comparison if we have a new version 2476 if delete_item in orig_metadata: 2477 del orig_metadata[delete_item] 2478 if delete_item != '002_warnings:7': # to avoid circularity! 2479 warn.append(delete_item) 2480 if warn and options['log_warning']: 2481 append_tag( 2482 release_id, 2483 tm, 2484 '002_warnings:7', 2485 '7. 
Deleted tags: ' + 2486 ', '.join(warn)) 2487 write_log( 2488 release_id, 2489 'warning', 2490 'Deleted tags: ' + 2491 ', '.join(warn)) 2492 2493 2494def sort_suffix(tag): 2495 """To determine what sort suffix is appropriate for a given tag""" 2496 if tag == "composer" or tag == "artist" or tag == "albumartist" or tag == "trackartist" or tag == "~cea_MB_artist": 2497 sort = "sort" 2498 else: 2499 sort = "_sort" 2500 return sort 2501 2502 2503def append_tag(release_id, tm, tag, source, separators=None): 2504 """ 2505 Update a tag 2506 :param release_id: name for log file - usually =musicbrainz_albumid 2507 unless called outside metadata processor 2508 :param tm: track metadata 2509 :param tag: tag to be appended to 2510 :param source: item to append to tag 2511 :param separators: characters which may be used to split string into a list 2512 (any of the characters will be a split point) 2513 :return: None. Action is on tm 2514 """ 2515 if not separators: 2516 separators = [] 2517 if tag and tag != "": 2518 if config.setting['log_info']: 2519 write_log( 2520 release_id, 2521 'info', 2522 'Appending source: %r to tag: %s (source is type %s) ...', 2523 source, 2524 tag, 2525 type(source)) 2526 if tag in tm: 2527 write_log( 2528 release_id, 2529 'info', 2530 '... 
# Typographic hyphens and quotes are written to tags as their plain equivalents
_PLAIN_PUNCTUATION = str.maketrans({
    u'\u2010': u'-',
    u'\u2011': u'-',
    u'\u2019': u"'",
    u'\u2018': u"'",
    u'\u201c': u'"',
    u'\u201d': u'"',
})


def append_tag(release_id, tm, tag, source, separators=None):
    """
    Update a tag
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param tm: track metadata
    :param tag: tag to be appended to
    :param source: item to append to tag
    :param separators: characters which may be used to split string into a list
        (any of the characters will be a split point)
    :return: None. Action is on tm
    Nothing is done if tag or source is empty. String items are added to the
    existing contents of the tag, unless already present. If source is not a
    string or a list of strings (e.g. saved options) it replaces the contents
    of the tag.
    """
    if not separators:
        separators = []
    if not tag:
        return
    if config.setting['log_info']:
        write_log(
            release_id,
            'info',
            'Appending source: %r to tag: %s (source is type %s) ...',
            source,
            tag,
            type(source))
        if tag in tm:
            write_log(
                release_id,
                'info',
                '... existing tag contents = %r',
                tm[tag])
    # plain truth test, so that non-sized values (e.g. numbers) are accepted
    if not source:
        return
    if isinstance(source, str):
        if separators:
            source = re.split('|'.join(separators), source)
        else:
            source = [source]
    if not isinstance(source, list):
        source = [source]  # typically for dict items such as saved options
    if not all(isinstance(x, str) for x in source):
        # source items are not strings, so just replace
        tm[tag] = source
        return

    if tag not in tm:
        if tag == 'artists_sort':
            # There is no artists_sort tag in Picard - just a
            # hidden var ~artists_sort, so pick up those into the new tag
            hidden = tm['~artists_sort']
            if not isinstance(hidden, list):
                if separators:
                    hidden = [h.strip() for h in re.split(
                        '|'.join(separators), hidden)]
                else:
                    hidden = [hidden]
            source = add_list_uniquely(source, hidden)
        new_tag = True
    else:
        new_tag = False

    for source_item in source:
        if isinstance(source_item, str):
            source_item = source_item.translate(_PLAIN_PUNCTUATION)
        if new_tag:
            tm[tag] = [source_item]
            new_tag = False
            continue
        existing = tm[tag]
        if not isinstance(existing, list):
            if separators:
                tag_list = [t.strip() for t in re.split(
                    '|'.join(separators), existing)]
            else:
                tag_list = [existing]
        else:
            tag_list = existing
        # NB the test is against the tag contents as read back from tm
        # (a substring test if that is a single string)
        if source_item not in existing:
            tag_list.append(source_item)
            tm[tag] = tag_list
            # NB tag_list is used as metadata object will convert single-item lists to strings
def get_artist_credit(options, release_id, obj):
    """
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param options: (not used)
    :param obj: an XmlNode
    :return: a list of as-credited names
    Each item in the list is a tuple (credited name, artist name, artist sort-name),
    where each element is whatever parse_data returned for it
    """
    name_credit_list = parse_data(release_id, obj, [], 'artist-credit')
    credit_list = []
    if name_credit_list:
        for name_credits in name_credit_list:
            for name_credit in name_credits:
                credited_artist = parse_data(
                    release_id, name_credit, [], 'name')
                if credited_artist:
                    name = parse_data(
                        release_id, name_credit, [], 'artist', 'name')
                    sort_name = parse_data(
                        release_id, name_credit, [], 'artist', 'sort-name')
                    credit_list.append((credited_artist, name, sort_name))
    return credit_list


def get_aliases_and_credits(
        self,
        options,
        release_id,
        album,
        obj,
        lang,
        credited):
    """
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param album:
    :param self: This relates to the object in the class which called this function
    :param options: (not used)
    :param obj: an XmlNode
    :param lang: The language selected in the Picard metadata options
    :param credited: The options item to determine what as-credited names are being sought
    :return: None. Sets self.artist_aliases and self.artist_credits[album]
    Both are keyed on the artist's sort-name. Only the first artist-credit
    of obj is used for the as-credited names.
    """
    name_credit_list = parse_data(release_id, obj, [], 'artist-credit')
    artist_list = parse_data(release_id, name_credit_list, [], 'artist')
    for artist in artist_list:
        sort_names = parse_data(release_id, artist, [], 'sort-name')
        if sort_names:
            aliases = parse_data(release_id, artist, [], 'aliases', 'locale:' +
                                 lang, 'primary:True', 'name')
            if aliases:
                self.artist_aliases[sort_names[0]] = aliases[0]
    # obj may have no artist-credit at all, in which case there is nothing to index
    if credited and name_credit_list:
        for name_credit in name_credit_list[0]:
            credited_artist = parse_data(release_id, name_credit, [], 'name')
            if credited_artist:
                sort_name = parse_data(
                    release_id, name_credit, [], 'artist', 'sort-name')
                if sort_name:
                    self.artist_credits[album][sort_name[0]
                                               ] = credited_artist[0]
def get_relation_credits(
        self,
        options,
        release_id,
        album,
        obj,
        lang,
        credited):
    """
    Record the as-credited names and the aliases of artists related to obj
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param self: the object whose artist_credits and artist_aliases are updated
    :param options: UI options (not used)
    :param album: current album
    :param obj: Xmlnode
    :param lang: language
    :param credited: credited-as name
    :return: None
    Note that direct recording relationships will over-ride indirect ones (via work)
    """
    # The indirect (via work) relationships are processed first so that the
    # direct ones, processed second, over-write them.
    relation_paths = (
        ('relations', 'target-type:work', 'work', 'relations', 'target-type:artist'),
        ('relations', 'target-type:artist'),
    )
    for relation_path in relation_paths:
        for artist_rel in parse_data(release_id, obj, [], *relation_path):
            sort_names = parse_data(
                release_id, artist_rel, [], 'artist', 'sort-name')
            if not sort_names:
                continue
            sort_key = sort_names[0]
            target_credits = parse_data(
                release_id, artist_rel, [], 'target-credit')
            if credited and target_credits and target_credits[0] != '':
                self.artist_credits[album][sort_key] = target_credits[0]
            alias_names = parse_data(
                release_id,
                artist_rel,
                [],
                'artist',
                'aliases',
                'locale:' + lang,
                'primary:True',
                'name')
            if alias_names:
                self.artist_aliases[sort_key] = alias_names[0]
def composer_last_names(self, release_id, tm, album):
    """
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param self: the object holding album_artists and SEPARATORS
    :param tm: track metadata
    :param album: current album
    :return: None
    Sets composer last names for album prefixing
    For each composer last name of the track, the length of the track is added
    to self.album_artists[album]['composer_lastnames'][last name]['length'].
    If the track has no composer last names, a warning is logged and written
    to the ~cea_warning variable instead.
    """
    if '~cea_album_track_composer_lastnames' not in tm:
        write_log(
            release_id,
            'warning',
            "No _cea_album_track_composer_lastnames variable available for recording \"%s\".",
            tm['title'])
        if 'composer' in tm:
            warning = '1. Composer for this track is not in album artists and will not be available to prefix album'
        else:
            warning = '1. No composer for this track, but checking parent work.'
        # append_tag is the module-level function: (release_id, tm, tag, source)
        append_tag(release_id, tm, '~cea_warning', warning)
        return

    raw_lastnames = tm['~cea_album_track_composer_lastnames']
    if not isinstance(raw_lastnames, list):
        atc_list = re.split('|'.join(self.SEPARATORS), raw_lastnames)
    else:
        atc_list = str_to_list(raw_lastnames)
    if '~length' in tm and tm['~length']:
        track_length = time_to_secs(tm['~length'])
    else:
        track_length = 0
    for atc_item in atc_list:
        composer_lastnames = atc_item.strip()
        album_lastnames = self.album_artists[album]['composer_lastnames']
        if composer_lastnames in album_lastnames:
            album_lastnames[composer_lastnames]['length'] += track_length
        else:
            album_lastnames[composer_lastnames] = {'length': track_length}
def add_list_uniquely(list_to, list_from):
    """
    Adds any items in list_from to list_to, if they are not already present
    If either arg is a string, it will be converted to a list, e.g. 'abc' -> ['abc']
    :param list_to: list (or string) to be added to. If it is a non-empty list,
    it is extended in place.
    :param list_from: list (or string) of items to add
    :return: appends only unique elements of list 2 to list 1
    If list_from is empty, list_to is returned as it is. If list_to is empty,
    a new list of the items in list_from is returned.
    """
    if not list_from:
        return list_to
    if not list_to:
        # a copy, so that later changes to the result do not also change list_from
        return list(str_to_list(list_from))
    if not isinstance(list_to, list):
        list_to = str_to_list(list_to)
    if not isinstance(list_from, list):
        list_from = str_to_list(list_from)
    for list_item in list_from:
        if list_item not in list_to:
            list_to.append(list_item)
    return list_to


def str_to_list(s):
    """
    :param s: string, list or other object
    :return: list from string using ; as separator
    A list is returned unchanged. An empty string gives an empty list.
    Any other iterable is converted using list() and anything else (e.g. None)
    gives an empty list.
    """
    if isinstance(s, list):
        return s
    if isinstance(s, str):
        return s.split('; ') if s != '' else []
    try:
        return list(s)
    except TypeError:
        return []


def list_to_str(l):
    """
    :param l: list of strings (anything else is returned unchanged)
    :return: string from list using ; as separator
    """
    if not isinstance(l, list):
        return l
    return '; '.join(l)
def interpret(tag):
    """
    :param tag: string holding a Python literal (anything else is returned unchanged)
    :return: safe form of eval(tag)
    If the (stripped) string cannot be evaluated as a literal then the
    stripped string itself is returned.
    """
    if not isinstance(tag, str):
        return tag
    tag = tag.strip(' \n\t')
    try:
        return ast.literal_eval(tag)
    except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError):
        # all of these can be raised by literal_eval for malformed input
        # (e.g. TypeError for a dict with an unhashable key)
        return tag


def time_to_secs(a):
    """
    :param a: string x:x:x
    :return: seconds
    converts string times to seconds
    Returns 0 if any part of the string is not a whole number
    """
    total = 0
    # work from the right: seconds, then minutes, then hours
    for power, part in enumerate(reversed(a.split(':'))):
        if not part.isdigit():
            return 0
        total += int(part) * (60 ** power)
    return total


def seq_last_names(self, album):
    """
    Sequences composer last names for album prefix by the total lengths of their tracks
    :param self: the object holding album_artists
    :param album: current album
    :return: list of last names, longest total length first. The list is empty
    if there are no composer last names for the album or if any of them has no length.
    """
    ln = []
    if album in self.album_artists and 'composer_lastnames' in self.album_artists[album]:
        lastnames = self.album_artists[album]['composer_lastnames']
        for x in lastnames:
            if 'length' in lastnames[x]:
                ln.append([x, lastnames[x]['length']])
            else:
                return []
        # ascending sort then reversal (rather than a descending sort) so that
        # names with equal lengths come out in reverse order of entry
        ln = sorted(ln, key=lambda a: a[1])
        ln = ln[::-1]
    return [a[0] for a in ln]


def year(date):
    """
    Return YYYY portion of date(s) in YYYY-MM-DD format (may be incomplete, string or list)
    :param date: date string, None, or list of these
    :return: YYYY - always as a list, with one (string) item per date
    """
    if isinstance(date, list):
        return [blank_if_none(d).split('-')[0] for d in date]
    return [blank_if_none(date).split('-')[0]]


def blank_if_none(val):
    """
    Make NoneTypes strings
    :param val: str or None
    :return: str
    """
    if not val:
        return ''
    return val
def strip_excess_punctuation(s):
    """
    remove orphan punctuation, unmatched quotes and brackets
    The clean-up passes are repeated until the string stops changing
    (with a cap on the number of passes).
    :param s: string (None or '' is returned unchanged)
    :return: string
    """
    if not s:
        return s
    bracket_pairs = [("(", ")"), ("[", "]"), ("{", "}")]
    s_prev = ''
    counter = 0
    while s != s_prev:
        if counter > 100:
            break  # safety valve
        s_prev = s
        s = s.replace('  ', ' ')
        s = s.strip("&.-:;, ")
        s = s.lstrip("!)]}")
        s = s.rstrip("([{")
        s = s.lstrip("\u2019")  # Right single quote
        s = s.lstrip("\u201D")  # Right double quote
        if s.count("\u201E") == 0:  # u201E is lower double quote (German etc.)
            # Left double quote - only strip if there is no German-style
            # lower quote present
            s = s.rstrip("\u201C")
        s = s.rstrip("\u2018")  # Left single quote
        # an odd number of straight quotes means one is unmatched
        if s.count('"') % 2 != 0:
            s = s.strip('"')
        if s.count("'") % 2 != 0:
            s = s.strip("'")
        # curly quotes at either end with no partner anywhere in the string
        if len(s) > 0 and s[0] == "\u201C" and s.count("\u201D") == 0:
            s = s.lstrip("\u201C")
        if len(s) > 0 and s[-1] == "\u201D" and s.count("\u201C") == 0 \
                and s.count("\u201E") == 0:
            # only strip if there is no German-style lower quote present
            s = s.rstrip("\u201D")
        if len(s) > 0 and s[0] == "\u2018" and s.count("\u2019") == 0:
            s = s.lstrip("\u2018")
        if len(s) > 0 and s[-1] == "\u2019" and s.count("\u2018") == 0:
            s = s.rstrip("\u2019")
        if s:
            if s.count("\"") == 1:
                s = s.replace('"', '')
            if s.count("\'") == 1:
                # only an opening quote after a space is removed, so that
                # genuine apostrophes survive
                s = s.replace(" '", " ")
            # drop any bracket whose partner is missing
            if "(" in s and ")" not in s:
                s = s.replace("(", "")
            if ")" in s and "(" not in s:
                s = s.replace(")", "")
            if "[" in s and "]" not in s:
                s = s.replace("[", "")
            if "]" in s and "[" not in s:
                s = s.replace("]", "")
            if "{" in s and "}" not in s:
                s = s.replace("{", "")
            if "}" in s and "{" not in s:
                s = s.replace("}", "")
            # unwrap a string that is entirely enclosed in one bracket pair.
            # s shrinks (possibly to '') inside this loop, so it is re-tested
            # and re-indexed from its current ends on every pair.
            for opener, closer in bracket_pairs:
                if s and s[0] == opener and s[-1] == closer:
                    s = s.lstrip(opener).rstrip(closer)
        counter += 1
    return s
class ExtraArtists():

    def __init__(self):
        """
        Create the (initially empty) stores that are filled in while the
        tracks of a release are processed
        """
        def nested_store():
            # store[a][b] springs into existence as an empty dict on first use
            return collections.defaultdict(
                lambda: collections.defaultdict(dict))

        # artists to be applied at album level
        self.album_artists = nested_store()

        # tracks per album - format is {album: [track 1, track 2, ...]}
        self.track_listing = collections.defaultdict(list)

        # Classical Extras options
        self.options = collections.defaultdict(dict)

        # global variables for this class
        self.globals = collections.defaultdict(dict)

        # performers who have release relationships, not track relationships
        self.album_performers = nested_store()

        # instruments which have release relationships, not track
        # relationships
        self.album_instruments = nested_store()

        # alias names - format is {sort_name: alias_name, ...}
        self.artist_aliases = {}

        # credited-as names - format is
        # {album: {sort_name: credit_name, ...}, ...}
        self.artist_credits = collections.defaultdict(dict)

        # release artists - format is {album: [sort_name_1, sort_name_2, ...]}
        self.release_artists_sort = collections.defaultdict(list)

        # Boolean for each track to indicate if lyricist has been found
        # (don't want to add more from higher levels)
        # NB this one is for completeness - not actually used by
        # ExtraArtists, but here to remove pep8 error
        self.lyricist_filled = collections.defaultdict(dict)

        # series relationships - format is {'name_list': series names,
        # 'id_list': series ids, 'number_list': number within series}
        self.album_series_list = collections.defaultdict(dict)
def add_artist_info(
        self,
        album,
        track_metadata,
        trackXmlNode,
        releaseXmlNode):
    """
    Main routine run for each track of release

    In outline: reads (or creates) the saved options for the track, caches the
    release-level performers/instruments/series when disc 1 track 1 is seen,
    rebuilds the track-artist and recording-artist tags according to the
    naming options, hands the combined performer list to set_performer and,
    when the track is the last track of the last disc, calls process_album.
    :param album: Current release
    :param track_metadata: track metadata dictionary
    :param trackXmlNode: Everything in the track node downwards
    :param releaseXmlNode: Everything in the release node downwards (so includes all track nodes)
    :return: None
    """
    release_id = track_metadata['musicbrainz_albumid']
    # release_status is a module-level store (defined outside this method)
    if 'start' not in release_status[release_id]:
        release_status[release_id]['start'] = datetime.now()
    if 'lookups' not in release_status[release_id]:
        release_status[release_id]['lookups'] = 0
    release_status[release_id]['name'] = track_metadata['album']
    release_status[release_id]['artists'] = True
    if config.setting['log_debug'] or config.setting['log_info']:
        write_log(
            release_id,
            'debug',
            'STARTING ARTIST PROCESSING FOR ALBUM %s, DISC %s, TRACK %s',
            track_metadata['album'],
            track_metadata['discnumber'],
            track_metadata['tracknumber'] +
            ' ' +
            track_metadata['title'])
    # write_log(release_id, 'info', 'trackXmlNode = %s', trackXmlNode) # NB can crash Picard
    # write_log('info', 'releaseXmlNode = %s', releaseXmlNode) # NB can crash Picard
    # Jump through hoops to get track object!!
    # (takes the most recently added entry of the album's private track list)
    track = album._new_tracks[-1]
    tm = track.metadata

    # OPTIONS - OVER-RIDE IF REQUIRED
    if '~ce_options' not in tm:
        if config.setting['log_debug'] or config.setting['log_info']:
            write_log(release_id, 'debug', 'Artists gets track first...')
        get_options(release_id, album, track)
    options = interpret(tm['~ce_options'])
    if not options:
        # fall back to the current Picard settings if the saved options
        # could not be read
        if config.setting["log_error"]:
            write_log(
                release_id,
                'error',
                'Artists. Failure to read saved options for track %s. options = %s',
                track,
                tm['~ce_options'])
        options = option_settings(config.setting)
    self.options[track] = options

    # CONSTANTS
    # NB these are (re)assigned on the instance for every track processed
    self.ERROR = options["log_error"]
    self.WARNING = options["log_warning"]
    self.ORCHESTRAS = options["cea_orchestras"].split(',')
    self.CHOIRS = options["cea_choirs"].split(',')
    self.GROUPS = options["cea_groups"].split(',')
    self.ENSEMBLE_TYPES = self.ORCHESTRAS + self.CHOIRS + self.GROUPS
    self.SEPARATORS = ['; ', '/ ', ';', '/']

    # continue?
    if not options["classical_extra_artists"]:
        return
    # album_files is not used - this is just for logging
    album_files = album.tagger.get_files_from_objects([album])
    if options['log_info']:
        write_log(
            release_id,
            'info',
            'ALBUM FILENAMES for album %r = %s',
            album,
            album_files)

    # skip the track only when the "no run" option is set AND the track has
    # no file recorded in ~ce_file
    if not (
        options["ce_no_run"] and (
            not tm['~ce_file'] or tm['~ce_file'] == "None")):
        # continue
        write_log(
            release_id,
            'debug',
            "ExtraArtists - add_artist_info")
        if album not in self.track_listing or track not in self.track_listing[album]:
            self.track_listing[album].append(track)
        # fix odd hyphens in names for consistency
        # (U+2010 HYPHEN is replaced by the ASCII hyphen-minus)
        field_types = ['~albumartists', '~albumartists_sort']
        for field_type in field_types:
            if field_type in tm:
                field = tm[field_type]
                if isinstance(field, list):
                    for x, it in enumerate(field):
                        field[x] = it.replace(u'\u2010', u'-')
                elif isinstance(field, str):
                    field = field.replace(u'\u2010', u'-')
                else:
                    pass
                tm[field_type] = field

        # first time for this album (reloads each refresh)
        if tm['discnumber'] == '1' and tm['tracknumber'] == '1':
            # get artist aliases - these are cached so can be re-used across
            # releases, but are reloaded with each refresh
            get_aliases(self, release_id, album, options, releaseXmlNode)

            # xml_type = 'release'
            # get performers etc who are related at the release level
            relation_list = parse_data(
                release_id, releaseXmlNode, [], 'relations')
            album_performerList = get_artists(
                options, release_id, tm, relation_list, 'release')['artists']
            self.album_performers[album] = album_performerList
            album_instrumentList = get_artists(
                options, release_id, tm, relation_list, 'release')['instruments']
            self.album_instruments[album] = album_instrumentList

            # get series information
            self.album_series_list = get_series(
                options, release_id, relation_list)

        else:
            # later tracks re-use what was cached when disc 1 track 1 was seen
            if album in self.album_performers:
                album_performerList = self.album_performers[album]
            else:
                album_performerList = []
        if album in self.album_instruments and self.album_instruments[album]:
            tm['~cea_instruments'] = self.album_instruments[album][0]
            tm['~cea_instruments_credited'] = self.album_instruments[album][1]
            tm['~cea_instruments_all'] = self.album_instruments[album][2]
            # Should be OK to initialise these here as recording artists
            # yet to be processed

        # Fill release info not given by vanilla Picard
        if self.album_series_list:
            tm['series'] = self.album_series_list['name_list'] if 'name_list' in self.album_series_list else None
            tm['musicbrainz_seriesid'] = self.album_series_list['id_list'] if 'id_list' in self.album_series_list else None
            tm['series_number'] = self.album_series_list['number_list'] if 'number_list' in self.album_series_list else None
        ## TODO add label id too
        # NB the recording-level series list is only logged here
        recording_relation_list = parse_data(
            release_id, trackXmlNode, [], 'recording', 'relations')
        recording_series_list = get_series(
            options, release_id, recording_relation_list)
        write_log(
            release_id,
            'info',
            'Recording_series_list = %s',
            recording_series_list)

        track_artist_list = parse_data(
            release_id, trackXmlNode, [], 'artist-credit')
        if track_artist_list:
            track_artist = []
            track_artistsort = []
            track_artists = []
            track_artists_sort = []
            locale = config.setting["artist_locale"]
            # NB this is the Picard code in /util
            lang = locale.split("_")[0]

            # Set naming option
            # Put naming style into preferential list

            # naming as for vanilla Picard for track artists

            if options['translate_artist_names'] and lang:
                name_style = ['alias', 'sort']
                # documentation indicates that processing should be as below,
                # but processing above appears to reflect what vanilla Picard actually does
                # if options['standardize_artists']:
                #     name_style = ['alias', 'sort']
                # else:
                #     name_style = ['alias', 'credit', 'sort']
            else:
                if not options['standardize_artists']:
                    name_style = ['credit']
                else:
                    name_style = []
            write_log(
                release_id,
                'info',
                'Priority order of naming style for track artists = %s',
                name_style)
            styled_artists = apply_artist_style(
                options,
                release_id,
                lang,
                track_artist_list,
                name_style,
                track_artist,
                track_artistsort,
                track_artists,
                track_artists_sort)
            tm['artists'] = styled_artists['artists']
            tm['~artists_sort'] = styled_artists['artists_sort']
            tm['artist'] = styled_artists['artist']
            tm['artistsort'] = styled_artists['artistsort']

        if 'recording' in trackXmlNode:
            self.globals[track]['is_recording'] = True
            write_log(release_id, 'debug', 'Getting recording details')
            recording = trackXmlNode['recording']
            # normalise to a list so that a single recording can be looped over
            if not isinstance(recording, list):
                recording = [recording]
            for record in recording:
                rec_type = type(record)
                write_log(release_id, 'info', 'rec-type = %s', rec_type)
                write_log(release_id, 'info', record)
                # Note that the lists below reflect https://musicbrainz.org/relationships/artist-recording
                # Any changes to that DB structure will require changes
                # here

                # get recording artists data
                recording_artist_list = parse_data(
                    release_id, record, [], 'artist-credit')
                if recording_artist_list:
                    recording_artist = []
                    recording_artistsort = []
                    recording_artists = []
                    recording_artists_sort = []
                    locale = config.setting["artist_locale"]
                    # NB this is the Picard code in /util
                    lang = locale.split("_")[0]

                    # Set naming option
                    # Put naming style into preferential list

                    # naming as for vanilla Picard for track artists (per
                    # documentation rather than actual?)
                    if options['cea_ra_trackartist']:
                        if options['translate_artist_names'] and lang:
                            if options['standardize_artists']:
                                name_style = ['alias', 'sort']
                            else:
                                name_style = ['alias', 'credit', 'sort']
                        else:
                            if not options['standardize_artists']:
                                name_style = ['credit']
                            else:
                                name_style = []
                    # naming as for performers in classical extras
                    elif options['cea_ra_performer']:
                        if options['cea_aliases']:
                            if options['cea_alias_overrides']:
                                name_style = ['alias', 'credit']
                            else:
                                name_style = ['credit', 'alias']
                        else:
                            name_style = ['credit']

                    else:
                        name_style = []
                    write_log(
                        release_id,
                        'info',
                        'Priority order of naming style for recording artists = %s',
                        name_style)

                    styled_artists = apply_artist_style(
                        options,
                        release_id,
                        lang,
                        recording_artist_list,
                        name_style,
                        recording_artist,
                        recording_artistsort,
                        recording_artists,
                        recording_artists_sort)
                    self.append_tag(
                        release_id,
                        tm,
                        '~cea_recording_artists',
                        styled_artists['artists'])
                    self.append_tag(
                        release_id,
                        tm,
                        '~cea_recording_artists_sort',
                        styled_artists['artists_sort'])
                    self.append_tag(
                        release_id,
                        tm,
                        '~cea_recording_artist',
                        styled_artists['artist'])
                    self.append_tag(
                        release_id,
                        tm,
                        '~cea_recording_artistsort',
                        styled_artists['artistsort'])

                else:
                    tm['~cea_recording_artists'] = ''
                    tm['~cea_recording_artists_sort'] = ''
                    tm['~cea_recording_artist'] = ''
                    tm['~cea_recording_artistsort'] = ''

                # use recording artist options
                # (the track-artist values are first preserved in ~cea_MB_* tags)
                tm['~cea_MB_artist'] = str_to_list(tm['artist'])
                tm['~cea_MB_artistsort'] = str_to_list(tm['artistsort'])
                tm['~cea_MB_artists'] = str_to_list(tm['artists'])
                tm['~cea_MB_artists_sort'] = str_to_list(tm['~artists_sort'])

                if options['cea_ra_use']:
                    if options['cea_ra_replace_ta']:
                        # replace track artist by recording artist; if there is
                        # no recording artist the track artist is blanked
                        # unless 'cea_ra_noblank_ta' is set
                        if tm['~cea_recording_artist']:
                            tm['artist'] = str_to_list(tm['~cea_recording_artist'])
                            tm['artistsort'] = str_to_list(tm['~cea_recording_artistsort'])
                            tm['artists'] = str_to_list(tm['~cea_recording_artists'])
                            tm['~artists_sort'] = str_to_list(tm['~cea_recording_artists_sort'])
                        elif not options['cea_ra_noblank_ta']:
                            tm['artist'] = ''
                            tm['artistsort'] = ''
                            tm['artists'] = ''
                            tm['~artists_sort'] = ''
                    elif options['cea_ra_merge_ta']:
                        # merge: multi-value tags get the extra names added,
                        # single-value tags get them appended in brackets
                        if tm['~cea_recording_artist']:
                            tm['artists'] = add_list_uniquely(
                                tm['artists'], tm['~cea_recording_artists'])
                            tm['~artists_sort'] = add_list_uniquely(
                                tm['~artists_sort'], tm['~cea_recording_artists_sort'])
                            if tm['artist'] != tm['~cea_recording_artist']:
                                tm['artist'] = tm['artist'] + \
                                    ' (' + tm['~cea_recording_artist'] + ')'
                                tm['artistsort'] = tm['artistsort'] + \
                                    ' (' + tm['~cea_recording_artistsort'] + ')'

                # xml_type = 'recording'
                relation_list = parse_data(
                    release_id, record, [], 'relations')
                performerList = album_performerList + \
                    get_artists(options, release_id, tm, relation_list, 'recording')['artists']
                # returns
                # [(artist type, instrument or None, artist name, artist sort name, instrument sort, type sort)]
                # where instrument sort places solo ahead of additional etc.
                # and type sort applies a custom sequencing to the artist
                # types
                if performerList:
                    write_log(
                        release_id, 'info', "Performers: %s", performerList)
                    self.set_performer(
                        release_id, album, track, performerList, tm)
                if not options['classical_work_parts']:
                    work_artist_list = parse_data(
                        release_id,
                        record,
                        [],
                        'relations',
                        'target-type:work',
                        'type:performance',
                        'work',
                        'relations',
                        'target-type:artist')
                    work_artists = get_artists(
                        options, release_id, tm, work_artist_list, 'work')['artists']
                    set_work_artists(
                        self, release_id, album, track, work_artists, tm, 0)
                # otherwise composers etc. will be set in work parts
        else:
            self.globals[track]['is_recording'] = False
    else:
        tm['000_major_warning'] = "WARNING: Classical Extras not run for this track as no file present - " \
            "deselect the option on the advanced tab to run. If there is a file, then try 'Refresh'."
    if track_metadata['tracknumber'] == track_metadata['totaltracks'] and track_metadata[
            'discnumber'] == track_metadata['totaldiscs']:  # last track
        self.process_album(release_id, album)
        release_status[release_id]['artists-done'] = datetime.now()
        close_log(release_id, 'artists')
# Checks for ensembles
def ensemble_type(self, performer):
    """
    Returns ensemble types
    The performer name is searched (case-insensitively, whole words only) for
    each keyword in self.ORCHESTRAS, then self.CHOIRS, then self.GROUPS;
    the first list to produce a hit decides the result.
    Keywords are used as regular-expression fragments (they are not escaped).
    Surrounding whitespace is removed from each keyword and blank keywords
    are ignored: the lists come from splitting a comma-separated option, so
    "a, b" yields ' b' and an empty option yields [''] - and a blank keyword
    would otherwise match practically every name.
    :param performer: performer name
    :return: 'Orchestra', 'Choir' or 'Group'; False if no keyword matches
    """
    keyword_sets = (
        (self.ORCHESTRAS, 'Orchestra'),
        (self.CHOIRS, 'Choir'),
        (self.GROUPS, 'Group'),
    )
    for ensemble_names, label in keyword_sets:
        for ensemble_name in ensemble_names:
            keyword = ensemble_name.strip()
            if not keyword:
                continue
            if re.search(r'\b' + keyword + r'\b', performer, re.IGNORECASE):
                return label
    return False
def process_album(self, release_id, album):
    """
    Perform final processing after all tracks read

    Splits the lyrics of the album's tracks into the part common to all of
    them (album lyrics) and the remainder (track lyrics), then writes the
    metadata of every track and empties the album's track listing.
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param album:
    :return: None
    """
    write_log(
        release_id,
        'debug',
        'ExtraArtists: Starting process_album')
    # process lyrics tags
    write_log(release_id, 'debug', 'Starting lyrics processing')
    common = []
    tmlyrics_dict = {}
    tmlyrics_sort = []
    ref_track = {}
    options = {}
    for track in self.track_listing[album]:
        options = self.options[track]
        if options['cea_split_lyrics'] and options['cea_lyrics_tag']:
            tm = track.metadata
            lyrics_tag = options['cea_lyrics_tag']
            if tm[lyrics_tag]:
                # turn text into word lists to speed processing
                tmlyrics_dict[track] = tm[lyrics_tag].split()
    if tmlyrics_dict:
        # sorting on the word lists puts tracks with identical lyrics next
        # to each other
        tmlyrics_sort = sorted(
            tmlyrics_dict.items(),
            key=operator.itemgetter(1))
        prev = None
        first_track = None
        unique_lyrics = []
        for lyric_tuple in tmlyrics_sort:  # tuple is (track, lyrics)
            if lyric_tuple[1] != prev:
                unique_lyrics.append(lyric_tuple[1])
                first_track = lyric_tuple[0]
            # every track points at the first track carrying the same lyrics
            ref_track[lyric_tuple[0]] = first_track
            prev = lyric_tuple[1]
        common = turbo_lcs(
            release_id,
            unique_lyrics)

    if common:
        unique = []
        for tup in tmlyrics_sort:
            track = tup[0]
            ref = ref_track[track]
            if track == ref:
                # cut the common run of words out of this track's lyrics;
                # tracks with duplicate lyrics re-use the result
                start = substart_finder(tup[1], common)
                length = len(common)
                end = min(start + length, len(tup[1]))
                unique = tup[1][:start] + tup[1][end:]

            options = self.options[track]
            if options['cea_split_lyrics'] and options['cea_lyrics_tag']:
                tm = track.metadata
                if unique:
                    tm['~cea_track_lyrics'] = ' '.join(unique)
                tm['~cea_album_lyrics'] = ' '.join(common)
                if options['cea_album_lyrics']:
                    tm[options['cea_album_lyrics']] = tm['~cea_album_lyrics']
                if unique and options['cea_track_lyrics']:
                    tm[options['cea_track_lyrics']] = tm['~cea_track_lyrics']
    else:
        # nothing in common: each track keeps its own lyrics in full
        for track in self.track_listing[album]:
            options = self.options[track]
            if options['cea_split_lyrics'] and options['cea_lyrics_tag']:
                # must be THIS track's metadata - without this line the
                # writes below go to whichever track an earlier loop
                # happened to leave in tm
                tm = track.metadata
                tm['~cea_track_lyrics'] = tm[options['cea_lyrics_tag']]
                if options['cea_track_lyrics']:
                    tm[options['cea_track_lyrics']] = tm['~cea_track_lyrics']
    write_log(release_id, 'debug', 'Ending lyrics processing')

    for track in self.track_listing[album]:
        # NB write_metadata looks the track's own options up again itself
        self.write_metadata(release_id, options, album, track)
    self.track_listing[album] = []
    write_log(
        release_id,
        'info',
        "FINISHED Classical Extra Artists. Album: %s",
        album)
def write_metadata(self, release_id, options, album, track):
    """
    Write the metadata for this track
    :param release_id: name for log file
    :param options: ignored - immediately replaced by the options saved for the track
    :param album: key into self.album_artists
    :param track: track whose metadata is written (key into self.options)
    :return: None
    """
    options = self.options[track]
    tm = track.metadata
    tm['~cea_version'] = PLUGIN_VERSION

    # set inferred genres before any tags are blanked
    if options['cwp_genres_infer']:
        self.infer_genres(release_id, options, track, tm)

    # album
    if not options['classical_work_parts']:
        if 'composer_lastnames' in self.album_artists[album]:
            last_names = seq_last_names(self, album)
            self.append_tag(
                release_id,
                tm,
                '~cea_album_composer_lastnames',
                last_names)
    # otherwise this is done in the workparts class, which has all
    # composer info

    # process tag mapping
    # (the "complete" marker is set before map_tags is called)
    tm['~cea_artists_complete'] = "Y"
    map_tags(options, release_id, album, tm)

    # write out options and errors/warnings to tags
    if options['cea_options_tag'] != "":
        # three-level store: [plugin name][section][option name]
        self.cea_options = collections.defaultdict(
            lambda: collections.defaultdict(
                lambda: collections.defaultdict(dict)))

        for opt in plugin_options(
                'artists') + plugin_options('tag') + plugin_options('picard'):
            if 'name' in opt:
                if 'value' in opt:
                    # option carrying a fixed 'value': record that value,
                    # but only if the option is switched on
                    if options[opt['option']]:
                        self.cea_options['Classical Extras']['Artists options'][opt['name']] = opt['value']
                else:
                    self.cea_options['Classical Extras']['Artists options'][opt['name']
                                                                            ] = options[opt['option']]

        for opt in plugin_options('tag_detail'):
            if opt['option'] != "":
                # name is split at "_" and its first two parts are used as
                # nested keys
                name_list = opt['name'].split("_")
                self.cea_options['Classical Extras']['Artists options'][name_list[0]
                                                                        ][name_list[1]] = options[opt['option']]

        if options['ce_version_tag'] and options['ce_version_tag'] != "":
            self.append_tag(release_id, tm, options['ce_version_tag'], str(
                'Version ' + tm['~cea_version'] + ' of Classical Extras'))
        if options['cea_options_tag'] and options['cea_options_tag'] != "":
            # the JSON round trip turns the nested defaultdicts into plain
            # dicts before they are stored
            self.append_tag(
                release_id,
                tm,
                options['cea_options_tag'] +
                ':artists_options',
                json.loads(
                    json.dumps(
                        self.cea_options)))
def infer_genres(self, release_id, options, track, tm):
    """
    Infer a genre from the artist/instrument metadata

    Results are appended to two tags: '~cea_work_type' and
    '~cea_work_type_if_classical'.
    :param release_id: name for log file
    :param options: options for the track
    :param track: key into self.globals
    :param tm: track metadata
    :return: None
    """
    # Note that this is now mixed in with other sources of genres in def map_tags
    # ~cea_work_type_if_classical is used for types that are specifically classical
    # and is only applied in map_tags if the track is deemed to be
    # classical
    # 'Classical' if a composer is also a track artist, there is no writer
    # and no performer is a track artist
    if (self.globals[track]['is_recording'] and options['classical_work_parts']
            and '~artists_sort' in tm and 'composersort' in tm
            and any(x in tm['~artists_sort'] for x in tm['composersort'])
            and 'writer' not in tm
            and not any(x in tm['~artists_sort'] for x in tm['~cea_performers_sort'])):
        self.append_tag(
            release_id, tm, '~cea_work_type', 'Classical')

    # soloists / vocalists as lists (a string value is split on any of
    # self.SEPARATORS)
    if isinstance(tm['~cea_soloists'], str):
        soloists = re.split(
            '|'.join(
                self.SEPARATORS),
            tm['~cea_soloists'])
    else:
        soloists = tm['~cea_soloists']
    if '~cea_vocalists' in tm:
        if isinstance(tm['~cea_vocalists'], str):
            vocalists = re.split(
                '|'.join(
                    self.SEPARATORS),
                tm['~cea_vocalists'])
        else:
            vocalists = tm['~cea_vocalists']
    else:
        vocalists = []

    if '~cea_ensembles' in tm:
        # "large" = an orchestra and/or a choir is present
        large = False
        if 'performer:orchestra' in tm:
            large = True
            self.append_tag(
                release_id, tm, '~cea_work_type_if_classical', 'Orchestral')
            if '~cea_soloists' in tm:
                if 'vocals' in tm['~cea_instruments_all']:
                    self.append_tag(
                        release_id, tm, '~cea_work_type', 'Vocal')
                if len(soloists) == 1:
                    # a lone soloist who is not the vocalist -> concerto
                    if soloists != vocalists:
                        self.append_tag(
                            release_id, tm, '~cea_work_type_if_classical', 'Concerto')
                    else:
                        self.append_tag(
                            release_id, tm, '~cea_work_type_if_classical', 'Aria')
                elif len(soloists) == 2:
                    self.append_tag(
                        release_id, tm, '~cea_work_type_if_classical', 'Duet')
                    if not vocalists:
                        self.append_tag(
                            release_id, tm, '~cea_work_type_if_classical', 'Concerto')
                elif len(soloists) == 3:
                    self.append_tag(
                        release_id, tm, '~cea_work_type_if_classical', 'Trio')
                elif len(soloists) == 4:
                    self.append_tag(
                        release_id, tm, '~cea_work_type_if_classical', 'Quartet')

        if 'performer:choir' in tm or 'performer:choir vocals' in tm:
            large = True
            self.append_tag(
                release_id, tm, '~cea_work_type_if_classical', 'Choral')
            self.append_tag(
                release_id, tm, '~cea_work_type', 'Vocal')
        else:
            # orchestra, no choir, more than one 'vocals' in the soloists tag
            if large and 'soloists' in tm and tm['soloists'].count(
                    'vocals') > 1:
                self.append_tag(
                    release_id, tm, '~cea_work_type_if_classical', 'Opera')
        if not large:
            # ensemble(s) present but neither orchestra nor choir
            if '~cea_soloists' not in tm:
                self.append_tag(
                    release_id, tm, '~cea_work_type_if_classical', 'Chamber music')
            else:
                if vocalists:
                    self.append_tag(
                        release_id, tm, '~cea_work_type', 'Song')
                    self.append_tag(
                        release_id, tm, '~cea_work_type', 'Vocal')
                else:
                    self.append_tag(
                        release_id, tm, '~cea_work_type_if_classical', 'Chamber music')
    else:
        # no ensembles at all - decide on the number of soloists
        if len(soloists) == 1:
            if vocalists != soloists:
                self.append_tag(
                    release_id, tm, '~cea_work_type', 'Instrumental')
            else:
                self.append_tag(
                    release_id, tm, '~cea_work_type', 'Song')
                self.append_tag(
                    release_id, tm, '~cea_work_type', 'Vocal')
        elif len(soloists) == 2:
            self.append_tag(
                release_id, tm, '~cea_work_type_if_classical', 'Duet')
        elif len(soloists) == 3:
            self.append_tag(
                release_id, tm, '~cea_work_type_if_classical', 'Trio')
        elif len(soloists) == 4:
            self.append_tag(
                release_id, tm, '~cea_work_type_if_classical', 'Quartet')
        else:
            if not vocalists:
                self.append_tag(
                    release_id, tm, '~cea_work_type_if_classical', 'Chamber music')
            else:
                self.append_tag(
                    release_id, tm, '~cea_work_type', 'Song')
                self.append_tag(
                    release_id, tm, '~cea_work_type', 'Vocal')
def append_tag(self, release_id, tm, tag, source):
    """
    Log the request, then pass it to the module-level append_tag together
    with this object's SEPARATORS.
    (As a method of the class, the unqualified append_tag call below refers
    to the module-level function of that name, not to this method.)
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param tm: track metadata
    :param tag: name of the tag to append to
    :param source: value(s) to append
    :return: None
    """
    log_text = "Extra Artists - appending %s to %s"
    write_log(release_id, 'info', log_text, source, tag)
    append_tag(release_id, tm, tag, source, self.SEPARATORS)
= const.INSERTIONS 3743 3744 # First remove all existing performer tags 3745 del_list = [] 3746 for meta in tm: 3747 if 'performer' in meta: 3748 del_list.append(meta) 3749 for del_item in del_list: 3750 del tm[del_item] 3751 last_artist = [] 3752 last_inst_list = [] 3753 last_instrument = None 3754 artist_inst = [] 3755 artist_inst_list = {} 3756 for performer in performerList: 3757 artist_type = performer[0] 3758 if artist_type not in tag_strings: 3759 return None 3760 if artist_type in ['instrument', 'vocal', 'performing orchestra']: 3761 if performer[1]: 3762 inst_list = performer[1] 3763 attrib_list = [] 3764 for attrib in ['solo', 'guest', 'additional']: 3765 if attrib in inst_list: 3766 inst_list.remove(attrib) 3767 attrib_list.append(attrib) 3768 attribs = " ".join(attrib_list) 3769 instrument = ", ".join(inst_list) 3770 if not options['cea_no_solo'] and attrib_list: 3771 instrument = attribs + " " + instrument 3772 if performer[3] == last_artist: 3773 if instrument != last_instrument: 3774 artist_inst.append(instrument) 3775 else: 3776 if inst_list == last_inst_list: 3777 write_log( 3778 release_id, 'warning', 'Duplicated performer information for %s' 3779 ' (may be in Release Relationship as well as Track Relationship).' 3780 ' Duplicates have been ignored.', performer[3]) 3781 if self.WARNING: 3782 self.append_tag( 3783 release_id, 3784 tm, 3785 '~cea_warning', 3786 '2. Duplicated performer information for "' + 3787 '; '.join( 3788 performer[3]) + 3789 '" (may be in Release Relationship as well as Track Relationship).' 
3790 ' Duplicates have been ignored.') 3791 else: 3792 artist_inst = [instrument] 3793 last_artist = performer[3] 3794 last_inst_list = inst_list 3795 last_instrument = instrument 3796 3797 instrument = ", ".join(artist_inst) 3798 else: 3799 instrument = None 3800 if artist_type == 'performing orchestra': 3801 instrument = 'orchestra' 3802 artist_inst_list[tuple(performer[3])] = instrument 3803 for performer in performerList: 3804 artist_type = performer[0] 3805 if artist_type not in tag_strings: 3806 return None 3807 performing_artist = False if artist_type in [ 3808 'arranger', 'instrument arranger', 'orchestrator', 'vocal arranger'] else True 3809 if True and artist_type in [ 3810 'instrument', 3811 'vocal', 3812 'performing orchestra']: # There may be an option here (to replace 'True') 3813 # Currently groups instruments by artist - alternative has been 3814 # tested if required 3815 instrument = artist_inst_list[tuple(performer[3])] 3816 else: 3817 if performer[1]: 3818 inst_list = performer[1] 3819 if options['cea_no_solo']: 3820 for attrib in ['solo', 'guest', 'additional']: 3821 if attrib in inst_list: 3822 inst_list.remove(attrib) 3823 instrument = " ".join(inst_list) 3824 else: 3825 instrument = None 3826 if artist_type == 'performing orchestra': 3827 instrument = 'orchestra' 3828 sub_strings = {'instrument': instrument, 3829 'vocal': instrument # , 3830 # 'instrument arranger': instrument, 3831 # 'vocal arranger': instrument 3832 } 3833 for typ in ['concertmaster']: 3834 if options['cea_' + typ] and options['cea_arrangers']: 3835 sub_strings[typ] = ':' + options['cea_' + typ] 3836 3837 if options['cea_arranger']: 3838 if instrument: 3839 arr_inst = options['cea_arranger'] + ' ' + instrument 3840 else: 3841 arr_inst = options['cea_arranger'] 3842 else: 3843 arr_inst = instrument 3844 annotations = {'instrument': instrument, 3845 'vocal': instrument, 3846 'performing orchestra': instrument, 3847 'chorus master': options['cea_chorusmaster'], 3848 
'concertmaster': options['cea_concertmaster'], 3849 'arranger': options['cea_arranger'], 3850 'instrument arranger': arr_inst, 3851 'orchestrator': options['cea_orchestrator'], 3852 'vocal arranger': arr_inst} 3853 tag = tag_strings[artist_type][0] 3854 cea_tag = tag_strings[artist_type][1] 3855 sort_tag = tag_strings[artist_type][2] 3856 cea_sort_tag = tag_strings[artist_type][3] 3857 cea_names_tag = cea_tag[:-1] + '_names' 3858 cea_instrumented_tag = cea_names_tag + '_instrumented' 3859 if artist_type in sub_strings: 3860 if sub_strings[artist_type]: 3861 tag += sub_strings[artist_type] 3862 else: 3863 write_log( 3864 release_id, 3865 'warning', 3866 'No instrument/sub-key available for artist_type %s. Performer = %s. Track is %s', 3867 artist_type, 3868 performer[2], 3869 track) 3870 3871 if tag: 3872 if '~ce_tag_cleared_' + \ 3873 tag not in tm or not tm['~ce_tag_cleared_' + tag] == "Y": 3874 if tag in tm: 3875 write_log(release_id, 'info', 'delete tag %s', tag) 3876 del tm[tag] 3877 tm['~ce_tag_cleared_' + tag] = "Y" 3878 if sort_tag: 3879 if '~ce_tag_cleared_' + \ 3880 sort_tag not in tm or not tm['~ce_tag_cleared_' + sort_tag] == "Y": 3881 if sort_tag in tm: 3882 del tm[sort_tag] 3883 tm['~ce_tag_cleared_' + sort_tag] = "Y" 3884 3885 name_list = performer[2] 3886 for ind, name in enumerate(name_list): 3887 performer_type = '' 3888 sort_name = performer[3][ind] 3889 no_credit = True 3890 # change name to as-credited 3891 if (performing_artist and options['cea_performer_credited'] or 3892 not performing_artist and options['cea_composer_credited']): 3893 if sort_name in self.artist_credits[album]: 3894 no_credit = False 3895 name = self.artist_credits[album][sort_name] 3896 # over-ride with aliases and use standard MB name (not 3897 # as-credited) if no alias 3898 if (options['cea_aliases'] or not performing_artist and options['cea_aliases_composer']) and ( 3899 no_credit or options['cea_alias_overrides']): 3900 if sort_name in self.artist_aliases: 3901 name = 
self.artist_aliases[sort_name] 3902 # fix cyrillic names if not already fixed 3903 if options['cea_cyrillic']: 3904 if not only_roman_chars(name): 3905 name = remove_middle(unsort(sort_name)) 3906 # Only remove middle name where the existing 3907 # performer is in non-latin script 3908 annotated_name = name 3909 if instrument: 3910 instrumented_name = name + ' (' + instrument + ')' 3911 else: 3912 instrumented_name = name 3913 # add annotations and write performer tags 3914 if artist_type in annotations: 3915 if annotations[artist_type]: 3916 annotated_name += ' (' + annotations[artist_type] + ')' 3917 else: 3918 write_log( 3919 release_id, 3920 'warning', 3921 'No annotation (instrument) available for artist_type %s.' 3922 ' Performer = %s. Track is %s', 3923 artist_type, 3924 performer[2], 3925 track) 3926 if artist_type in insertions and options['cea_arrangers']: 3927 self.append_tag(release_id, tm, tag, annotated_name) 3928 else: 3929 if options['cea_arrangers'] or artist_type == tag: 3930 self.append_tag(release_id, tm, tag, name) 3931 3932 if options['cea_arrangers'] or artist_type == tag: 3933 if sort_tag: 3934 self.append_tag(release_id, tm, sort_tag, sort_name) 3935 if options['cea_tag_sort'] and '~' in sort_tag: 3936 explicit_sort_tag = sort_tag.replace('~', '') 3937 self.append_tag( 3938 release_id, tm, explicit_sort_tag, sort_name) 3939 3940 self.append_tag(release_id, tm, cea_tag, annotated_name) 3941 self.append_tag(release_id, tm, cea_names_tag, name) 3942 if instrumented_name != name: 3943 self.append_tag( 3944 release_id, 3945 tm, 3946 cea_instrumented_tag, 3947 instrumented_name) 3948 3949 if cea_sort_tag: 3950 self.append_tag(release_id, tm, cea_sort_tag, sort_name) 3951 3952 # differentiate soloists etc and write related tags 3953 if artist_type == 'performing orchestra' or ( 3954 instrument and instrument in self.ENSEMBLE_TYPES) or self.ensemble_type(name): 3955 performer_type = 'ensembles' 3956 self.append_tag( 3957 release_id, tm, 
'~cea_ensembles', instrumented_name) 3958 self.append_tag( 3959 release_id, tm, '~cea_ensemble_names', name) 3960 self.append_tag( 3961 release_id, tm, '~cea_ensembles_sort', sort_name) 3962 elif artist_type in ['performer', 'instrument', 'vocal']: 3963 performer_type = 'soloists' 3964 self.append_tag( 3965 release_id, tm, '~cea_soloists', instrumented_name) 3966 self.append_tag(release_id, tm, '~cea_soloist_names', name) 3967 self.append_tag( 3968 release_id, tm, '~cea_soloists_sort', sort_name) 3969 if artist_type == "vocal": 3970 self.append_tag( 3971 release_id, tm, '~cea_vocalists', instrumented_name) 3972 self.append_tag( 3973 release_id, tm, '~cea_vocalist_names', name) 3974 self.append_tag( 3975 release_id, tm, '~cea_vocalists_sort', sort_name) 3976 elif instrument: 3977 self.append_tag( 3978 release_id, tm, '~cea_instrumentalists', instrumented_name) 3979 self.append_tag( 3980 release_id, tm, '~cea_instrumentalist_names', name) 3981 self.append_tag( 3982 release_id, tm, '~cea_instrumentalists_sort', sort_name) 3983 else: 3984 self.append_tag( 3985 release_id, tm, '~cea_other_soloists', instrumented_name) 3986 self.append_tag( 3987 release_id, tm, '~cea_other_soloist_names', name) 3988 self.append_tag( 3989 release_id, tm, '~cea_other_soloists_sort', sort_name) 3990 3991 # set album artists 3992 if performer_type or artist_type == 'conductor': 3993 cea_album_tag = cea_tag.replace( 3994 'cea', 'cea_album').replace( 3995 'performers', performer_type) 3996 cea_album_sort_tag = cea_sort_tag.replace( 3997 'cea', 'cea_album').replace( 3998 'performers', performer_type) 3999 if stripsir(name) in tm['~albumartists'] or stripsir( 4000 sort_name) in tm['~albumartists_sort']: 4001 self.append_tag(release_id, tm, cea_album_tag, name) 4002 self.append_tag( 4003 release_id, tm, cea_album_sort_tag, sort_name) 4004 else: 4005 if performer_type: 4006 self.append_tag( 4007 release_id, tm, '~cea_support_performers', instrumented_name) 4008 self.append_tag( 4009 release_id, 
class WorksQueue(LockableObject):
    """Object for managing the queue of lookups.

    Wraps a plain dict (``self.queue``) whose entries are read and written
    while holding the lock provided by the base class. Every method that
    takes the lock releases it in a ``finally`` clause, so an exception
    raised while the lock is held (for example an unhashable key) cannot
    leave the queue locked.
    """

    def __init__(self):
        LockableObject.__init__(self)
        # {name: value}; entries created through append() are lists
        self.queue = {}

    def __contains__(self, name):
        return name in self.queue

    def __iter__(self):
        return iter(self.queue)

    def __getitem__(self, name):
        """Return the entry stored under *name*, or None if there is none."""
        self.lock_for_read()
        try:
            return self.queue.get(name)
        finally:
            self.unlock()

    def __setitem__(self, name, value):
        """Store *value* under *name*, replacing any existing entry."""
        self.lock_for_write()
        try:
            self.queue[name] = value
        finally:
            self.unlock()

    def append(self, name, value):
        """
        Add *value* to the list stored under *name*.

        :param name: queue key
        :param value: item to add to the list for that key
        :return: True if *name* was not yet in the queue (a new list was
            created), False if *value* was added to an existing list
        """
        self.lock_for_write()
        try:
            if name in self.queue:
                self.queue[name].append(value)
                return False
            self.queue[name] = [value]
            return True
        finally:
            self.unlock()

    def remove(self, name):
        """
        Remove the entry stored under *name*.

        :param name: queue key
        :return: the removed entry, or None if *name* was not in the queue
        """
        self.lock_for_write()
        try:
            return self.queue.pop(name, None)
        finally:
            self.unlock()
pair per id, especially for higher-level parts) 4082 4083 self.parts = collections.defaultdict( 4084 lambda: collections.defaultdict(dict)) 4085 # metadata collection for all parts - structure is {workid: {name: , 4086 # parent: , (track,album): {part_levels}}, etc} 4087 4088 self.top_works = collections.defaultdict(dict) 4089 # metadata collection for top-level works for (track, album) - 4090 # structure is {(track, album): {workId: }, etc} 4091 4092 self.trackback = collections.defaultdict( 4093 lambda: collections.defaultdict(dict)) 4094 # hierarchical iterative work structure - {album: {id: , children:{id: 4095 # , children{}, id: etc}, id: etc} } 4096 4097 self.child_listing = collections.defaultdict(list) 4098 # contains list of workIds which are descendants of a given workId, to 4099 # prevent recursion when adding new ids 4100 4101 self.work_listing = collections.defaultdict(list) 4102 # contains list of workIds for each album 4103 4104 self.top = collections.defaultdict(list) 4105 # self.top[album] = list of work Ids which are top-level works in album 4106 4107 self.options = collections.defaultdict(dict) 4108 # active Classical Extras options for current track 4109 4110 self.synonyms = collections.defaultdict(dict) 4111 # active synonym options for current track 4112 4113 self.replacements = collections.defaultdict(dict) 4114 # active synonym options for current track 4115 4116 self.file_works = collections.defaultdict(list) 4117 # list of works derived from SongKong-style file tags 4118 # structure is {(album, track): [{workid: , name: }, {workid: ....}} 4119 4120 self.album_artists = collections.defaultdict( 4121 lambda: collections.defaultdict(dict)) 4122 # collection of artists to be applied at album level 4123 4124 self.artist_aliases = {} 4125 # collection of alias names - format is {sort_name: alias_name, ...} 4126 4127 self.artist_credits = collections.defaultdict(dict) 4128 # collection of credited-as names - format is {album: {sort_name: 
def add_work_info(
        self,
        album,
        track_metadata,
        trackXmlNode,
        releaseXmlNode):
    """
    Main Routine - run for each track

    Records the release status, loads the options for the track, reads any
    SongKong-style file tags and then either builds the work information for
    the track's work ids or registers the track as having no works. When the
    track and disc numbers equal the release totals and the album has no
    outstanding requests, the album is processed and the log closed.

    :param album: album being loaded; its _new_tracks and _requests attributes are read
    :param track_metadata: metadata of the track being processed
    :param trackXmlNode: track node from the lookup, passed on to build_work_info
    :param releaseXmlNode: release node from the lookup, passed on to get_aliases
    :return: None
    """
    release_id = track_metadata['musicbrainz_albumid']
    if 'start' not in release_status[release_id]:
        release_status[release_id]['start'] = datetime.now()
    if 'lookups' not in release_status[release_id]:
        release_status[release_id]['lookups'] = 0
    release_status[release_id]['name'] = track_metadata['album']
    release_status[release_id]['works'] = True
    if config.setting['log_debug'] or config.setting['log_info']:
        write_log(
            release_id,
            'debug',
            'STARTING WORKS PROCESSING FOR ALBUM %s, DISC %s, TRACK %s',
            track_metadata['album'],
            track_metadata['discnumber'],
            track_metadata['tracknumber'] + ' ' + track_metadata['title'])
    # clear the cache if required (if this is not done, then queue count may get out of sync)
    # Jump through hoops to get track object!!
    track = album._new_tracks[-1]
    tm = track.metadata
    if config.setting['log_debug'] or config.setting['log_info']:
        write_log(
            release_id,
            'debug',
            'Cache setting for track %s is %s',
            track,
            config.setting['use_cache'])

    # OPTIONS - OVER-RIDE IF REQUIRED
    if '~ce_options' not in tm:
        if config.setting['log_debug'] or config.setting['log_info']:
            write_log(release_id, 'debug', 'Workparts gets track first...')
        get_options(release_id, album, track)
    options = interpret(tm['~ce_options'])

    if not options:
        # fall back to the current settings if the saved options are unreadable
        if config.setting['log_error']:
            write_log(
                release_id,
                'error',
                'Work Parts. Failure to read saved options for track %s. options = %s',
                track,
                tm['~ce_options'])
        options = option_settings(config.setting)
    self.options[track] = options

    # CONSTANTS
    write_log(release_id, 'basic', 'Options: %s', options)
    self.ERROR = options["log_error"]
    self.WARNING = options["log_warning"]
    self.SEPARATORS = ['; ']
    self.EQ = "EQ_TO_BE_REVERSED"  # phrase to indicate that a synonym has been used

    self.get_sk_tags(release_id, album, track, tm, options)
    self.synonyms[track] = self.get_text_tuples(
        release_id, track, 'synonyms')  # a list of tuples
    self.replacements[track] = self.get_text_tuples(
        release_id, track, 'replacements')  # a list of tuples

    # Continue?
    if not options["classical_work_parts"]:
        return

    # OPTION-DEPENDENT CONSTANTS:
    # Maximum number of XML- lookup retries if error returned from server
    self.MAX_RETRIES = options["cwp_retries"]
    self.USE_CACHE = options["use_cache"]
    if options["cwp_partial"] and options["cwp_partial_text"] and options["cwp_level0_works"]:
        options["cwp_removewords_p"] = options["cwp_removewords"] + \
            ", " + options["cwp_partial_text"] + ' '
    else:
        options["cwp_removewords_p"] = options["cwp_removewords"]
    # Explanation:
    # If "Partial" is selected then the level 0 work name will have PARTIAL_TEXT appended to it.
    # If a recording is split across several tracks then each sub-part (quasi-movement) will have the same name
    # (with the PARTIAL_TEXT added). If level 0 is used to source work names then the level 1 work name will be
    # changed to be this repeated name and will therefore also include PARTIAL_TEXT.
    # So we need to add PARTIAL_TEXT to the prefixes list to ensure it is excluded from the level 1 work name.
    #
    write_log(
        release_id,
        'debug',
        "PartLevels - LOAD NEW TRACK: :%s",
        track)
    # write_log(release_id, 'info', "trackXmlNode:") # warning - may break Picard

    # first time for this album (reloads each refresh)
    if tm['discnumber'] == '1' and tm['tracknumber'] == '1':
        # get artist aliases - these are cached so can be re-used across
        # releases, but are reloaded with each refresh
        get_aliases(self, release_id, album, options, releaseXmlNode)

    # fix titles which include composer name
    # (the tag is read from the metadata under its correct key; the previous
    # misspelt key meant this section never had any effect)
    composersort = []
    if 'composersort' in tm:
        composersort = str_to_list(tm['composersort'])
    last_name_pattern = re.compile(r'(.*),')
    composerlastnames = []
    for composer in composersort:
        match = last_name_pattern.search(composer)
        # text before the last comma of the sort name, or the whole name if no comma
        composerlastnames.append(match.group(1) if match else composer)
    title = track_metadata['title']
    # partition never raises when the title has ':' without a following space
    prefix, separator, remainder = title.partition(': ')
    if separator and prefix in composerlastnames:
        track_metadata['~cwp_title'] = remainder

    # now process works
    write_log(
        release_id,
        'info',
        'PartLevels - add_work_info - metadata load = %r',
        track_metadata)
    workIds = []
    if 'musicbrainz_workid' in tm:
        workIds = str_to_list(tm['musicbrainz_workid'])
    if workIds and not (options["ce_no_run"] and (
            not tm['~ce_file'] or tm['~ce_file'] == "None")):
        self.build_work_info(
            release_id,
            options,
            trackXmlNode,
            album,
            track,
            track_metadata,
            workIds)

    else:  # no work relation
        write_log(
            release_id,
            'warning',
            "WARNING - no works for this track: \"%s\"",
            title)
        self.append_tag(
            release_id,
            track_metadata,
            '~cwp_warning',
            '3. No works for this track')
        orphans = self.orphan_tracks.setdefault(album, [])
        if track not in orphans:
            orphans.append(track)
        # Don't publish metadata yet until all album is processed

    # last track
    write_log(
        release_id,
        'debug',
        'Check for last track. Requests = %s, Tracknumber = %s, Totaltracks = %s,'
        ' Discnumber = %s, Totaldiscs = %s',
        album._requests,
        track_metadata['tracknumber'],
        track_metadata['totaltracks'],
        track_metadata['discnumber'],
        track_metadata['totaldiscs'])
    if album._requests == 0 and track_metadata['tracknumber'] == track_metadata[
            'totaltracks'] and track_metadata['discnumber'] == track_metadata['totaldiscs']:
        self.process_album(release_id, album)
        release_status[release_id]['works-done'] = datetime.now()
        close_log(release_id, 'works')
4362 'direction:backward', 4363 'ordering-key'] 4364 parse_result = parse_data(release_id, 4365 rels, 4366 [], 4367 *match_tree_1) + parse_data(release_id, 4368 rels, 4369 [], 4370 *match_tree_2) 4371 write_log( 4372 release_id, 4373 'info', 4374 'multi-works - ordering key: %s', 4375 parse_result) 4376 if parse_result: 4377 if isinstance(parse_result[0], int): 4378 key = parse_result[0] 4379 elif isinstance(parse_result[0], str) and parse_result[0].isdigit(): 4380 key = int(parse_result[0]) 4381 else: 4382 key = 100 + i 4383 else: 4384 key = 100 + i 4385 keyed_workIds[key] = workId 4386 partial = False 4387 for key in sorted(keyed_workIds): 4388 workId = keyed_workIds[key] 4389 work_rels = parse_data( 4390 release_id, 4391 trackXmlNode, 4392 [], 4393 'recording', 4394 'relations', 4395 'target-type:work', 4396 'work.id:' + workId) 4397 write_log(release_id, 'info', 'work_rels: %s', work_rels) 4398 work_attributes = parse_data( 4399 release_id, work_rels, [], 'attributes')[0] 4400 write_log( 4401 release_id, 4402 'info', 4403 'work_attributes: %s', 4404 work_attributes) 4405 work_titles = parse_data( 4406 release_id, work_rels, [], 'work', 'title') 4407 work_list_info_item = { 4408 'id': workId, 4409 'attributes': work_attributes, 4410 'titles': work_titles} 4411 work_list_info.append(work_list_info_item) 4412 work = [] 4413 for title in work_titles: 4414 work.append(title) 4415 if options['cwp_partial']: 4416 # treat the recording as work level 0 and the work of which it 4417 # is a partial recording as work level 1 4418 if 'partial' in work_attributes: 4419 partial = True 4420 parentId = workId 4421 workId = track_metadata['musicbrainz_recordingid'] 4422 4423 works = [] 4424 for w in work: 4425 partwork = w 4426 works.append(partwork) 4427 4428 write_log( 4429 release_id, 4430 'info', 4431 "Id %s is PARTIAL RECORDING OF id: %s, name: %s", 4432 workId, 4433 parentId, 4434 work) 4435 work_list_info_item = { 4436 'id': workId, 4437 'attributes': [], 4438 'titles': 
works, 4439 'parent': parentId} 4440 work_list_info.append(work_list_info_item) 4441 write_log( 4442 release_id, 4443 'info', 4444 'work_list_info: %s', 4445 work_list_info) 4446 # we now have a list of items, where the id of each is a work id for the track or 4447 # (multiple instances of) the recording id (for partial works) 4448 # we need to turn this into a usable hierarchy - i.e. just one item 4449 workId_list = [] 4450 work_list = [] 4451 parent_list = [] 4452 attribute_list = [] 4453 workId_list_p = [] 4454 work_list_p = [] 4455 attribute_list_p = [] 4456 for w in work_list_info: 4457 if 'partial' not in w['attributes'] or not options[ 4458 'cwp_partial']: # just do the bottom-level 'works' first 4459 workId_list.append(w['id']) 4460 work_list += w['titles'] 4461 attribute_list += w['attributes'] 4462 if 'parent' in w: 4463 if w['parent'] not in parent_list: # avoid duplicating parents! 4464 parent_list.append(w['parent']) 4465 else: 4466 workId_list_p.append(w['id']) 4467 work_list_p += w['titles'] 4468 attribute_list_p += w['attributes'] 4469 # de-duplicate work names 4470 # list(set()) won't work as need to retain order 4471 work_list = list(collections.OrderedDict.fromkeys(work_list)) 4472 work_list_p = list(collections.OrderedDict.fromkeys(work_list_p)) 4473 4474 workId_tuple = tuple(workId_list) 4475 workId_tuple_p = tuple(workId_list_p) 4476 if workId_tuple not in self.work_listing[album]: 4477 self.work_listing[album].append(workId_tuple) 4478 if workId_tuple not in self.parts or not self.USE_CACHE: 4479 self.parts[workId_tuple]['name'] = work_list 4480 if parent_list: 4481 if workId_tuple in self.works_cache: 4482 self.works_cache[workId_tuple] += parent_list 4483 self.parts[workId_tuple]['parent'] += parent_list 4484 else: 4485 self.works_cache[workId_tuple] = parent_list 4486 self.parts[workId_tuple]['parent'] = parent_list 4487 self.parts[workId_tuple_p]['name'] = work_list_p 4488 if workId_tuple_p not in self.work_listing[album]: 4489 
def get_sk_tags(self, release_id, album, track, tm, options):
    """
    Get file tags which are consistent with SongKong's metadata usage

    Does nothing unless options["cwp_use_sk"] is set and the track carries a
    usable '~ce_file' tag. Otherwise the original metadata of that file is
    read and, when it holds both a composition id and a work id, the works
    found are appended to self.file_works[(album, track)] and registered in
    self.parts, self.works_cache, self.work_listing, self.top_works and
    self.top (works already in the cache are left alone).

    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param album: album being loaded; album.tagger.files is read
    :param track: track being processed
    :param tm: track metadata; warning/error tags are appended to it
    :param options: options in force for the track
    :return: None
    """
    if not options["cwp_use_sk"]:
        return None
    if '~ce_file' not in tm or not interpret(tm['~ce_file']):
        return None
    file_tags = album.tagger.files[tm['~ce_file']].orig_metadata
    if 'musicbrainz_work_composition_id' not in file_tags \
            or 'musicbrainz_workid' not in file_tags:
        return None

    def report_unnamed(id_value):
        # an id tag was found without the matching name tag
        write_log(
            release_id,
            'error',
            "No matching work name for id tag %s",
            id_value)
        if self.ERROR:
            self.append_tag(
                release_id,
                tm,
                '~cwp_error',
                '2. No matching work name for id tag ' + id_value)

    # composition (top) level
    if 'musicbrainz_work_composition' not in file_tags:
        report_unnamed(file_tags['musicbrainz_work_composition_id'])
        return None
    if 'musicbrainz_work' in file_tags \
            and file_tags['musicbrainz_work_composition_id'] == file_tags['musicbrainz_workid'] \
            and file_tags['musicbrainz_work_composition'] != file_tags['musicbrainz_work']:
        # Picard may have overwritten SongKong tag (top
        # work id) with bottom work id
        write_log(
            release_id,
            'warning',
            'File tag musicbrainz_workid incorrect? id = %s. Sourcing from MB',
            file_tags['musicbrainz_workid'])
        if self.WARNING:
            self.append_tag(
                release_id,
                tm,
                '~cwp_warning',
                '4. File tag musicbrainz_workid incorrect? id = ' +
                file_tags['musicbrainz_workid'] +
                '. Sourcing from MB')
        return None
    write_log(
        release_id,
        'info',
        'Read from file tag: musicbrainz_work_composition_id: %s',
        file_tags['musicbrainz_work_composition_id'])
    track_works = self.file_works[(album, track)]
    track_works.append({
        'workid': file_tags['musicbrainz_work_composition_id'].split('; '),
        'name': file_tags['musicbrainz_work_composition']})

    # numbered part levels, for as long as consecutive id tags exist
    level = 1
    while True:
        id_tag = 'musicbrainz_work_part_level' + str(level) + '_id'
        if id_tag not in file_tags:
            break
        name_tag = 'musicbrainz_work_part_level' + str(level)
        if name_tag not in file_tags:
            report_unnamed(file_tags[id_tag])
            break
        track_works.append({
            'workid': file_tags[id_tag].split('; '),
            'name': file_tags[name_tag]})
        level += 1

    # the work id itself, when it differs from the composition id
    if file_tags['musicbrainz_work_composition_id'] != file_tags['musicbrainz_workid']:
        if 'musicbrainz_work' not in file_tags:
            report_unnamed(file_tags['musicbrainz_workid'])
            return None
        track_works.append({
            'workid': file_tags['musicbrainz_workid'].split('; '),
            'name': file_tags['musicbrainz_work']})

    level_count = len(track_works)
    write_log(release_id,
              'debug',
              'Loaded works from file tags for track %s. Works: %s: ',
              track,
              track_works)
    for position, file_work in enumerate(track_works):
        work_key = tuple(file_work['workid'])
        if work_key in self.works_cache:
            # Use cache in preference to file tags
            continue
        if work_key not in self.work_listing[album]:
            self.work_listing[album].append(work_key)
        self.parts[work_key]['name'] = [file_work['name']]
        # the next entry in the list (if any) is treated as this work's parent
        parent_ids = None
        parent_name = ''
        if position < level_count - 1:
            following = track_works[position + 1]
            parent_ids = following['workid']
            parent_name = following['name']

        if parent_ids:
            self.works_cache[work_key] = parent_ids
            self.parts[work_key]['parent'] = parent_ids
            self.parts[tuple(parent_ids)]['name'] = [parent_name]
        else:
            # so we remember we looked it up and found none
            self.parts[work_key]['no_parent'] = True
            self.top_works[(track, album)]['workId'] = work_key
            if work_key not in self.top[album]:
                self.top[album].append(work_key)
    return None
def work_add_track(self, album, track, workId, tries, user_data=True):
    """
    Add the work to the lookup queue

    The (track, album) pair is always queued against the work id, but the
    web-service request is only issued when the id was not already queued.

    :param album: album the track belongs to
    :param track: track that needs the work
    :param workId: id of the work to look up
    :param tries: number of lookup attempts
    :param user_data: if false, user tags are not requested and no login is used
    :return: result of the web-service call if a lookup was started, otherwise None
    """
    release_id = track.metadata['musicbrainz_albumid']
    write_log(
        release_id,
        'debug',
        "ADDING WORK TO LOOKUP QUEUE for work %s",
        workId)
    self.album_add_request(release_id, album)
    # to change the _requests variable to indicate that there are pending
    # requests for this item and delay Picard from finalizing the album
    write_log(
        release_id,
        'debug',
        "Added lookup request for id %s. Requests = %s",
        workId,
        album._requests)

    # All work combos are queued, but only new workIds are passed to XML lookup
    newly_queued = self.works_queue.append(workId, (track, album))
    if not newly_queued:
        write_log(
            release_id,
            'debug',
            "Work is already in queue: %s",
            workId)
        return None

    settings = config.setting
    host = settings["server_host"]
    port = settings["server_port"]
    path = "/ws/2/%s/%s" % ('work', workId)

    # decide whether tags (and user tags, which need a login) are requested
    login = False
    tag_type = ''
    if settings['cwp_aliases'] and settings['cwp_aliases_tag_text']:
        tag_type = '+tags'
        if settings['cwp_aliases_tags_user'] and user_data:
            login = True
            tag_type = '+tags +user-tags'
    queryargs = {
        "inc": "work-rels+artist-rels+label-rels+place-rels+aliases" + tag_type}

    write_log(
        release_id,
        'debug',
        "Initiating XML lookup for %s......",
        workId)
    if release_id in release_status and 'lookups' in release_status[release_id]:
        release_status[release_id]['lookups'] += 1
    # work_process is called back with the response, with workId and tries bound
    callback = partial(self.work_process, workId, tries)
    return album.tagger.webservice.get(
        host,
        port,
        path,
        callback,
        # parse_response_type="xml",
        priority=True,
        important=False,
        mblogin=login,
        queryargs=queryargs)
4823 4824 if error: 4825 tuples = self.works_queue.remove(workId) 4826 for track, album in tuples: 4827 release_id = track.metadata['musicbrainz_albumid'] 4828 write_log( 4829 release_id, 4830 'warning', 4831 "%r: Network error retrieving work record. Error code %r", 4832 workId, 4833 error) 4834 write_log( 4835 release_id, 4836 'debug', 4837 "Removed request after network error. Requests = %s", 4838 album._requests) 4839 if tries < self.MAX_RETRIES: 4840 user_data = True 4841 write_log(release_id, 'debug', "REQUEUEING...") 4842 if str(error) == '204': # Authentication error 4843 write_log( 4844 release_id, 'debug', "... without user authentication") 4845 user_data = False 4846 self.append_tag( 4847 release_id, 4848 track.metadata, 4849 '~cwp_error', 4850 '3. Authentication failure - data retrieval omits user-specific requests') 4851 self.work_add_track( 4852 album, track, workId, tries + 1, user_data) 4853 else: 4854 write_log( 4855 release_id, 4856 'error', 4857 "EXHAUSTED MAX RE-TRIES for XML lookup for track %s", 4858 track) 4859 if self.ERROR: 4860 self.append_tag( 4861 release_id, 4862 track.metadata, 4863 '~cwp_error', 4864 "4. ERROR: MISSING METADATA due to network errors. Re-try or fix manually.") 4865 self.album_remove_request(release_id, album) 4866 return 4867 4868 tuples = self.works_queue.remove(workId) 4869 if tuples: 4870 new_queue = [] 4871 prev_album = None 4872 album = tuples[0][1] # just added to prevent technical "reference before assignment" error 4873 release_id = 'No_release_id' 4874 for (track, album) in tuples: 4875 release_id = track.metadata['musicbrainz_albumid'] 4876 # Note that this need to be set here as the work may cover 4877 # multiple albums 4878 if album != prev_album: 4879 write_log(release_id, 'debug', 4880 "Work_process. 
FOUND WORK: %s for album %s", 4881 workId, album) 4882 write_log( 4883 release_id, 4884 'debug', 4885 "Requests for album %s = %s", 4886 album, 4887 album._requests) 4888 prev_album = album 4889 write_log(release_id, 'info', "RESPONSE = %s", response) 4890 # find the id_tuple(s) key with workId in it 4891 wid_list = [] 4892 for w in self.work_listing[album]: 4893 if workId in w and w not in wid_list: 4894 wid_list.append(w) 4895 write_log( 4896 release_id, 4897 'info', 4898 'wid_list for %s is %s', 4899 workId, 4900 wid_list) 4901 for wid in wid_list: # wid is a tuple 4902 write_log( 4903 release_id, 4904 'info', 4905 'processing workId tuple: %r', 4906 wid) 4907 metaList = self.work_process_metadata( 4908 release_id, workId, wid, track, response) 4909 parentList = metaList[0] 4910 # returns [[parent id], [parent name], attribute_list] or None if no parent 4911 # found 4912 arrangers = metaList[1] 4913 # not just arrangers - also composers, lyricists etc. 4914 if wid in self.parts: 4915 4916 if arrangers: 4917 if 'arrangers' in self.parts[wid]: 4918 self.parts[wid]['arrangers'] += arrangers 4919 else: 4920 self.parts[wid]['arrangers'] = arrangers 4921 4922 if parentList: 4923 # first fix the sort order of multi-works at the prev level 4924 # so that recordings of multiple movements of the same parent work will have the 4925 # movements listed in the correct order (i.e. 
4926 # ordering-key, if available) 4927 if len(wid) > 1: 4928 for idx in wid: 4929 if idx == workId: 4930 match_tree = [ 4931 'relations', 4932 'target-type:work', 4933 'direction:backward', 4934 'ordering-key'] 4935 parse_result = parse_data( 4936 release_id, response, [], *match_tree) 4937 write_log( 4938 release_id, 4939 'info', 4940 'multi-works - ordering key for id %s is %s', 4941 idx, 4942 parse_result) 4943 if parse_result: 4944 if isinstance( 4945 parse_result[0], str) and parse_result[0].isdigit(): 4946 key = int(parse_result[0]) 4947 elif isinstance(parse_result[0], int): 4948 key = parse_result[0] 4949 else: 4950 key = 9999 4951 self.parts[wid]['order'][idx] = key 4952 4953 parentIds = parentList[0] 4954 parents = parentList[1] 4955 parent_attributes = parentList[2] 4956 write_log( 4957 release_id, 4958 'info', 4959 'Parents - ids: %s, names: %s', 4960 parentIds, 4961 parents) 4962 # remove any parents that are descendants of wid as 4963 # they will result in circular references 4964 del_list = [] 4965 for i, parentId in enumerate(parentIds): 4966 for work_item in wid: 4967 if work_item in self.child_listing and parentId in self.child_listing[ 4968 work_item]: 4969 del_list.append(i) 4970 for i in list(set(del_list)): 4971 removed_id = parentIds.pop(i) 4972 removed_name = parents.pop(i) 4973 write_log( 4974 release_id, 'error', "Found parent which is descendant of child - " 4975 "not using, to prevent circular references. id = %s," 4976 " name = %s", removed_id, removed_name) 4977 tm = track.metadata 4978 self.append_tag( 4979 release_id, 4980 tm, 4981 '~cwp_error', 4982 '5. Found parent which which is descendant of child - not using ' 4983 'to prevent circular references. 
id = ' + 4984 removed_id + 4985 ', name = ' + 4986 removed_name) 4987 is_collection = False 4988 for attribute in parent_attributes: 4989 if attribute['collection']: 4990 is_collection = True 4991 break 4992 # de-dup parent ids before we start 4993 parentIds = list( 4994 collections.OrderedDict.fromkeys(parentIds)) 4995 4996 # add descendants to checklist to prevent recursion 4997 for p in parentIds: 4998 for w in wid: 4999 self.child_listing[p].append(w) 5000 if w in self.child_listing: 5001 self.child_listing[p] += self.child_listing[w] 5002 5003 if parentIds: 5004 if wid in self.works_cache: 5005 # Make sure we haven't done this 5006 # relationship before, perhaps for another 5007 # album 5008 5009 if not (set( 5010 self.works_cache[wid]) >= set(parentIds)): 5011 prev_ids = tuple(self.works_cache[wid]) 5012 prev_name = self.parts[prev_ids]['name'] 5013 self.works_cache[wid] = add_list_uniquely( 5014 self.works_cache[wid], parentIds) 5015 self.parts[wid]['parent'] = add_list_uniquely( 5016 self.parts[wid]['parent'], parentIds) 5017 index = self.work_listing[album].index( 5018 prev_ids) 5019 new_id_list = add_list_uniquely( 5020 list(prev_ids), parentIds) 5021 new_ids = tuple(new_id_list) 5022 self.work_listing[album][index] = new_ids 5023 self.parts[new_ids] = self.parts[prev_ids] 5024 #del self.parts[prev_ids] # Removed from here to deal with multi-parent parts. De-dup now takes place in process_albums. 5025 self.parts[new_ids]['name'] = add_list_uniquely( 5026 prev_name, parents) 5027 parentIds = new_id_list 5028 write_log( 5029 release_id, 5030 'debug', 5031 "In work_process. 
Changed wid in self.part: prev_ids = %s, new_ids = %s, prev_name = %s, new name = %s", 5032 prev_ids, 5033 new_ids, 5034 prev_name, 5035 self.parts[new_ids]['name']) 5036 5037 5038 else: 5039 self.works_cache[wid] = parentIds 5040 self.parts[wid]['parent'] = parentIds 5041 self.parts[tuple(parentIds) 5042 ]['name'] = parents 5043 self.work_listing[album].append( 5044 tuple(parentIds)) 5045 # de-duplicate the parent names 5046 # self.parts[tuple(parentIds)]['name'] = list( 5047 # collections.OrderedDict.fromkeys(self.parts[tuple(parentIds)]['name'])) 5048 # list(set()) won't work as need to retain order 5049 self.parts[tuple(parentIds)]['is_collection'] = is_collection 5050 write_log( 5051 release_id, 5052 'debug', 5053 "In work_process. self.parts[%s]['is_collection']: %s", 5054 tuple(parentIds), 5055 self.parts[tuple(parentIds)]['is_collection']) 5056 # de-duplicate the parent ids also, otherwise they will be treated as a separate parent 5057 # in the trackback structure 5058 self.parts[wid]['parent'] = list( 5059 collections.OrderedDict.fromkeys( 5060 self.parts[wid]['parent'])) 5061 self.works_cache[wid] = list( 5062 collections.OrderedDict.fromkeys( 5063 self.works_cache[wid])) 5064 write_log( 5065 release_id, 5066 'info', 5067 'Added parent ids to work_listing: %s, [Requests = %s]', 5068 parentIds, 5069 album._requests) 5070 write_log( 5071 release_id, 5072 'info', 5073 'work_listing after adding parents: %s', 5074 self.work_listing[album]) 5075 # the higher-level work might already be in 5076 # cache from another album 5077 if tuple( 5078 parentIds) in self.works_cache and self.USE_CACHE: 5079 not_in_cache = self.check_cache( 5080 track.metadata, album, track, tuple(parentIds), []) 5081 for workId_tuple in not_in_cache: 5082 new_queue.append( 5083 (release_id, album, track, workId_tuple)) 5084 5085 else: 5086 if not self.USE_CACHE: 5087 if tuple( 5088 parentIds) in self.works_cache: 5089 del self.works_cache[tuple( 5090 parentIds)] 5091 for parentId in 
def work_process_metadata(self, release_id, workId, wid, track, response):
    """
    Process the lookup response for one work and store the results in self.parts[wid]

    Stores 'folks_genres', 'worktype_genres', 'key', 'composed_dates',
    'published_dates' and 'premiered_dates'. If 'artist_locale' is configured and a
    primary alias exists for that language, 'alias' and 'tags' are stored too.
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    NB release_id may be from a different album than the original, if works lookups are identical
    :param workId: the id of the work the response relates to
    :param wid: The work id tuple of which workId is a member
    :param track: passed through to work_process_relations
    :param response: the lookup response, read via parse_data
    :return: the result of work_process_relations for the response's relations
    """
    write_log(release_id, 'debug', "In work_process_metadata")
    all_tags = parse_data(release_id, response, [], 'tags', 'name')
    self.parts[wid]['folks_genres'] = all_tags
    self.parts[wid]['worktype_genres'] = parse_data(
        release_id, response, [], 'type')
    key = parse_data(
        release_id, response, [], 'attributes', 'type:Key', 'value')
    self.parts[wid]['key'] = key

    def relation_dates(target_type, relation_type):
        # Build the date list for one relationship type: begin years if begin and
        # end agree throughout, otherwise "begin<DATE_SEP>end" per pair, with a
        # pair whose two years are equal shown as the single year.
        begin_dates = year(
            parse_data(
                release_id, response, [],
                'relations', target_type, relation_type, 'begin'))
        end_dates = year(
            parse_data(
                release_id, response, [],
                'relations', target_type, relation_type, 'end'))
        if begin_dates == end_dates:
            return begin_dates
        # NB zip stops at the shorter list if the two differ in length
        return [b + DATE_SEP + e if b != e else b
                for b, e in zip(begin_dates, end_dates)]

    # Composed dates already collapsed equal pairs; published and premiered dates
    # now do the same instead of producing e.g. "1900<DATE_SEP>1900".
    self.parts[wid]['composed_dates'] = relation_dates(
        'target-type:artist', 'type:composer')
    self.parts[wid]['published_dates'] = relation_dates(
        'target-type:label', 'type:publishing')
    self.parts[wid]['premiered_dates'] = relation_dates(
        'target-type:place', 'type:premiere')

    if 'artist_locale' in config.setting:
        locale = config.setting["artist_locale"]
        # NB this is the Picard code in /util
        lang = locale.split("_")[0]
        alias = parse_data(release_id, response, [], 'aliases',
                           'locale:' + lang, 'primary:True', 'name')
        user_tags = parse_data(
            release_id, response, [], 'user-tags', 'name')
        if config.setting['cwp_aliases_tags_user']:
            tags = user_tags
        else:
            tags = all_tags
        if alias:
            # start from a copy of the names and overwrite the entry for workId
            self.parts[wid]['alias'] = self.parts[wid]['name'][:]
            self.parts[wid]['tags'] = tags
            for ind, w in enumerate(wid):
                if w == workId:
                    # alias should be a one item list but just in case it isn't...
                    if len(self.parts[wid]['alias']) > ind:
                        # The condition here is just to trap errors caused by database inconsistencies
                        # (e.g. a part is shown as a recording of two works, one of which is an arrangement
                        # of the other - this can create a two-item wid with a one-item
                        # self.parts[wid]['name'])
                        self.parts[wid]['alias'][ind] = '; '.join(
                            alias)
    relation_list = parse_data(release_id, response, [], 'relations')
    return self.work_process_relations(
        release_id, track, workId, wid, relation_list)
def work_process_relations(
        self,
        release_id,
        track,
        workId,
        wid,
        relations):
    """
    Find the parents etc. of a work from its relations

    Parents come from backward 'parts' relations. If there are none, a backward
    'arrangement' relation is used (option 'cwp_arrangements'), and failing that a
    'medley' relation (option 'cwp_medley').
    NB track is just the last album/track for this work - used as being
    representative for options identification. If this is inconsistent (e.g. different
    collections option for albums with the same works) then the latest added track will
    over-ride others' settings.
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param track: track whose options are used; config.setting is used if falsy
    :param workId: not used in this method
    :param wid: key in self.parts which gets the 'arrangement' / 'medley_list' markers
    :param relations: relations data, read via parse_data and get_artists
    :return: [(parent ids, parent names, attributes list), artists]
    """
    write_log(
        release_id,
        'debug',
        "In work_process_relations. Relations--> %s",
        relations)
    options = self.options[track] if track else config.setting
    parent_ids = []
    parent_names = []
    attributes_list = []
    parts_path = ('target-type:work', 'type:parts', 'direction:backward')
    relation_attributes = parse_data(
        release_id, relations, [], *parts_path, 'attributes')
    parent_works = []
    write_log(
        release_id,
        'debug',
        "relation_attributes--> %s",
        relation_attributes)
    for relation_attribute in relation_attributes:
        if ('part of collection' not in relation_attribute) or options['cwp_collections']:
            parent_works += parse_data(
                release_id, relations, [], *parts_path, 'work')
            attributes_list.append({
                'collection': ('part of collection' in relation_attribute),
                'movements': ('movement' in relation_attribute),
                'acts': ('act' in relation_attribute),
                'numbers': ('number' in relation_attribute)})
        else:
            # i.e. the relation is "part of collection" and collections are switched off
            write_log(
                release_id,
                'info',
                'Not getting parent work because relationship is "part of collection" and option not selected')

    if parent_works:
        write_log(release_id, 'info', 'new_work_list: %s', parent_works)
        parent_ids = parse_data(release_id, parent_works, [], 'id')
        parent_names = parse_data(release_id, parent_works, [], 'title')
    else:
        arrangement_of = parse_data(
            release_id, relations, [],
            'target-type:work', 'type:arrangement', 'direction:backward', 'work')
        if arrangement_of and options['cwp_arrangements']:
            parent_ids = parse_data(release_id, arrangement_of, [], 'id')
            parent_names = parse_data(release_id, arrangement_of, [], 'title')
            self.parts[wid]['arrangement'] = True
        else:
            medley_of = parse_data(
                release_id, relations, [],
                'target-type:work', 'type:medley', 'work')
            direction = parse_data(
                release_id, relations, [],
                'target-type:work', 'type:medley', 'direction')
            if 'backward' not in direction:
                write_log(
                    release_id, 'info', 'Medley_of: %s', medley_of)
                if medley_of and options['cwp_medley']:
                    medley_list = []
                    medley_id_list = []
                    for medley_item in medley_of:
                        # (parse_data returns a list...)
                        medley_list += parse_data(
                            release_id, medley_item, [], 'title')
                        medley_id_list += parse_data(
                            release_id, medley_item, [], 'id')
                    parent_ids = medley_id_list
                    parent_names = medley_list
                    write_log(
                        release_id, 'info', 'Medley_list: %s', medley_list)
                    self.parts[wid]['medley_list'] = medley_list

    write_log(
        release_id,
        'info',
        'New works: ids: %s, names: %s, attributes: %s',
        parent_ids,
        parent_names,
        attributes_list)

    artists = get_artists(
        options, release_id, {}, relations, 'work')['artists']
    # artist_types = ['arranger', 'instrument arranger', 'orchestrator', 'composer', 'writer', 'lyricist',
    #                 'librettist', 'revised by', 'translator', 'reconstructed by', 'vocal arranger']

    write_log(release_id, 'info', "ARTISTS %s", artists)

    return [(parent_ids, parent_names, attributes_list), artists]
@staticmethod
def album_add_request(release_id, album):
    """
    Count one more outstanding lookup against the album
    (used to keep track as to whether all lookups have been processed)
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param album: object whose _requests counter is incremented
    :return: None
    """
    album._requests = album._requests + 1
    write_log(
        release_id, 'debug',
        "Added album request - requests: %s", album._requests)

@staticmethod
def album_remove_request(release_id, album):
    """
    Count one fewer outstanding lookup against the album
    (used to keep track as to whether all lookups have been processed)
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param album: object whose _requests counter is decremented
    :return: None
    """
    album._requests = album._requests - 1
    write_log(
        release_id, 'debug',
        "Removed album request - requests: %s", album._requests)
def process_album(self, release_id, album):
    """
    Top routine to run end-of-album processes

    In order: de-duplicates the part names, builds the parent -> children listing
    (self.partof[album]) and the list of top-level works (self.top[album]) from
    self.works_cache, then for each top work builds the trackback tree, calculates
    its levels and sets the track metadata via process_trackback. Movement numbers
    are assigned in track-number order, metadata is published for every track in
    self.tracks[album] and finally the orphan tracks are processed.
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param album: the album being processed (used as key into the per-album dicts)
    :return: None
    """
    write_log(release_id, 'debug', "PROCESS ALBUM %s", album)
    release_status[release_id]['done-lookups'] = datetime.now()
    # De-duplicate names in self.parts, maintaining order (in case part names have been arrived at via multiple paths)
    # NB this runs over all of self.parts, not just the parts of this album
    for part_item in self.parts:
        if 'name' in self.parts[part_item]:
            self.parts[part_item]['name'] = list(collections.OrderedDict.fromkeys(str_to_list(self.parts[part_item]['name'])))
    # populate the inverse hierarchy
    write_log(release_id, 'info', "Cache: %s", self.works_cache)
    write_log(release_id, 'info', "Work listing %s", self.work_listing)
    # comma-separated option text -> list of stripped tag names
    alias_tag_list = config.setting['cwp_aliases_tag_text'].split(',')
    for i, tag_item in enumerate(alias_tag_list):
        alias_tag_list[i] = tag_item.strip()
    for workId in self.work_listing[album]:
        if workId in self.parts:
            write_log(
                release_id,
                'info',
                'Processing workid: %s',
                workId)
            write_log(
                release_id,
                'info',
                'self.work_listing[album]: %s',
                self.work_listing[album])
            if len(workId) > 1:
                # fix the order of names using ordering keys gathered in
                # work_process
                if 'order' in self.parts[workId]:
                    seq = []
                    for idx in workId:
                        if idx in self.parts[workId]['order']:
                            seq.append(self.parts[workId]['order'][idx])
                        else:
                            # for the possibility of workids not part of
                            # the same parent and not all ordered
                            seq.append(999)
                    # NB zip stops at the shorter of the name list and seq
                    zipped_names = zip(self.parts[workId]['name'], seq)
                    sorted_tups = sorted(zipped_names, key=lambda x: x[1])
                    self.parts[workId]['name'] = [x[0]
                                                  for x in sorted_tups]
            # use aliases where appropriate
            # name is a list - need a string to test for Latin chars
            name_string = '; '.join(self.parts[workId]['name'])
            if config.setting['cwp_aliases']:
                # alias is used if: all aliases wanted, or the name has non-roman
                # characters (and that option is set), or the work has one of the alias tags
                if config.setting['cwp_aliases_all'] or (
                        config.setting['cwp_aliases_greek'] and not only_roman_chars(name_string)) or (
                        'tags' in self.parts[workId] and any(
                            x in self.parts[workId]['tags'] for x in alias_tag_list)):
                    if 'alias' in self.parts[workId] and self.parts[workId]['alias']:
                        self.parts[workId]['name'] = self.parts[workId]['alias'][:]
            topId = None
            write_log(
                release_id,
                'info',
                'Works_cache: %s',
                self.works_cache)
            if workId in self.works_cache:
                # the parents of a work are treated as one unit, keyed by the tuple of their ids
                parentIds = tuple(self.works_cache[workId])
                # for parentId in parentIds:
                write_log(
                    release_id,
                    'debug',
                    "Create inverses: %s, %s",
                    workId,
                    parentIds)
                if parentIds in self.partof[album]:
                    if workId not in self.partof[album][parentIds]:
                        self.partof[album][parentIds].append(workId)
                else:
                    self.partof[album][parentIds] = [workId]
                write_log(release_id, 'info', "Partof: %s",
                          self.partof[album][parentIds])
                if 'no_parent' in self.parts[parentIds]:
                    # to handle case if album includes works already in
                    # cache from a different album
                    if self.parts[parentIds]['no_parent']:
                        topId = parentIds
            else:
                # no cached parent, so this work is itself a top-level work
                topId = workId
            if topId:
                if album in self.top:
                    if topId not in self.top[album]:
                        self.top[album].append(topId)
                else:
                    self.top[album] = [topId]
    # work out the full hierarchy and part levels
    height = 0
    write_log(
        release_id,
        'info',
        "TOP: %s, \nALBUM: %s, \nTOP[ALBUM]: %s",
        self.top,
        album,
        self.top[album])
    # flag (1 or 0) - also used arithmetically when setting ref_level below
    if len(self.top[album]) > 1:
        single_work_album = 0
    else:
        single_work_album = 1
    for topId in self.top[album]:
        self.create_trackback(release_id, album, topId)
        write_log(
            release_id,
            'info',
            "Top id = %s, Name = %s",
            topId,
            self.parts[topId]['name'])
        write_log(
            release_id,
            'info',
            "Trackback before levels: %s",
            self.trackback[album][topId])
        # level_calc also writes 'height' and 'depth' into each trackback node
        work_part_levels = self.level_calc(
            release_id, self.trackback[album][topId], height)
        write_log(
            release_id,
            'info',
            "Trackback after levels: %s",
            self.trackback[album][topId])
        # determine the level which will be the principal 'work' level
        if work_part_levels >= 3:
            ref_level = work_part_levels - single_work_album
        else:
            ref_level = work_part_levels
        # extended metadata scheme won't display more than 3 work levels
        # ref_level = min(3, ref_level)
        ref_height = work_part_levels - ref_level
        top_info = {
            'levels': work_part_levels,
            'id': topId,
            'name': self.parts[topId]['name'],
            'single': single_work_album}
        # set the metadata in sequence defined by the work structure
        answer = self.process_trackback(
            release_id,
            album,
            self.trackback[album][topId],
            ref_height,
            top_info)
        ##
        # trackback is a tree in the form {album: {id: , children:{id: , children{},
        #                                                          id: etc},
        #                                          id: etc} }
        # process_trackback uses the trackback tree to derive title and level_0 based hierarchies
        # from the structure. It also returns a tuple (id, tracks), where tracks has the structure
        # {'track': [(track, height), (track, height), ...tuples...]
        # 'work': [[worknames], [worknames], ...lists...]
        # 'tracknumber': [num, num, ...floats of form n.nnn = disc.track...]
        # 'title': [title, title, ...strings...]}
        # each list is the same length - i.e. the number of tracks for the top work
        # there can be more than one workname for a track
        # height is the number of part levels for the related track
        ##
        if answer:
            # NB zip pairs the lists positionally and stops at the shorter one
            tracks = sorted(zip(answer[1]['track'], answer[1]['tracknumber']), key=lambda x: x[1])
            # need them in tracknumber sequence for the movement numbers to be correct
            write_log(release_id, 'info', "TRACKS: %s", tracks)
            # work_part_levels = self.trackback[album][topId]['depth']
            movement_count = 0
            prev_movementgroup = None
            for track, _ in tracks:
                movement_count += 1
                # track is a (track object, height) tuple
                track_meta = track[0]
                tm = track_meta.metadata
                if '~cwp_workid_0' in tm:
                    workIds = tuple(str_to_list(tm['~cwp_workid_0']))
                    if workIds:
                        count = 0
                        self.process_work_artists(
                            release_id, album, track_meta, workIds, tm, count)
                title_work_levels = 0
                if '~cwp_title_work_levels' in tm:
                    title_work_levels = int(tm['~cwp_title_work_levels'])
                movementgroup = self.extend_metadata(
                    release_id,
                    top_info,
                    track_meta,
                    ref_height,
                    title_work_levels)  # revise for new data
                if track_meta not in self.tracks[album]:
                    self.tracks[album][track_meta] = {}
                if movementgroup:
                    # restart the numbering whenever the movement group changes
                    if movementgroup != prev_movementgroup:
                        movement_count = 1
                    write_log(
                        release_id,
                        'debug',
                        "processing movements for track: %s - movement-group is %s",
                        track, movementgroup)
                    self.tracks[album][track_meta]['movement-group'] = movementgroup
                    self.tracks[album][track_meta]['movement-number'] = movement_count
                    # overwritten for each track, so ends up as the number of the group's last track
                    self.parts[tuple(movementgroup)]['movement-total'] = movement_count
                prev_movementgroup = movementgroup

        write_log(
            release_id,
            'debug',
            "FINISHED TRACK PROCESSING FOR Top work id: %s",
            topId)
    # Need to redo the loop so that all album-wide tm is updated before
    # publishing
    for track, movement_info in self.tracks[album].items():
        self.publish_metadata(release_id, album, track, movement_info)
    # #
    # The messages below are normally commented out as they get VERY long if there are a lot of albums loaded
    # For extreme debugging, remove the comments and just run one or a few albums
    # Do not forget to comment out again.
    # #
    # write_log(release_id, 'info', 'Self.parts: %s', self.parts)
    # write_log(release_id, 'info', 'Self.trackback: %s', self.trackback)

    # tidy up
    self.trackback[album].clear()
    # Finally process the orphan tracks
    if album in self.orphan_tracks:
        for track in self.orphan_tracks[album]:
            tm = track.metadata
            options = self.options[track]
            if options['cwp_derive_works_from_title']:
                # the same derived values are written to all the work / part / inter-work variants
                work, movt, inter_work = self.derive_from_title(release_id, track, tm['title'])
                tm['~cwp_extended_work'] = tm['~cwp_extended_groupheading'] = tm['~cwp_title_work'] = \
                    tm['~cwp_title_groupheading'] = tm['~cwp_work'] = tm['~cwp_groupheading']= work
                tm['~cwp_part'] = tm['~cwp_extended_part'] = tm['~cwp_title_part_0'] = movt
                tm['~cwp_inter_work'] = tm['~cwp_extended_inter_work'] = tm['~cwp_inter_title_work'] = inter_work
            self.publish_metadata(release_id, album, track)
    write_log(release_id, 'debug', "PROCESS ALBUM function complete")
def create_trackback(self, release_id, album, parentId):
    """
    Create an inverse listing of the work-parent relationships, working
    recursively down from parentId through self.partof[album]
    :param release_id: name for log file
    :param album: album whose partof / trackback listings are used
    :param parentId: work-id tuple to build the trackback for
    :return: trackback for a given parentId
    """
    write_log(release_id, 'debug', "Create trackback for %s", parentId)
    album_partof = self.partof[album]
    if parentId in album_partof:  # NB parentId is a tuple
        for child in album_partof[parentId]:  # NB child is a tuple
            if child in album_partof:
                # the child is itself a parent: build its subtree first
                child_entry = self.create_trackback(release_id, album, child)
            else:
                child_entry = self.trackback[album][child]
            self.append_trackback(release_id, album, parentId, child_entry)
    return self.trackback[album][parentId]
def append_trackback(self, release_id, album, parentId, child):
    """
    Add child to the 'children' list of the trackback entry for parentId,
    creating the entry and/or the list if needed (part of the recursive
    process to populate trackback)
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param album: album whose trackback is being built
    :param parentId: work-id tuple of the parent
    :param child: trackback entry of the child
    :return: the trackback entry for parentId
    """
    write_log(release_id, 'debug', "In append_trackback...")
    album_trackback = self.trackback[album]
    if parentId not in album_trackback:  # NB parentId is a tuple
        # relies on a missing entry being created when it is first indexed
        album_trackback[parentId]['id'] = parentId
        album_trackback[parentId]['children'] = [child]
        write_log(
            release_id, 'info',
            "New PARENT %s - ADDED %s as child",
            self.parts[parentId]['name'], child)
    elif 'children' not in album_trackback[parentId]:
        album_trackback[parentId]['children'] = [child]
        write_log(
            release_id, 'info',
            "Existing PARENT %s - ADDED %s as child",
            self.parts[parentId]['name'], child)
    elif child not in album_trackback[parentId]['children']:
        write_log(release_id, 'info', "TRYING TO APPEND...")
        album_trackback[parentId]['children'].append(child)
        write_log(
            release_id, 'info',
            "...PARENT %s - ADDED %s as child",
            self.parts[parentId]['name'], child)
    else:
        write_log(
            release_id, 'info',
            "Parent %s already has %s as child", parentId, child)
    write_log(
        release_id, 'info',
        "APPENDED TRACKBACK: %s", album_trackback[parentId])
    return album_trackback[parentId]
def level_calc(self, release_id, trackback, height):
    """
    Recursive process to determine the max level for a work.
    Also records 'height' and 'depth' in every node visited.
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param trackback: trackback node (dict), with its children (if any) under 'children'
    :param height: number of levels above this one
    :return: number of levels below this node (0 for a node without 'children')
    """
    write_log(release_id, 'debug', 'In level_calc process')
    trackback['height'] = height
    if 'children' not in trackback:
        write_log(release_id, 'info', "Got to bottom")
        trackback['depth'] = 0
        return 0

    child_height = height + 1
    deepest = 0
    for node in trackback['children']:
        write_log(release_id, 'info', "CHILD: %s", node)
        node_depth = 1 + self.level_calc(release_id, node, child_height)
        write_log(release_id, 'info', "DEPTH: %s", node_depth)
        if node_depth > deepest:
            deepest = node_depth
    trackback['depth'] = deepest
    return deepest
Trackback = %s", 5818 trackback) 5819 tracks = collections.defaultdict(dict) 5820 process_now = False 5821 if 'meta' in trackback: 5822 for track, album in trackback['meta']: 5823 if album_req == album: 5824 process_now = True 5825 if process_now or 'children' not in trackback: 5826 if 'meta' in trackback and 'id' in trackback and 'depth' in trackback and 'height' in trackback: 5827 write_log(release_id, 'info', "Processing level 0") 5828 depth = trackback['depth'] 5829 height = trackback['height'] 5830 workId = tuple(trackback['id']) 5831 if depth != 0: 5832 if 'children' in trackback: 5833 child_response = self.process_trackback_children( 5834 release_id, album_req, trackback, ref_height, top_info, tracks) 5835 tracks = child_response[1] 5836 write_log( 5837 release_id, 5838 'info', 5839 'Bottom level for this trackback is higher level elsewhere - adjusting levels') 5840 depth = 0 5841 write_log(release_id, 'info', "WorkId: %s, Work name: %s", workId, self.parts[workId]['name']) 5842 for track, album in trackback['meta']: 5843 if album == album_req: 5844 write_log(release_id, 'info', "Track: %s", track) 5845 tm = track.metadata 5846 write_log( 5847 release_id, 'info', "Track metadata = %s", tm) 5848 tm['~cwp_workid_' + str(depth)] = workId 5849 self.write_tags(release_id, track, tm, workId) 5850 self.make_annotations(release_id, track, workId) 5851 # strip leading and trailing spaces from work names 5852 if isinstance(self.parts[workId]['name'], str): 5853 worktemp = self.parts[workId]['name'].strip() 5854 else: 5855 for index, it in enumerate( 5856 self.parts[workId]['name']): 5857 self.parts[workId]['name'][index] = it.strip() 5858 worktemp = self.parts[workId]['name'] 5859 if isinstance(top_info['name'], str): 5860 toptemp = top_info['name'].strip() 5861 else: 5862 for index, it in enumerate(top_info['name']): 5863 top_info['name'][index] = it.strip() 5864 toptemp = top_info['name'] 5865 tm['~cwp_work_' + str(depth)] = worktemp 5866 tm['~cwp_part_levels'] = 
def process_trackback_children(
        self,
        release_id,
        album_req,
        trackback,
        ref_height,
        top_info,
        tracks):
    """
    Process every child of a trackback node and collect the resulting tracks.

    Each child is passed to process_trackback. For every (track, height) pair
    in a child's answer, set_metadata is called with
    part_level = track height - this node's height, and the track's details
    are added to the 'track', 'title', 'work' and 'tracknumber' lists in
    ``tracks``. Once all children are done, derive_from_structure is called
    for 'title' and, if the first collected track has the "cwp_level0_works"
    option set, for 'work' as well.
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param album_req: passed through unchanged to process_trackback
    :param trackback: dict for the parent node; must hold 'id', 'depth' and
    'height', and its 'children' entry is iterated
    :param ref_height: passed through unchanged to process_trackback
    :param top_info: passed through to process_trackback and
    derive_from_structure
    :param tracks: dict of accumulated lists; updated in place
    :return: (parentId, tracks), or None if trackback lacks 'id', 'depth' or
    'height', or if no tracks were collected
    """
    if not ('id' in trackback and 'depth' in trackback and 'height' in trackback):
        return None

    write_log(
        release_id,
        'debug',
        'In process_children_trackback for trackback %s',
        trackback)
    depth = trackback['depth']
    height = trackback['height']
    parentId = tuple(trackback['id'])
    parent = self.parts[parentId]['name']

    # width ends up as the number of children visited
    width = 0
    for width, child in enumerate(trackback['children'], start=1):
        write_log(release_id, 'info', "child trackback = %s", child)
        answer = self.process_trackback(
            release_id, album_req, child, ref_height, top_info)
        if not answer:
            continue
        workId = answer[0]
        for track_meta, track_height in answer[1]['track']:
            part_level = track_height - height
            write_log(
                release_id,
                'debug',
                "Calling set metadata %s",
                (part_level, workId, parentId, parent, track_meta))
            self.set_metadata(
                release_id, part_level, workId, parentId, parent, track_meta)
            tracks.setdefault('track', []).append((track_meta, track_height))

            tm = track_meta.metadata
            # ~cwp_title if composer had to be removed
            title = tm['~cwp_title'] or tm['title']
            tracks.setdefault('title', []).append(title)

            # getall makes sure we get it as a list
            work = tm.getall('~cwp_work_0')
            tracks.setdefault('work', []).append(work)

            if 'tracknumber' not in tm:
                tm['tracknumber'] = 0
            if 'discnumber' not in tm:
                tm['discnumber'] = 0
            # disc number in the integer part, track number in the thousandths
            position = int(tm['discnumber']) + (int(tm['tracknumber']) / 1000)
            tracks.setdefault('tracknumber', []).append(position)

    if not (tracks and 'track' in tracks):
        return None

    # NB this will only be the first track of tracks, but its
    # options will be used for the structure
    first_track = tracks['track'][0][0]
    self.derive_from_structure(
        release_id, top_info, tracks, height, depth, width, 'title')
    if self.options[first_track]["cwp_level0_works"]:
        # replace hierarchical works with those from work_0 (for
        # consistency)
        self.derive_from_structure(
            release_id, top_info, tracks, height, depth, width, 'work')

    write_log(
        release_id,
        'info',
        "Trackback result for %s = %s",
        parentId,
        tracks)
    response = parentId, tracks
    write_log(
        release_id,
        'debug',
        "LEAVING PROCESS_CHILD_TRACKBACK depth %s Response = %s",
        depth,
        response)
    return response
height 1 => work_0 & work_1) 6032 :param height: number of levels above the current one 6033 :param depth: maximum number of levels 6034 :param width: number of siblings 6035 :param name_type: work or title 6036 :return: 6037 """ 6038 if 'track' in tracks: 6039 track = tracks['track'][0][0] 6040 # NB this will only be the first track of tracks, but its 6041 # options will be used for the structure 6042 single_work_track = False # default 6043 write_log( 6044 release_id, 6045 'debug', 6046 "Deriving info for %s from structure for tracks %s", 6047 name_type, 6048 tracks['track']) 6049 write_log( 6050 release_id, 6051 'info', 6052 '%ss are %r', 6053 name_type, 6054 tracks[name_type]) 6055 if 'tracknumber' in tracks: 6056 sorted_tracknumbers = sorted(tracks['tracknumber']) 6057 else: 6058 sorted_tracknumbers = None 6059 write_log( 6060 release_id, 6061 'info', 6062 "SORTED TRACKNUMBERS: %s", 6063 sorted_tracknumbers) 6064 common_len = 0 6065 if name_type in tracks: 6066 meta_str = "_title" if name_type == 'title' else "_X0" 6067 # in case of works, could be a list of lists 6068 name_list = tracks[name_type] 6069 write_log( 6070 release_id, 6071 'info', 6072 "%s list %s", 6073 name_type, 6074 name_list) 6075 if len(name_list) == 1: # only one track in this work so try and extract using colons 6076 single_work_track = True 6077 track_height = tracks['track'][0][1] 6078 if track_height - height > 0: # track_height - height == part_level 6079 if name_type == 'title': 6080 write_log( 6081 release_id, 6082 'debug', 6083 "Single track work. Deriving directly from title text: %s", 6084 track) 6085 ti = name_list[0] 6086 common_subset = self.derive_from_title( 6087 release_id, track, ti)[0] 6088 else: 6089 common_subset = "" 6090 else: 6091 common_subset = name_list[0] 6092 write_log( 6093 release_id, 6094 'info', 6095 "%s is single-track work. 
common_subset is set to %s", 6096 tracks['track'][0][0], 6097 common_subset) 6098 if common_subset: 6099 common_len = len(common_subset) 6100 else: 6101 common_len = 0 6102 else: # NB if names are lists of lists, we'll assume they all start the same way 6103 if isinstance(name_list[0], list): 6104 compare = name_list[0][0].split() 6105 else: 6106 # a list of the words in the first name 6107 compare = name_list[0].split() 6108 for name_item in name_list: 6109 if isinstance(name_item, list): 6110 name = name_item[0] 6111 else: 6112 name = name_item 6113 lcs = longest_common_sequence(compare, name.split()) 6114 compare = lcs['sequence'] 6115 if not compare: 6116 common_len = 0 6117 break 6118 if lcs['length'] > 0: 6119 common_subset = " ".join(compare) 6120 write_log( 6121 release_id, 6122 'info', 6123 "Common subset from %ss at level %s, item name %s ..........", 6124 name_type, 6125 tracks['track'][0][1] - 6126 height, 6127 name) 6128 write_log( 6129 release_id, 'info', "..........is %s", common_subset) 6130 common_len = len(common_subset) 6131 6132 write_log( 6133 release_id, 6134 'info', 6135 "checked for common sequence - length is %s", 6136 common_len) 6137 for track_index, track_item in enumerate(tracks['track']): 6138 track_meta = track_item[0] 6139 tm = track_meta.metadata 6140 top_level = int(tm['~cwp_part_levels']) 6141 part_level = track_item[1] - height 6142 if common_len > 0: 6143 self.create_work_levels(release_id, name_type, tracks, track, track_index, 6144 track_meta, tm, meta_str, part_level, depth, width, common_len) 6145 6146 else: # (no common substring at this level) 6147 if name_type == 'work': 6148 write_log(release_id, 'info', 6149 'single track work - indicator = %s. 
track = %s, part_level = %s, top_level = %s', 6150 single_work_track, track_item, part_level, top_level) 6151 if part_level >= top_level: # so it won't be covered by top-down action 6152 for level in range( 6153 0, part_level + 1): # fill in the missing work names from the canonical list 6154 if '~cwp' + meta_str + '_work_' + \ 6155 str(level) not in tm: 6156 tm['~cwp' + 6157 meta_str + 6158 '_work_' + 6159 str(level)] = tm['~cwp_work_' + 6160 str(level)] 6161 if level > 0: 6162 self.level0_warn(release_id, tm, level) 6163 if '~cwp' + meta_str + '_part_' + \ 6164 str(level) not in tm and '~cwp_part_' + str(level) in tm: 6165 tm['~cwp' + 6166 meta_str + 6167 '_part_' + 6168 str(level)] = tm['~cwp_part_' + 6169 str(level)] 6170 if level > 0: 6171 self.level0_warn(release_id, tm, level) 6172 6173 6174 def create_work_levels(self, release_id, name_type, tracks, track, track_index, 6175 track_meta, tm, meta_str, part_level, depth, width, common_len): 6176 """ 6177 For a group of tracks with common metadata in the title/level0 work, create the work structure 6178 for that metadata, using the structure in the MB database 6179 :param release_id: 6180 :param name_type: title or work 6181 :param tracks: {'track':[(track1, height1), (track2, height2), ...], 'work': [work1, work2,...], 6182 'title': [title1, title2, ...], 'tracknumber': [tracknumber1, tracknumber2, ...]} 6183 where height is the number of levels in total in the branch for that track (i.e. height 1 => work_0 & work_1) 6184 :param track: 6185 :param track_index: index of track in tracks 6186 :param track_meta: 6187 :param tm: track meta (dup?) 
6188 :param meta_str: string created from name_type 6189 :param part_level: The level of the current item in the works hierarchy 6190 :param depth: The number of levels below the current item 6191 :param width: The number of children of the current item 6192 :param common_len: length of the common text 6193 :return: 6194 """ 6195 allow_repeats = True 6196 write_log( 6197 release_id, 6198 'info', 6199 "Use %s info for track: %s at level %s", 6200 name_type, 6201 track_meta, 6202 part_level) 6203 name = tracks[name_type][track_index] 6204 if isinstance(name, list): 6205 work = name[0][:common_len] 6206 else: 6207 work = name[:common_len] 6208 work = work.rstrip(":,.;- ") 6209 if self.options[track]["cwp_removewords_p"]: 6210 removewords = self.options[track]["cwp_removewords_p"].split( 6211 ',') 6212 else: 6213 removewords = [] 6214 write_log( 6215 release_id, 6216 'info', 6217 "Prefixes (in %s) = %s", 6218 name_type, 6219 removewords) 6220 for prefix in removewords: 6221 prefix2 = str(prefix).lower().rstrip() 6222 if prefix2[0] != " ": 6223 prefix2 = " " + prefix2 6224 write_log( 6225 release_id, 'info', "checking prefix %s", prefix2) 6226 if work.lower().endswith(prefix2): 6227 if len(prefix2) > 0: 6228 work = work[:-len(prefix2)] 6229 common_len = len(work) 6230 work = work.rstrip(":,.;- ") 6231 if work.lower() == prefix2.strip(): 6232 work = '' 6233 common_len = 0 6234 write_log( 6235 release_id, 6236 'info', 6237 "work after prefix strip %s", 6238 work) 6239 write_log(release_id, 'info', "Prefixes checked") 6240 6241 tm['~cwp' + meta_str + '_work_' + 6242 str(part_level)] = work 6243 6244 if part_level > 0 and name_type == "work": 6245 write_log( 6246 release_id, 6247 'info', 6248 'checking if %s is repeated name at part_level = %s', 6249 work, 6250 part_level) 6251 write_log(release_id, 'info', 'lower work name is %s', 6252 tm['~cwp' + meta_str + '_work_' + str(part_level - 1)]) 6253 # fill in missing names caused by no common string at lower levels 6254 # 
count the missing levels and push the current name 6255 # down to the lowest missing level 6256 missing_levels = 0 6257 fill_level = part_level - 1 6258 while '~cwp' + meta_str + '_work_' + \ 6259 str(fill_level) not in tm: 6260 missing_levels += 1 6261 fill_level -= 1 6262 if fill_level < 0: 6263 break 6264 write_log( 6265 release_id, 6266 'info', 6267 'there is/are %s missing level(s)', 6268 missing_levels) 6269 if missing_levels > 0: 6270 allow_repeats = True 6271 for lev in range( 6272 part_level - missing_levels, part_level): 6273 6274 if lev > 0: # not filled_lowest and lev > 0: 6275 tm['~cwp' + meta_str + 6276 '_work_' + str(lev)] = work 6277 tm['~cwp' + 6278 meta_str + 6279 '_part_' + 6280 str(lev - 1)] = self.strip_parent_from_work(track, 6281 release_id, 6282 interpret(tm['~cwp' + meta_str + '_work_' 6283 + str(lev - 1)]), 6284 tm['~cwp' + meta_str + '_work_' + str(lev)], 6285 lev - 1, False)[0] 6286 else: 6287 tm['~cwp' + meta_str + '_work_' + str(lev)] = tm['~cwp_work_' + str(lev)] 6288 6289 if missing_levels > 0: 6290 write_log(release_id, 'info', 'lower work name is now %r', tm.getall( 6291 '~cwp' + meta_str + '_work_' + str(part_level - 1))) 6292 # now fix the repeated work name at this level 6293 if work == tm['~cwp' + meta_str + '_work_' + 6294 str(part_level - 1)] and not allow_repeats: 6295 tm['~cwp' + 6296 meta_str + 6297 '_work_' + 6298 str(part_level)] = tm['~cwp_work_' + 6299 str(part_level)] 6300 self.level0_warn(release_id, tm, part_level) 6301 tm['~cwp' + 6302 meta_str + 6303 '_part_' + 6304 str(part_level - 6305 1)] = self.strip_parent_from_work(track, 6306 release_id, 6307 tm.getall('~cwp' + meta_str + '_work_' + str(part_level - 1)), 6308 tm['~cwp' + meta_str + '_work_' + str(part_level)], 6309 part_level - 1, False)[0] 6310 if part_level == 1: 6311 if isinstance(name, list): 6312 movt = [x[common_len:].strip().lstrip(":,.;- ") 6313 for x in name] 6314 else: 6315 movt = name[common_len:].strip().lstrip(":,.;- ") 6316 write_log( 6317 
def level0_warn(self, release_id, tm, level):
    """
    Report that level 0 could not be used as the work name source.

    Always writes a 'warning' entry to the log. If self.WARNING is truthy,
    the same message (prefixed "5. ") is also appended to the
    '~cwp_warning' tag via append_tag.
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param tm: metadata object handed to append_tag
    :param level: the level number quoted in the message
    :return: None
    """
    write_log(
        release_id,
        'warning',
        'Unable to use level 0 as work name source in level %s - using hierarchy instead',
        level)
    if not self.WARNING:
        return
    tag_text = ('5. Unable to use level 0 as work name source in level '
                + str(level)
                + ' - using hierarchy instead')
    self.append_tag(release_id, tm, '~cwp_warning', tag_text)
def write_tags(self, release_id, track, tm, workId):
    """
    Write genre, key and date tags for a work from self.parts

    Candidate genres are gathered from the work's 'folks_genres' and
    'worktype_genres' entries, each only when the matching option
    ('cwp_genres_use_folks' / 'cwp_genres_use_worktype') is set and the
    entry exists. All tags are written through append_tag.
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param track: key into self.options for the options to apply
    :param tm: track metadata
    :param workId: key into self.parts for the current work
    :return: None - just writes tags
    """
    opts = self.options[track]
    work_info = self.parts[workId]

    genres = []
    if opts['cwp_genres_use_folks'] and 'folks_genres' in work_info:
        genres.extend(work_info['folks_genres'])
    if opts['cwp_genres_use_worktype'] and 'worktype_genres' in work_info:
        genres.extend(work_info['worktype_genres'])
        self.append_tag(
            release_id,
            tm,
            '~cwp_worktype_genres',
            work_info['worktype_genres'])
    self.append_tag(release_id, tm, '~cwp_candidate_genres', genres)

    # remaining tags are copied straight from the work entry, in this order
    direct_tags = (
        ('~cwp_keys', 'key'),
        ('~cwp_composed_dates', 'composed_dates'),
        ('~cwp_published_dates', 'published_dates'),
        ('~cwp_premiered_dates', 'premiered_dates'),
    )
    for tag_name, part_key in direct_tags:
        self.append_tag(release_id, tm, tag_name, work_info[part_key])
keys = %s, names = %s', 6512 self.parts[wid]['key'], 6513 self.parts[wid]['name']) 6514 # add all the parent names to the string for checking - 6515 work_name = list_to_str(self.parts[wid]['name']) 6516 work_chk = wid 6517 while work_chk in self.works_cache: 6518 parent_chk = tuple(self.works_cache[work_chk]) 6519 if parent_chk in self.parts and self.parts[parent_chk] and 'name' in self.parts[parent_chk] and self.parts[parent_chk]['name']: 6520 parent_name = list_to_str(self.parts[parent_chk]['name']) 6521 p_name_orig = self.parts[parent_chk]['name'] 6522 p_chk = self.parts[parent_chk] 6523 work_name = parent_name + ': ' + work_name 6524 work_chk = parent_chk 6525 # now see if the key has been mentioned in the work or its parents 6526 for key in self.parts[wid]['key']: 6527 # if not any([key.lower() in x.lower() for x in 6528 # str_to_list(work_name)]): # TODO remove 6529 if not key.lower() in work_name.lower(): 6530 keys.append(key) 6531 annotations = keys + workdates 6532 if annotations: 6533 self.parts[wid]['annotations'] = annotations 6534 else: 6535 if 'annotations' in self.parts[wid]: 6536 del self.parts[wid]['annotations'] 6537 write_log( 6538 release_id, 6539 'info', 6540 'make annotations has set id %s on track %s with annotation %s', 6541 wid, 6542 track, 6543 annotations) 6544 write_log( 6545 release_id, 6546 'debug', 6547 "Ending module %s", 6548 'make_annotations') 6549 6550 @staticmethod 6551 def derive_from_title(release_id, track, title): 6552 """ 6553 Attempt to parse title to get components 6554 :param release_id: name for log file - usually =musicbrainz_albumid 6555 unless called outside metadata processor 6556 :param track: 6557 :param title: 6558 :return: 6559 """ 6560 write_log( 6561 release_id, 6562 'info', 6563 "DERIVING METADATA FROM TITLE for track: %s", 6564 track) 6565 tm = track.metadata 6566 movt = title 6567 work = "" 6568 colons = title.count(": ") 6569 inter_work = None 6570 if '~cwp_part_levels' in tm: 6571 part_levels = 
def process_work_artists(
        self,
        release_id,
        album,
        track,
        workIds,
        tm,
        count):
    """
    Pass a work's arrangers to set_work_artists, then repeat for its parent

    Does nothing (beyond logging) unless the 'classical_extra_artists'
    option is set for the track. If the work has an 'arrangers' entry in
    self.parts, set_work_artists is called with it. If the work is a key in
    self.works_cache, the method calls itself on the cached value (converted
    to a tuple) with count incremented by one.
    :param release_id: name for log file - usually =musicbrainz_albumid
    unless called outside metadata processor
    :param album: passed through to set_work_artists
    :param track: key into self.options; also passed to set_work_artists
    :param workIds: key into self.parts and self.works_cache for the work
    :param tm: passed through to set_work_artists
    :param count: passed to set_work_artists; increased by one for each
    step up through self.works_cache
    :return: None
    """
    if not self.options[track]['classical_extra_artists']:
        write_log(
            release_id,
            'debug',
            'Not processing work_artists as ExtraArtists not selected to be run')
        return None

    write_log(
        release_id,
        'debug',
        'In process_work_artists for track: %s, workIds: %s',
        track,
        workIds)
    write_log(
        release_id,
        'debug',
        'In process_work_artists for track: %s, self.parts: %s',
        track,
        self.parts)

    has_arrangers = workIds in self.parts and 'arrangers' in self.parts[workIds]
    if has_arrangers:
        write_log(
            release_id,
            'info',
            'Arrangers = %s',
            self.parts[workIds]['arrangers'])
        set_work_artists(
            self,
            release_id,
            album,
            track,
            self.parts[workIds]['arrangers'],
            tm,
            count)

    if workIds in self.works_cache:
        # move one step up via the works cache and repeat
        count += 1
        parent_ids = tuple(self.works_cache[workIds])
        self.process_work_artists(
            release_id, album, track, parent_ids, tm, count)
where all tracks are part of one work which is implicitly the album 6694 # without there being a groupheading for it 6695 ref_level = part_levels - ref_height 6696 # work_ref_level = work_part_levels - ref_height # not currently used 6697 6698 # replace works and parts by those derived from the level 0 work, where 6699 # required, available and appropriate, but only use work names based on 6700 # level 0 text if it doesn't cause ambiguity 6701 6702 # before embellishing with partial / arrangement etc 6703 vanilla_part = tm['~cwp_part_0'] 6704 6705 # Fix text for arrangements, partials and medleys (Done here so that 6706 # cache can be used) 6707 if options['cwp_arrangements'] and options["cwp_arrangements_text"]: 6708 for lev in range( 6709 0, 6710 ref_level): # top level will not be an arrangement else there would be a higher level 6711 # needs to be a tuple to match 6712 if '~cwp_workid_' + str(lev) in tm: 6713 tup_id = tuple(str_to_list(tm['~cwp_workid_' + str(lev)])) 6714 if 'arrangement' in self.parts[tup_id] and self.parts[tup_id]['arrangement']: 6715 update_list = ['~cwp_work_', '~cwp_part_'] 6716 if options["cwp_level0_works"] and '~cwp_X0_work_' + \ 6717 str(lev) in tm: 6718 update_list += ['~cwp_X0_work_', '~cwp_X0_part_'] 6719 for item in update_list: 6720 tm[item + str(lev)] = options["cwp_arrangements_text"] + \ 6721 ' ' + tm[item + str(lev)] 6722 6723 if options['cwp_partial'] and options["cwp_partial_text"]: 6724 if '~cwp_workid_0' in tm: 6725 work0_id = tuple(str_to_list(tm['~cwp_workid_0'])) 6726 if 'partial' in self.parts[work0_id] and self.parts[work0_id]['partial']: 6727 update_list = ['~cwp_work_0', '~cwp_part_0'] 6728 if options["cwp_level0_works"] and '~cwp_X0_work_0' in tm: 6729 update_list += ['~cwp_X0_work_0', '~cwp_X0_part_0'] 6730 for item in update_list: 6731 meta_item = tm.getall(item) 6732 if isinstance( 6733 meta_item, list): # it should be a list as I think getall always returns a list 6734 if meta_item == []: 6735 
meta_item.append(options["cwp_partial_text"]) 6736 else: 6737 for ind, w in enumerate(meta_item): 6738 meta_item[ind] = options["cwp_partial_text"] + ' ' + w 6739 write_log( 6740 release_id, 'info', 'now meta item is %s', meta_item) 6741 tm[item] = meta_item 6742 else: 6743 tm[item] = options["cwp_partial_text"] + \ 6744 ' ' + tm[item] 6745 write_log( 6746 release_id, 'info', 'meta item is not a list') 6747 6748 # fix "type 1" medley text 6749 if options['cwp_medley']: 6750 for lev in range(0, ref_level + 1): 6751 if '~cwp_workid_' + str(lev) in tm: 6752 tup_id = tuple(str_to_list(tm['~cwp_workid_' + str(lev)])) 6753 if 'medley_list' in self.parts[tup_id] and self.parts[tup_id]['medley_list']: 6754 medley_list = self.parts[tup_id]['medley_list'] 6755 tm['~cwp_work_' + str(lev)] += " (" + options["cwp_medley_text"] + \ 6756 ': ' + ', '.join(medley_list) + ")" 6757 if '~cwp_part_' + str(lev) in tm: 6758 tm['~cwp_part_' + str( 6759 lev)] = "(" + options["cwp_medley_text"] + ") " + tm['~cwp_part_' + str(lev)] 6760 6761 # add any annotations for dates and keys 6762 if options['cwp_workdate_include'] or options['cwp_key_include'] or options['cwp_key_contingent_include']: 6763 if options["cwp_titles"] and part_levels == 0: 6764 # ~cwp_title_work_0 will not have been set, but need it to hold any annotations 6765 tm['~cwp_title_work_0'] = tm['~cwp_title'] or tm['title'] 6766 for lev in range(0, part_levels + 1): 6767 if '~cwp_workid_' + str(lev) in tm: 6768 tup_id = tuple(str_to_list(tm['~cwp_workid_' + str(lev)])) 6769 if 'annotations' in self.parts[tup_id]: 6770 write_log( 6771 release_id, 6772 'info', 6773 'in extend_metadata, annotations for id %s on track %s are %s', 6774 tup_id, 6775 track, 6776 self.parts[tup_id]['annotations']) 6777 tm['~cwp_work_' + str(lev)] += " (" + \ 6778 ', '.join(self.parts[tup_id]['annotations']) + ")" 6779 if options["cwp_level0_works"] and '~cwp_X0_work_' + \ 6780 str(lev) in tm: 6781 tm['~cwp_X0_work_' + str(lev)] += " (" + ', '.join( 
6782 self.parts[tup_id]['annotations']) + ")" 6783 if options["cwp_titles"] and '~cwp_title_work_' + \ 6784 str(lev) in tm: 6785 tm['~cwp_title_work_' + str(lev)] += " (" + ', '.join( 6786 self.parts[tup_id]['annotations']) + ")" 6787 if lev < part_levels: 6788 if 'stripped_annotations' in self.parts[tup_id]: 6789 if self.parts[tup_id]['stripped_annotations']: 6790 tm['~cwp_part_' + str(lev)] += " (" + ', '.join( 6791 self.parts[tup_id]['stripped_annotations']) + ")" 6792 if options["cwp_level0_works"] and '~cwp_X0_part_' + \ 6793 str(lev) in tm: 6794 tm['~cwp_X0_part_' + str(lev)] += " (" + ', '.join( 6795 self.parts[tup_id]['stripped_annotations']) + ")" 6796 if options["cwp_titles"] and '~cwp_title_part_' + \ 6797 str(lev) in tm: 6798 tm['~cwp_title_part' + str(lev)] += " (" + ', '.join( 6799 self.parts[tup_id]['stripped_annotations']) + ")" 6800 6801 part = [] 6802 work = [] 6803 for level in range(0, part_levels): 6804 part.append(tm['~cwp_part_' + str(level)]) 6805 work.append(tm['~cwp_work_' + str(level)]) 6806 work.append(tm['~cwp_work_' + str(part_levels)]) 6807 6808 # Use level_0-derived names if applicable 6809 if options["cwp_level0_works"]: 6810 for level in range(0, part_levels + 1): 6811 if '~cwp_X0_work_' + str(level) in tm: 6812 work[level] = tm['~cwp_X0_work_' + str(level)] 6813 else: 6814 if level != 0: 6815 work[level] = '' 6816 if part and len(part) > level: 6817 if '~cwp_X0_part_' + str(level) in tm: 6818 part[level] = tm['~cwp_X0_part_' + str(level)] 6819 else: 6820 if level != 0: 6821 part[level] = '' 6822 6823 # set up group heading and part 6824 if part_levels > 0: 6825 groupheading = work[1] 6826 work_main = work[ref_level] 6827 inter_work = None 6828 work_titles = tm['~cwp_title_work_' + str(ref_level)] 6829 if ref_level > 1: 6830 for r in range(1, ref_level): 6831 if inter_work: 6832 inter_work = ': ' + inter_work 6833 inter_work = part[r] + (inter_work or '') 6834 groupheading = work[ref_level] + ':: ' + (inter_work or '') 6835 else: 
6836 groupheading = work[0] 6837 work_main = groupheading 6838 inter_work = None 6839 work_titles = None 6840 6841 # determine movement grouping (highest level that is not a collection) 6842 if '~cwp_workid_top' in tm: 6843 movementgroup = tuple(str_to_list(tm['~cwp_workid_top'])) 6844 n = part_levels 6845 write_log( 6846 release_id, 6847 'debug', 6848 "In extend. self.parts[%s]['is_collection']: %s", 6849 movementgroup, 6850 self.parts[movementgroup]['is_collection']) 6851 while self.parts[movementgroup]['is_collection']: 6852 n -= 1 6853 if n < 0: 6854 # shouldn't happen in theory as bottom level can't be a collection, but just in case... 6855 break 6856 if '~cwp_workid_' + str(n) in tm: 6857 movementgroup = tuple(str_to_list(tm['~cwp_workid_' + str(n)])) 6858 else: 6859 break 6860 6861 # set part text (initially) 6862 if part: 6863 part_main = part[0] 6864 else: 6865 part_main = work[0] 6866 tm['~cwp_part'] = part_main 6867 6868 # fix medley text for "type 2" medleys 6869 type2_medley = False 6870 if self.parts[tuple(str_to_list(tm['~cwp_workid_0'])) 6871 ]['medley'] and options['cwp_medley']: 6872 if options["cwp_medley_text"]: 6873 if part_levels > 0: 6874 medleyheading = groupheading + ':: ' + part[0] 6875 else: 6876 medleyheading = groupheading 6877 groupheading = medleyheading + \ 6878 ' (' + options["cwp_medley_text"] + ')' 6879 type2_medley = True 6880 6881 tm['~cwp_groupheading'] = groupheading 6882 tm['~cwp_work'] = work_main 6883 tm['~cwp_inter_work'] = inter_work 6884 tm['~cwp_title_work'] = work_titles 6885 write_log( 6886 release_id, 6887 'debug', 6888 "Groupheading set to: %s", 6889 groupheading) 6890 # extend group heading from title metadata 6891 if groupheading: 6892 ext_groupheading = groupheading 6893 title_groupheading = None 6894 ext_work = work_main 6895 ext_inter_work = inter_work 6896 inter_title_work = "" 6897 6898 if '~cwp_title_work_levels' in tm: 6899 6900 title_depth = int(tm['~cwp_title_work_levels']) 6901 write_log( 6902 
release_id, 6903 'info', 6904 "Title_depth: %s", 6905 title_depth) 6906 diff_work = [""] * ref_level 6907 diff_part = [""] * ref_level 6908 title_tag = [""] 6909 # level 0 work for title # was 'x' # to avoid errors, reset 6910 # before used 6911 tw_str_lower = 'title' 6912 max_d = min(ref_level, title_depth) + 1 6913 for d in range(1, max_d): 6914 tw_str = '~cwp_title_work_' + str(d) 6915 write_log(release_id, 'info', "TW_STR = %s", tw_str) 6916 if tw_str in tm: 6917 title_tag.append(tm[tw_str]) 6918 title_work = title_tag[d] 6919 work_main = '' 6920 for w in range(d, ref_level + 1): 6921 work_main += (work[w] + ' ') 6922 diff_work[d - 1] = self.diff_pair( 6923 release_id, track, tm, work_main, title_work) 6924 if diff_work[d - 1]: 6925 diff_work[d - 1] = diff_work[d - 1].strip('.;:-,') 6926 if diff_work[d - 1] == '…': 6927 diff_work[d - 1] = '' 6928 if d > 1 and tw_str_lower in tm: 6929 title_part = self.strip_parent_from_work( 6930 track, release_id, tm[tw_str_lower], tm[tw_str], 0, False)[0] 6931 if title_part: 6932 title_part = title_part.strip(' .;:-,') 6933 tm['~cwp_title_part_' + 6934 str(d - 1)] = title_part 6935 part_n = part[d - 1] 6936 diff_part[d - 1] = self.diff_pair( 6937 release_id, track, tm, part_n, title_part) or "" 6938 if diff_part[d - 1] == '…': 6939 diff_part[d - 1] = '' 6940 else: 6941 title_tag.append('') 6942 tw_str_lower = tw_str 6943 # remove duplicate items at lower levels in diff_work: 6944 for w in range(ref_level - 2, -1, -1): 6945 for higher in range(1, ref_level - w): 6946 if diff_work[w] and diff_work[w + higher]: 6947 diff_work[w] = diff_work[w].replace( 6948 diff_work[w + higher], '').strip(' .;:-,\u2026') 6949 # if diff_work[w] == '…': 6950 # diff_work[w] = '' 6951 write_log( 6952 release_id, 6953 'info', 6954 "diff list for works: %s", 6955 diff_work) 6956 write_log( 6957 release_id, 6958 'info', 6959 "diff list for parts: %s", 6960 diff_part) 6961 if not diff_work or len(diff_work) == 0: 6962 if part_levels > 0: 6963 
ext_groupheading = groupheading 6964 else: 6965 write_log( 6966 release_id, 6967 'debug', 6968 "Now calc extended groupheading...") 6969 write_log( 6970 release_id, 6971 'info', 6972 "depth = %s, ref_level = %s, title_depth = %s", 6973 depth, 6974 ref_level, 6975 title_depth) 6976 write_log( 6977 release_id, 6978 'info', 6979 "diff_work = %s, diff_part = %s", 6980 diff_work, 6981 diff_part) 6982 # remove duplications: 6983 for lev in range(1, ref_level): 6984 for diff_list in [diff_work, diff_part]: 6985 if diff_list[lev] and diff_list[lev - 1]: 6986 diff_list[lev - 1] = self.diff_pair( 6987 release_id, track, tm, diff_list[lev], diff_list[lev - 1]) 6988 if diff_list[lev - 1] == '…': 6989 diff_list[lev - 1] = '' 6990 write_log( 6991 release_id, 6992 'info', 6993 "Removed duplication. Revised diff_work = %s, diff_part = %s", 6994 diff_work, 6995 diff_part) 6996 if part_levels > 0 and depth >= 1: 6997 addn_work = [] 6998 addn_part = [] 6999 for stripped_work in diff_work: 7000 if stripped_work: 7001 write_log( 7002 release_id, 'info', "Stripped work = %s", stripped_work) 7003 addn_work.append(" {" + stripped_work + "}") 7004 else: 7005 addn_work.append("") 7006 for stripped_part in diff_part: 7007 if stripped_part and stripped_part != "": 7008 write_log(release_id, 'info', "Stripped part = %s", stripped_part) 7009 addn_part.append(" {" + stripped_part + "}") 7010 else: 7011 addn_part.append("") 7012 write_log( 7013 release_id, 7014 'info', 7015 "addn_work = %s, addn_part = %s", 7016 addn_work, 7017 addn_part) 7018 ext_groupheading = work[1] + addn_work[0] 7019 ext_work = work[ref_level] + addn_work[ref_level - 1] 7020 ext_inter_work = "" 7021 inter_title_work = "" 7022 title_groupheading = tm['~cwp_title_work_1'] 7023 if ref_level > 1: 7024 for r in range(1, ref_level): 7025 if ext_inter_work: 7026 ext_inter_work = ': ' + ext_inter_work 7027 ext_inter_work = part[r] + \ 7028 addn_work[r - 1] + ext_inter_work 7029 ext_groupheading = work[ref_level] + \ 7030 
addn_work[ref_level - 1] + ':: ' + ext_inter_work 7031 if title_depth > 1 and ref_level > 1: 7032 for r in range(1, min(title_depth, ref_level)): 7033 if inter_title_work: 7034 inter_title_work = ': ' + inter_title_work 7035 inter_title_work = tm['~cwp_title_part_' + 7036 str(r)] + inter_title_work 7037 title_groupheading = tm['~cwp_title_work_' + str( 7038 min(title_depth, ref_level))] + ':: ' + inter_title_work 7039 7040 else: 7041 ext_groupheading = groupheading # title will be in part 7042 ext_work = work_main 7043 ext_inter_work = inter_work 7044 inter_title_work = "" 7045 7046 write_log(release_id, 'debug', ".... ext_groupheading done") 7047 7048 if ext_groupheading: 7049 write_log( 7050 release_id, 7051 'info', 7052 "EXTENDED GROUPHEADING: %s", 7053 ext_groupheading) 7054 tm['~cwp_extended_groupheading'] = ext_groupheading 7055 tm['~cwp_extended_work'] = ext_work 7056 if ext_inter_work: 7057 tm['~cwp_extended_inter_work'] = ext_inter_work 7058 if inter_title_work: 7059 tm['~cwp_inter_title_work'] = inter_title_work 7060 if title_groupheading: 7061 tm['~cwp_title_groupheading'] = title_groupheading 7062 write_log( 7063 release_id, 7064 'info', 7065 "title_groupheading = %s", 7066 title_groupheading) 7067 # extend part from title metadata 7068 write_log( 7069 release_id, 7070 'debug', 7071 "NOW EXTEND PART...(part = %s)", 7072 part_main) 7073 if part_main: 7074 if '~cwp_title_part_0' in tm: 7075 movement = tm['~cwp_title_part_0'] 7076 else: 7077 movement = tm['~cwp_title_part_0'] or tm['~cwp_title'] or tm['title'] 7078 if '~cwp_extended_groupheading' in tm: 7079 work_compare = tm['~cwp_extended_groupheading'] + \ 7080 ': ' + part_main 7081 elif '~cwp_work_1' in tm: 7082 work_compare = work[1] + ': ' + part_main 7083 else: 7084 work_compare = work[0] 7085 diff = self.diff_pair( 7086 release_id, track, tm, work_compare, movement) 7087 # compare with the fullest possible work name, not the stripped one 7088 # - to maximise the duplication elimination 7089 
reverse_diff = self.diff_pair( 7090 release_id, track, tm, movement, vanilla_part) 7091 # for the reverse comparison use the part name without any work details or annotation 7092 if diff and reverse_diff and self.parts[tuple(str_to_list(tm['~cwp_workid_0']))]['partial']: 7093 diff = movement 7094 # for partial tracks, do not eliminate the title text as it is 7095 # frequently deliberately a component of the the overall work txt 7096 # (unless it is identical) 7097 fill_part = options['cwp_fill_part'] 7098 # To fill part with title text if it 7099 # would otherwise have no text other than arrangement or partial 7100 # annotations 7101 if not diff and not vanilla_part and part_levels > 0 and fill_part: 7102 # In other words the movement will have no text other than 7103 # arrangement or partial annotations 7104 diff = movement 7105 write_log(release_id, 'info', "DIFF PART - MOVT. ti =%s", diff) 7106 write_log(release_id, 7107 'info', 7108 'medley indicator for %s is %s', 7109 tm['~cwp_workid_0'], 7110 self.parts[tuple(str_to_list(tm['~cwp_workid_0']))]['medley']) 7111 7112 if type2_medley: 7113 tm['~cwp_extended_part'] = "{" + movement + "}" 7114 else: 7115 if diff: 7116 tm['~cwp_extended_part'] = part_main + \ 7117 " {" + diff.strip() + "}" 7118 else: 7119 tm['~cwp_extended_part'] = part_main 7120 if part_levels == 0: 7121 if tm['~cwp_extended_groupheading']: 7122 del tm['~cwp_extended_groupheading'] 7123 7124 # remove unwanted groupheadings (needed them up to now for adding 7125 # extensions) 7126 if '~cwp_groupheading' in tm and tm['~cwp_groupheading'] == tm['~cwp_part']: 7127 del tm['~cwp_groupheading'] 7128 if '~cwp_title_groupheading' in tm and tm['~cwp_title_groupheading'] == tm['~cwp_title_part']: 7129 del tm['~cwp_title_groupheading'] 7130 # clean up groupheadings (may be stray separators if level 0 or title 7131 # options used) 7132 if '~cwp_groupheading' in tm: 7133 tm['~cwp_groupheading'] = tm['~cwp_groupheading'].strip( 7134 ':').strip( 7135 
def publish_metadata(self, release_id, album, track, movement_info=None):
    """
    Write out the work / part metadata to tags according to user options

    :param release_id: name for log file - usually =musicbrainz_albumid
        unless called outside metadata processor
    :param album: key used to look up self.album_artists; also passed on to
        seq_last_names and map_tags
    :param track: track whose track.metadata is updated; also the key used
        to look up self.options
    :param movement_info: optional dict, format is
        {'movement-group': movementgroup, 'movement-number': movementnumber}
    :return: None (track.metadata is modified in place)
    """
    write_log(release_id, 'debug', "IN PUBLISH METADATA for %s", track)
    options = self.options[track]
    tm = track.metadata
    tm['~cwp_version'] = PLUGIN_VERSION

    def split_option(key):
        # comma-separated option string -> list of tag names, spaces trimmed
        return [x.strip(' ') for x in options[key].split(",")]

    # set movement grouping tags (hidden vars)
    if movement_info:
        movementtotal = self.parts[tuple(
            movement_info['movement-group'])]['movement-total']
        if movementtotal > 1:
            tm['~cwp_movt_num'] = movement_info['movement-number']
            tm['~cwp_movt_tot'] = movementtotal

    # album composers needed by map_tags (set in set_work_artists)
    if 'composer_lastnames' in self.album_artists[album]:
        last_names = seq_last_names(self, album)
        self.append_tag(
            release_id, tm, '~cea_album_composer_lastnames', last_names)

    # choose the source variables according to the naming option selected
    write_log(release_id, 'info', "Check options")
    if options["cwp_titles"]:
        write_log(release_id, 'info', "titles")
        part = tm['~cwp_title_part_0'] or tm['~cwp_title_work_0'] \
            or tm['~cwp_title'] or tm['title']
        # for multi-level work display
        groupheading = tm['~cwp_title_groupheading'] or ""
        # for single-level work display
        work = tm['~cwp_title_work'] or ""
        inter_work = tm['~cwp_inter_title_work'] or ""
    elif options["cwp_works"]:
        write_log(release_id, 'info', "works")
        part = tm['~cwp_part']
        groupheading = tm['~cwp_groupheading'] or ""
        work = tm['~cwp_work'] or ""
        inter_work = tm['~cwp_inter_work'] or ""
    else:
        # options["cwp_extended"]
        write_log(release_id, 'info', "extended")
        part = tm['~cwp_extended_part']
        groupheading = tm['~cwp_extended_groupheading'] or ""
        work = tm['~cwp_extended_work'] or ""
        inter_work = tm['~cwp_extended_inter_work'] or ""
    write_log(release_id, 'info', "Done options")

    # movt = part with leading roman numerals / numbering removed
    p1 = RE_ROMANS_AT_START
    # Matches positive integers with punctuation
    p2 = re.compile(r'^\W*\d+[.):-]')
    movt = part
    for _ in range(0, 5):  # in case of multiple levels
        movt = p2.sub('', p1.sub('', movt)).strip()
    write_log(release_id, 'info', "Done movt")

    movt_inc_tags = split_option("cwp_movt_tag_inc")
    movt_exc_tags = split_option("cwp_movt_tag_exc")
    movt_inc_1_tags = split_option("cwp_movt_tag_inc1")
    movt_exc_1_tags = split_option("cwp_movt_tag_exc1")
    movt_no_tags = split_option("cwp_movt_no_tag")
    movt_no_sep = options["cwp_movt_no_sep"]
    movt_tot_tags = split_option("cwp_movt_tot_tag")
    gh_tags = split_option("cwp_work_tag_multi")
    gh_sep = options["cwp_multi_work_sep"]
    work_tags = split_option("cwp_work_tag_single")
    work_sep = options["cwp_single_work_sep"]
    top_tags = split_option("cwp_top_tag")

    write_log(
        release_id,
        'info',
        "Done splits. gh_tags: %s, work_tags: %s, movt_inc_tags: %s, movt_exc_tags: %s, movt_no_tags: %s",
        gh_tags,
        work_tags,
        movt_inc_tags,
        movt_exc_tags,
        movt_no_tags)

    # blank the target tags before appending to them
    for tag in gh_tags + work_tags + movt_inc_tags + movt_exc_tags + movt_no_tags:
        tm[tag] = ""
    for tag in gh_tags:
        if tag in movt_inc_tags + movt_exc_tags + movt_no_tags:
            self.append_tag(release_id, tm, tag, groupheading, gh_sep)
        else:
            self.append_tag(release_id, tm, tag, groupheading)
    for tag in work_tags:
        if tag in movt_inc_1_tags + movt_exc_1_tags + movt_no_tags:
            self.append_tag(release_id, tm, tag, work, work_sep)
        else:
            self.append_tag(release_id, tm, tag, work)
    if '~cwp_part_levels' in tm and int(tm['~cwp_part_levels']) > 0:
        # original tag for iTunes, kept for backwards compatibility
        self.append_tag(release_id, tm, 'show work movement', '1')
        # new tag for iTunes & MusicBee, consistent with Picard tag docs
        self.append_tag(release_id, tm, 'showmovement', '1')
    for tag in top_tags:
        if '~cwp_work_top' in tm:
            self.append_tag(release_id, tm, tag, tm['~cwp_work_top'])

    if '~cwp_movt_num' in tm and len(tm['~cwp_movt_num']) > 0:
        movt_num_punc = tm['~cwp_movt_num'] + movt_no_sep + ' '
    else:
        movt_num_punc = ''

    for tag in movt_no_tags:
        if tag not in movt_inc_tags + movt_exc_tags + movt_inc_1_tags + movt_exc_1_tags:
            self.append_tag(release_id, tm, tag, tm['~cwp_movt_num'])

    for tag in movt_tot_tags:
        self.append_tag(release_id, tm, tag, tm['~cwp_movt_tot'])

    # The movement-number prefix is applied per tag, to a local copy.
    # Prefixing movt / part themselves would make the number accumulate
    # over successive tags and leak into the inc1 / exc1 tags below.
    for tag in movt_exc_tags:
        text = movt_num_punc + movt if tag in movt_no_tags else movt
        self.append_tag(release_id, tm, tag, text)

    for tag in movt_inc_tags:
        text = movt_num_punc + part if tag in movt_no_tags else part
        self.append_tag(release_id, tm, tag, text)

    for tag in movt_inc_1_tags + movt_exc_1_tags:
        pt = part if tag in movt_inc_1_tags else movt
        if tag in movt_no_tags:
            pt = movt_num_punc + pt
        if inter_work:
            if tag in movt_exc_tags + movt_inc_tags and tag != "":
                write_log(
                    release_id,
                    'warning',
                    "Tag %s will have multiple contents",
                    tag)
                if self.WARNING:
                    self.append_tag(
                        release_id,
                        tm,
                        '~cwp_warning',
                        '6. Tag ' + tag + ' has multiple contents')
            self.append_tag(
                release_id, tm, tag, inter_work + work_sep + " " + pt)
        else:
            self.append_tag(release_id, tm, tag, pt)

    for tag in movt_exc_tags + movt_inc_tags + movt_exc_1_tags + movt_inc_1_tags:
        if tag in movt_no_tags:
            # i.e treat as one item, not multiple
            tm[tag] = "".join(re.split('|'.join(self.SEPARATORS), tm[tag]))

    # write "SongKong" tags
    if options['cwp_write_sk']:
        write_log(release_id, 'debug', "Writing SongKong work tags")
        if '~cwp_part_levels' in tm:
            part_levels = int(tm['~cwp_part_levels'])
            for n in range(0, part_levels + 1):
                work_key = '~cwp_work_' + str(n)
                workid_key = '~cwp_workid_' + str(n)
                if work_key in tm and workid_key in tm:
                    source = tm[work_key]
                    source_id = list(str_to_list(tm[workid_key]))
                    if n == 0:
                        self.append_tag(
                            release_id, tm, 'musicbrainz_work_composition', source)
                        for source_id_item in source_id:
                            self.append_tag(
                                release_id, tm, 'musicbrainz_work_composition_id', source_id_item)
                    if n == part_levels:
                        self.append_tag(
                            release_id, tm, 'musicbrainz_work', source)
                        # Delete the Picard version of this tag before
                        # replacing it with the SongKong version
                        if 'musicbrainz_workid' in tm:
                            del tm['musicbrainz_workid']
                        for source_id_item in source_id:
                            self.append_tag(
                                release_id, tm, 'musicbrainz_workid', source_id_item)
                    if n != 0 and n != part_levels:
                        level_tag = 'musicbrainz_work_part_level' + str(n)
                        self.append_tag(release_id, tm, level_tag, source)
                        for source_id_item in source_id:
                            self.append_tag(
                                release_id, tm, level_tag + '_id', source_id_item)

    # carry out tag mapping
    tm['~cea_works_complete'] = "Y"
    map_tags(options, release_id, album, tm)

    write_log(release_id, 'debug', "Published metadata for %s", track)
    if options['cwp_options_tag'] != "":
        # record the options used, keyed by each option's display name
        self.cwp_options = collections.defaultdict(
            lambda: collections.defaultdict(dict))

        for opt in plugin_options('workparts') + plugin_options('genres'):
            if 'name' in opt:
                if 'value' in opt:
                    if options[opt['option']]:
                        self.cwp_options['Classical Extras']['Works options'][opt['name']] = opt['value']
                else:
                    self.cwp_options['Classical Extras']['Works options'][opt['name']] = \
                        options[opt['option']]

        write_log(release_id, 'info', "Options %s", self.cwp_options)
        if options['ce_version_tag']:
            self.append_tag(
                release_id,
                tm,
                options['ce_version_tag'],
                'Version ' + tm['~cwp_version'] + ' of Classical Extras')
        if options['cwp_options_tag']:
            # the JSON round-trip turns the nested defaultdicts into plain
            # dicts before they are stored
            self.append_tag(
                release_id,
                tm,
                options['cwp_options_tag'] + ':workparts_options',
                json.loads(json.dumps(self.cwp_options)))
    if self.ERROR and "~cwp_error" in tm:
        for error in str_to_list(tm['~cwp_error']):
            # first character of the message is used as its code
            code = error[0]
            self.append_tag(release_id, tm, '001_errors:' + code, error)
    if self.WARNING and "~cwp_warning" in tm:
        for warning in str_to_list(tm['~cwp_warning']):
            wcode = warning[0]
            self.append_tag(release_id, tm, '002_warnings:' + wcode, warning)
def append_tag(self, release_id, tm, tag, source, sep=None):
    """
    Log the request, hand it to the module-level append_tag routine, then
    log the resulting tag contents

    :param release_id: name for log file - usually =musicbrainz_albumid
        unless called outside metadata processor
    :param tm: metadata object holding the tag
    :param tag: name of the tag to append to
    :param source: value to append
    :param sep: only written to the log here; the module-level routine is
        always called with self.SEPARATORS
    :return: None
    """
    def log_info(message, *values):
        write_log(release_id, 'info', message, *values)

    log_info(
        "In append_tag (Work parts). tag = %s, source = %s, sep =%s",
        tag, source, sep)
    append_tag(release_id, tm, tag, source, self.SEPARATORS)
    log_info(
        "Appended. Resulting contents of tag: %s are: %s",
        tag, tm[tag])
def strip_parent_from_work(
        self,
        track,
        release_id,
        work,
        parent,
        part_level,
        extend,
        parentId=None,
        workId=None):
    """
    Remove from a work name the text it has in common with its parent

    :param track: key used to look up self.options; also passed to
        self.listify
    :param release_id: name for log file - usually =musicbrainz_albumid
        unless called outside metadata processor
    :param work: could be a list of works, all of which require stripping
    :param parent: parent name (anything that is not a str is joined
        with '; ')
    :param part_level: only logged and passed on to recursive calls
    :param extend: True triggers (a) removal of the longest word sequence
        shared with the parent and (b), when parentId is in
        self.works_cache, a recursive strip against the next level up.
        False is used when this routine is called for other purposes than
        strict work: parent relationships
    :param parentId: id of the parent, used for the recursion described above
    :param workId: id of the work (a list of ids if work is a list); used to
        decide whether an empty result is acceptable
    :return: tuple (stripped work, parent). The stripped work is a list when
        work was a list
    """
    options = self.options[track]
    write_log(
        release_id,
        'debug',
        "STRIPPING HIGHER LEVEL WORK TEXT FROM PART NAMES")
    write_log(
        release_id,
        'info',
        'PARAMS: WORK = %r, PARENT = %s, PART_LEVEL = %s, EXTEND= %s',
        work,
        parent,
        part_level,
        extend)
    if isinstance(work, list):
        # strip each item separately, pairing it with its own id if given
        result = []
        for w, work_item in enumerate(work):
            if workId and isinstance(workId, list):
                sub_workId = workId[w]
            else:
                sub_workId = workId
            result.append(
                self.strip_parent_from_work(
                    track,
                    release_id,
                    work_item,
                    parent,
                    part_level,
                    extend,
                    parentId,
                    sub_workId)[0])
        return result, parent
    if not isinstance(parent, str):
        # in case it is a list - make sure it is a string
        parent = '; '.join(parent)
    if not isinstance(work, str):
        work = '; '.join(work)

    # replace every non-word character with a space (to remove any
    # inconsistent punctuation) - (?u) specifies the re.UNICODE flag.
    # Raw strings keep \W and \s as regex escapes rather than (invalid)
    # string escapes.
    clean_parent = re.sub(r"(?u)[\W]", ' ', parent)
    # now allow the spaces to be filled with up to 2 non-letters
    pattern_parent = clean_parent.replace(" ", r"\W{0,2}")
    pattern_parent = r"(^|.*?\s)(\W*" + pattern_parent + r"\W?)(.*)"
    # (removed previous alternative pattern for extend=true, owing to
    # catastrophic backtracking)
    write_log(
        release_id,
        'info',
        "Pattern parent: %s, Work: %s",
        pattern_parent,
        work)
    p = re.compile(pattern_parent, re.IGNORECASE | re.UNICODE)
    m = p.search(work)
    if m:
        write_log(release_id, 'info', "Matched...")
        if m.group(1):
            # text precedes the parent name: mark the gap with an ellipsis
            stripped_work = m.group(1) + "\u2026" + m.group(3)
        else:
            stripped_work = m.group(3)
        # may not have a full work name in the parent (missing op. no.
        # etc.)
        stripped_work = stripped_work.lstrip(":;,.- ")
    else:
        write_log(release_id, 'info', "No match...")
        stripped_work = work

    if extend and options['cwp_common_chars'] > 0:
        # try stripping out a common substring (multiple times until
        # nothing more stripped)
        prev_stripped_work = ''
        counter = 1
        while prev_stripped_work != stripped_work:
            if counter > 20:
                break  # in case something went awry
            prev_stripped_work = stripped_work
            parent_tuples = self.listify(release_id, track, parent)
            parent_words = parent_tuples['s_tuple']
            clean_parent_words = [
                self.boil(release_id, word)
                for word in parent_tuples['s_test_tuple']]
            work_tuples = self.listify(release_id, track, stripped_work)
            work_words = work_tuples['s_tuple']
            clean_work_words = [
                self.boil(release_id, word)
                for word in work_tuples['s_test_tuple']]
            common_dets = longest_common_substring(
                clean_work_words, clean_parent_words)
            # this is actually a list, not a string, since list
            # arguments were supplied
            common_seq = common_dets['string']
            seq_length = common_dets['length']
            seq_start = common_dets['start']
            # the original items (before 'cleaning')
            full_common_seq = [
                x.group() for x in work_words[seq_start:seq_start + seq_length]]
            # number of words in common_seq
            full_seq_length = sum(len(x.split()) for x in full_common_seq)
            write_log(
                release_id,
                'info',
                'Checking common sequence between parent and work, iteration %s ... parent_words = %s',
                counter,
                parent_words)
            write_log(
                release_id,
                'info',
                '... longest common sequence = %s',
                common_seq)
            if full_seq_length > 0:
                potential_stripped_work = stripped_work
                if seq_start > 0:
                    ellipsis = ' ' + "\u2026" + ' '
                else:
                    ellipsis = ''
                if counter > 1:
                    potential_stripped_work = stripped_work.rstrip(
                        ' :,-\u2026')
                    potential_stripped_work = potential_stripped_work.replace(
                        '(\u2026)', '').rstrip()
                # cut the common sequence out, using the match positions
                # of its first and last words
                cut_from = work_words[seq_start].start()
                cut_to = work_words[seq_start + seq_length - 1].end()
                potential_stripped_work = potential_stripped_work[:cut_from] + \
                    ellipsis + potential_stripped_work[cut_to:]
                potential_stripped_work = potential_stripped_work.lstrip(
                    ' :,-')
                potential_stripped_work = re.sub(
                    r'(\W*…\W*)(\W*…\W*)', ' … ', potential_stripped_work)
                potential_stripped_work = strip_excess_punctuation(
                    potential_stripped_work)

                if full_seq_length >= options['cwp_common_chars'] \
                        or potential_stripped_work == '' and options['cwp_allow_empty_parts']:
                    # Make sure it is more than the required min (it will be > 0 anyway)
                    # unless a full strip will result anyway (and blank
                    # part names are allowed)
                    stripped_work = potential_stripped_work
            if not stripped_work:
                # An empty part is only acceptable when empty parts are
                # allowed AND the work will carry arrangement or partial
                # text. The groups are parenthesised explicitly: unbracketed,
                # "a and b or c and d" binds as "(a and b) or (c and d)",
                # which skipped the allow-empty test for arrangements and
                # indexed self.parts with a missing workId.
                arrangement_label = (
                    workId
                    and 'arrangement' in self.parts[workId]
                    and self.parts[workId]['arrangement']
                    and options['cwp_arrangements']
                    and options['cwp_arrangements_text'])
                partial_label = (
                    workId
                    and 'partial' in self.parts[workId]
                    and self.parts[workId]['partial']
                    and options['cwp_partial']
                    and options['cwp_partial_text'])
                if not ((arrangement_label or partial_label)
                        and options['cwp_allow_empty_parts']):
                    stripped_work = prev_stripped_work  # do not allow empty parts
            counter += 1
        stripped_work = strip_excess_punctuation(stripped_work)
        write_log(
            release_id,
            'info',
            'stripped_work = %s',
            stripped_work)
    if extend and parentId and parentId in self.works_cache:
        write_log(
            release_id,
            'info',
            "Looking for match at next level up")
        grandparentIds = tuple(self.works_cache[parentId])
        grandparent = self.parts[grandparentIds]['name']
        stripped_work = self.strip_parent_from_work(
            track,
            release_id,
            stripped_work,
            grandparent,
            part_level,
            True,
            grandparentIds,
            workId)[0]

    write_log(
        release_id,
        'info',
        "Finished strip_parent_from_work, Work: %s",
        work)
    write_log(release_id, 'info', "Stripped work: %s", stripped_work)
    # Changed full_parent to parent after removal of 'extend' logic above
    stripped_work = strip_excess_punctuation(stripped_work)
    write_log(
        release_id,
        'info',
        "Stripped work after punctuation removal: %s",
        stripped_work)
    return stripped_work, parent
Returning %s', 7687 None) 7688 return None 7689 7690 if self.options[track]["cwp_removewords_p"]: 7691 removewords = self.options[track]["cwp_removewords_p"].split(',') 7692 else: 7693 removewords = [] 7694 write_log(release_id, 'info', "Prefixes = %s", removewords) 7695 # remove numbers, roman numerals, part etc and punctuation from the 7696 # start 7697 write_log(release_id, 'info', "checking prefixes") 7698 found_prefix = True 7699 i = 0 7700 while found_prefix: 7701 if i > 20: 7702 break # safety valve 7703 found_prefix = False 7704 for prefix in removewords: 7705 if prefix[0] != " ": 7706 prefix2 = str(prefix).lower().lstrip() 7707 write_log( 7708 release_id, 'info', "checking prefix %s", prefix2) 7709 if mb.lower().startswith(prefix2): 7710 found_prefix = True 7711 mb = mb[len(prefix2):] 7712 if ti.lower().startswith(prefix2): 7713 found_prefix = True 7714 ti = ti[len(prefix2):] 7715 mb = mb.strip() 7716 ti = ti.strip() 7717 i += 1 7718 write_log( 7719 release_id, 7720 'info', 7721 "pairs after prefix strip iteration %s. 
mb = %s, ti = %s", 7722 i, 7723 mb, 7724 ti) 7725 write_log(release_id, 'info', "Prefixes checked") 7726 7727 # replacements 7728 replacements = self.replacements[track] 7729 write_log(release_id, 'info', "Replacement: %s", replacements) 7730 for tup in replacements: 7731 for ind in range(0, len(tup) - 1): 7732 ti = re.sub(tup[ind], tup[-1], ti, flags=re.IGNORECASE) 7733 write_log( 7734 release_id, 7735 'debug', 7736 'Looking for any new words in the title') 7737 7738 write_log( 7739 release_id, 7740 'info', 7741 "Check before splitting: mb = %s, ti = %s", 7742 mb, 7743 ti) 7744 7745 ti_tuples = self.listify(release_id, track, ti) 7746 ti_tuple = ti_tuples['s_tuple'] 7747 ti_test_tuple = ti_tuples['s_test_tuple'] 7748 7749 mb_tuples = self.listify(release_id, track, mb) 7750 mb_test_tuple = mb_tuples['s_test_tuple'] 7751 7752 write_log( 7753 release_id, 7754 'info', 7755 "Check after splitting: mb_test = %s, ti = %s, ti_test = %s", 7756 mb_test_tuple, 7757 ti_tuple, 7758 ti_test_tuple) 7759 7760 ti_stencil = self.stencil(release_id, ti_tuple, ti) 7761 ti_list = ti_stencil['match list'] 7762 ti_list_punc = ti_stencil['gap list'] 7763 ti_test_list = list(ti_test_tuple) 7764 if ti_stencil['dummy']: 7765 # to deal with case where stencil has added a dummy item at the 7766 # start 7767 ti_test_list.insert(0, '') 7768 write_log(release_id, 'info', 'ti_test_list = %r', ti_test_list) 7769 # zip is an iterable, not a list in Python 3, so make it re-usable 7770 ti_zip_list = list(zip(ti_list, ti_list_punc)) 7771 7772 # len(ti_list) should be = len(ti_test_list) as only difference should 7773 # be synonyms which are each one 'word' 7774 # However, because of the grouping of some words via regex, it is possible that inconsistencies might arise 7775 # Therefore, there is a test here to check for equality and produce an 7776 # error message (but continue processing) 7777 if len(ti_list) != len(ti_test_list): 7778 write_log( 7779 release_id, 7780 'error', 7781 'Mismatch in title 
list after canonization/synonymization') 7782 write_log( 7783 release_id, 7784 'error', 7785 'Orig. title list = %r. Test list = %r', 7786 ti_list, 7787 ti_test_list) 7788 # mb_test_tuple = self.listify(release_id, track, mb_test) 7789 mb_list2 = list(mb_test_tuple) 7790 for index, mb_bit2 in enumerate(mb_list2): 7791 mb_list2[index] = self.boil(release_id, mb_bit2) 7792 write_log( 7793 release_id, 7794 'info', 7795 "mb_list2[%s] = %s", 7796 index, 7797 mb_list2[index]) 7798 ti_new = [] 7799 ti_rich_list = [] 7800 for i, ti_bit_test in enumerate(ti_test_list): 7801 if i <= len(ti_list) - 1: 7802 ti_bit = ti_zip_list[i] 7803 # NB ti_bit is a tuple where the word (1st item) is grouped 7804 # with its following punctuation (2nd item) 7805 else: 7806 ti_bit = ('', '') 7807 write_log( 7808 release_id, 7809 'info', 7810 "i = %s, ti_bit_test = %s, ti_bit = %s", 7811 i, 7812 ti_bit_test, 7813 ti_bit) 7814 ti_rich_list.append((ti_bit, True)) 7815 # Boolean to indicate whether ti_bit is a new word 7816 7817 if ti_bit_test == '': 7818 ti_rich_list[i] = (ti_bit, False) 7819 else: 7820 if self.boil(release_id, ti_bit_test) in mb_list2: 7821 ti_rich_list[i] = (ti_bit, False) 7822 7823 if remove_numbers: # Only remove numbers at the start if they are not new items 7824 p0 = re.compile(r'\b\w+\b') 7825 p1 = RE_ROMANS 7826 p2 = re.compile(r'^\d+') # Matches positive integers 7827 starts_with_numeral = True 7828 while starts_with_numeral: 7829 starts_with_numeral = False 7830 if ti_rich_list and p0.match(ti_rich_list[0][0][0]): 7831 start_word = p0.match(ti_rich_list[0][0][0]).group() 7832 if p1.match(start_word) or p2.match(start_word): 7833 if not ti_rich_list[0][1]: 7834 starts_with_numeral = True 7835 ti_rich_list.pop(0) 7836 ti_test_list.pop(0) 7837 7838 write_log( 7839 release_id, 7840 'info', 7841 "ti_rich_list before removing singletons = %s. 
length = %s", 7842 ti_rich_list, 7843 len(ti_rich_list)) 7844 7845 s = 0 7846 index = 0 7847 change = () 7848 for i, (t, n) in enumerate(ti_rich_list): 7849 if n: 7850 s += 1 7851 index = i 7852 change = t # NB this is a tuple 7853 7854 p = self.options[track]["cwp_proximity"] 7855 ep = self.options[track]["cwp_end_proximity"] 7856 # NB these may be modified later 7857 7858 if s == 1: 7859 if 0 < index < len(ti_rich_list) - 1: 7860 # ignore singleton new words in middle of title unless they are 7861 # within "cwp_end_proximity" from the start or end 7862 write_log( 7863 release_id, 'info', 'item length is %s', len( 7864 change[0].split())) 7865 # also make sure that the item is just one word before 7866 # eliminating 7867 if ep < index < len(ti_rich_list) - ep - \ 7868 1 and len(change[0].split()) == 1: 7869 ti_rich_list[index] = (change, False) 7870 s = 0 7871 7872 # remove prepositions 7873 write_log( 7874 release_id, 7875 'info', 7876 "ti_rich_list before removing prepositions = %s. length = %s", 7877 ti_rich_list, 7878 len(ti_rich_list)) 7879 if self.options[track]["cwp_prepositions"]: 7880 prepositions_fat = self.options[track]["cwp_prepositions"].split( 7881 ',') 7882 prepositions = [w.strip() for w in prepositions_fat] 7883 for i, ti_bit_test in enumerate( 7884 reversed(ti_test_list)): # Need to reverse it to check later prepositions first 7885 if ti_bit_test.lower().strip() in prepositions: 7886 # NB i is counting up while traversing the list backwards 7887 j = len(ti_rich_list) - i - 1 7888 if i == 0 or not ti_rich_list[j + 1][1]: 7889 # Don't make it false if it is preceded by a 7890 # non-preposition new word 7891 if not (j > 0 and ti_rich_list[j - 7892 1][1] and ti_test_list[j - 7893 1].lower() not in prepositions): 7894 ti_rich_list[j] = (ti_rich_list[j][0], False) 7895 7896 # create comparison for later usage 7897 compare_string = '' 7898 for item in ti_rich_list: 7899 if item[1]: 7900 compare_string += item[0][0] 7901 ti_compare = 
self.boil(release_id, compare_string) 7902 compare_length = len(ti_compare) 7903 7904 write_log( 7905 release_id, 7906 'info', 7907 "ti_rich_list before gapping (True indicates a word in title not in MB work) = %s. length = %s", 7908 ti_rich_list, 7909 len(ti_rich_list)) 7910 if s > 0: 7911 d = p - ep 7912 start = True # To keep track of new words at the start of the title 7913 for i, (ti_bit, new) in enumerate(ti_rich_list): 7914 if not new: 7915 write_log( 7916 release_id, 7917 'info', 7918 "item(i = %s) val = %s - not new. proximity param = %s, end_proximity param = %s", 7919 i, 7920 ti_bit, 7921 p, 7922 ep) 7923 if start: 7924 prox_test = ep 7925 else: 7926 prox_test = p 7927 if prox_test > 0: 7928 for j in range(0, prox_test + 1): 7929 write_log(release_id, 'info', "item(i) = %s, look-ahead(j) = %s", i, j) 7930 if i + j < len(ti_rich_list): 7931 if ti_rich_list[i + j][1]: 7932 write_log( 7933 release_id, 'info', "Set to true..") 7934 ti_rich_list[i] = (ti_bit, True) 7935 write_log( 7936 release_id, 'info', "...set OK") 7937 else: 7938 if j <= p - d: 7939 ti_rich_list[i] = (ti_bit, True) 7940 else: 7941 p = self.options[track]["cwp_proximity"] 7942 start = False 7943 if not ti_rich_list[i][1]: 7944 p -= 1 7945 ep -= 1 7946 write_log( 7947 release_id, 7948 'info', 7949 "ti_rich_list after gapping (True indicates new words plus infills) = %s", 7950 ti_rich_list) 7951 nothing_new = True 7952 for (ti_bit, new) in ti_rich_list: 7953 if new: 7954 nothing_new = False 7955 new_prev = True 7956 break 7957 if nothing_new: 7958 write_log( 7959 release_id, 7960 'info', 7961 'End of DIFF_PAIR. 
Returning %s', 7962 None) 7963 return None 7964 else: 7965 new_prev = False 7966 for i, (ti_bit, new) in enumerate(ti_rich_list): 7967 write_log(release_id, 'info', "Create new for %s?", ti_bit) 7968 if new: 7969 write_log(release_id, 'info', "Yes for %s", ti_bit) 7970 if not new_prev: 7971 if i > 0: 7972 # check to see if the last char of the prev 7973 # punctuation group needs to be added first 7974 if len(ti_rich_list[i - 1][0][1]) > 1: 7975 # i.e. ti_bit[1][-1] of previous loop 7976 ti_new.append(ti_rich_list[i - 1][0][1][-1]) 7977 ti_new.append(ti_bit[0]) 7978 if len(ti_bit[1]) > 1: 7979 if i < len(ti_rich_list) - 1: 7980 if ti_rich_list[i + 1][1]: 7981 ti_new.append(ti_bit[1]) 7982 else: 7983 ti_new.append(ti_bit[1][:-1]) 7984 else: 7985 ti_new.append(ti_bit[1]) 7986 else: 7987 ti_new.append(ti_bit[1]) 7988 write_log( 7989 release_id, 7990 'info', 7991 "appended %s. ti_new is now %s", 7992 ti_bit, 7993 ti_new) 7994 else: 7995 write_log(release_id, 'info', "Not for %s", ti_bit) 7996 if new != new_prev: 7997 ti_new.append(u"\u2026" + ' ') 7998 7999 new_prev = new 8000 if ti_new: 8001 write_log(release_id, 'info', "ti_new %s", ti_new) 8002 ti = ''.join(ti_new) 8003 write_log(release_id, 'info', "New text from title = %s", ti) 8004 else: 8005 write_log(release_id, 'info', "New text empty") 8006 write_log( 8007 release_id, 8008 'info', 8009 'End of DIFF_PAIR. Returning %s', 8010 None) 8011 return None 8012 # see if there is any significant difference between the strings 8013 if ti: 8014 nopunc_ti = ti_compare # was = self.boil(release_id, ti) 8015 # not necessary as already set? 
8016 nopunc_mb = self.boil(release_id, mb) 8017 # ti_len = len(nopunc_ti) use compare_length instead (= len before 8018 # removals and additions) 8019 substring_proportion = float( 8020 self.options[track]["cwp_substring_match"]) / 100 8021 sub_len = compare_length * substring_proportion 8022 if substring_proportion < 1: 8023 write_log(release_id, 'info', "test sub....") 8024 lcs = longest_common_substring(nopunc_mb, nopunc_ti)['string'] 8025 write_log( 8026 release_id, 8027 'info', 8028 "Longest common substring is: %s. Threshold length is %s", 8029 lcs, 8030 sub_len) 8031 if len(lcs) >= sub_len: 8032 write_log( 8033 release_id, 8034 'info', 8035 'End of DIFF_PAIR. Returning %s', 8036 None) 8037 return None 8038 write_log(release_id, 'info', "...done, ti =%s", ti) 8039 # remove duplicate successive words (and remove first word of title 8040 # item if it duplicates last word of mb item) 8041 if ti: 8042 ti_list_new = re.split(' ', ti) 8043 ti_list_ref = ti_list_new 8044 ti_bit_prev = None 8045 for i, ti_bit in enumerate(ti_list_ref): 8046 if ti_bit != "...": 8047 8048 if i > 1: 8049 if self.boil( 8050 release_id, ti_bit) == self.boil( 8051 release_id, ti_bit_prev): 8052 dup = ti_list_new.pop(i) 8053 write_log(release_id, 'info', "...removed dup %s", dup) 8054 8055 ti_bit_prev = ti_bit 8056 if ti_list_new and mb_list2: 8057 write_log(release_id, 8058 'info', 8059 "1st word of ti = %s. Last word of mb = %s", 8060 ti_list_new[0], 8061 mb_list2[-1]) 8062 if self.boil(release_id, ti_list_new[0]) == mb_list2[-1]: 8063 write_log(release_id, 'info', "Removing 1st word from ti...") 8064 first = ti_list_new.pop(0) 8065 write_log(release_id, 'info', "...removed %s", first) 8066 else: 8067 write_log( 8068 release_id, 8069 'info', 8070 'End of DIFF_PAIR. Returning %s', 8071 None) 8072 return None 8073 if ti_list_new: 8074 ti = ' '.join(ti_list_new) 8075 else: 8076 write_log( 8077 release_id, 8078 'info', 8079 'End of DIFF_PAIR. 
@staticmethod
def canonize_opus(release_id, track, s):
    """
    Turn the first opus / catalogue reference found in a string into a
    one-word item (e.g. K. 126 -> K126, K 345a -> K345a, op. 144 -> op144).
    A bare "no" designation is dropped, so only the number is kept.
    :param release_id: name for log file
    :param track: not referenced in the body of this method
    :param s: A string
    :return: designation + number of the first reference found (NB not the
     rest of the string), or s unchanged if no reference is found
    """
    write_log(release_id, 'debug', 'Canonizing: %s', s)
    found = re.search(
        r'\b((?:op|no|k|kk|kv|L|B|Hob|S|D|M)|\w+WV)\W?\s?(\d+\-?\u2013?\u2014?\d*\w*)\b',
        s,
        re.IGNORECASE)
    if found is None:
        canonical = s
    else:
        designation = found.group(1) or ''
        number = found.group(2) or ''
        if designation and number:
            # "no" carries no information once the number is attached
            designation = re.sub(
                r'^\W*no\b', '', designation, flags=re.IGNORECASE)
        canonical = designation + number
    write_log(release_id, 'info', 'canonized item = %s', canonical)
    return canonical


@staticmethod
def canonize_key(release_id, track, s):
    """
    Turn the first musical key found in a string (as matched by RE_KEYS)
    into a standardized one-word item: note + accidental + scale.
    :param release_id: name for log file
    :param track: not referenced in the body of this method
    :param s: A string
    :return: the canonical key, or s unchanged if RE_KEYS finds no key
    """
    write_log(release_id, 'debug', 'Canonizing: %s', s)
    key_found = RE_KEYS.search(s)
    canonical = s
    if key_found:
        note = key_found.group(1)
        accidental = key_found.group(2)
        scale = key_found.group(3)

        if accidental:
            # spell accidentals out as plain words without hyphens
            for pattern, word in ((r'\-sharp|\u266F', 'sharp'),
                                  (r'\-flat|\u266D', 'flat')):
                accidental = re.sub(
                    pattern, word, accidental, flags=re.IGNORECASE)
            accidental = accidental.replace('-', '')
        else:
            accidental = ''

        scale = scale.strip() if scale else ''
        if not scale:
            # if the scale is not given, assume it is the major key
            # but only if the note is upper case or has an accidental
            if note.isupper() or accidental != '':
                scale = 'major'

        canonical = note.strip() + accidental.strip() + scale
    write_log(release_id, 'info', 'canonized item = %s', canonical)
    return canonical
@staticmethod
def canonize_synonyms(release_id, tuples, s):
    """
    Replace any synonym found in a string by its canonical form, so that
    synonyms compare as equal.
    :param release_id: name for log file
    :param tuples: synonym groups; the LAST member of each group is the
     canonical form, every member is used as a regex fragment
    :param s: A string
    :return: s with the matched synonyms replaced
    """
    write_log(release_id, 'debug', 'Canonizing: %s', s)
    # Build all the (pattern, canonical form) pairs before any searching.
    # Each member must be delimited by the start/end of the string or by a
    # non-word character.
    between_members = r'(?:$|\W)|(?:^|\W)'
    rules = [
        (r'((?:^|\W)' + between_members.join(group) + r'(?:$|\W))',
         group[-1])
        for group in tuples
    ]
    canonical = s
    for pattern, canonical_form in rules:
        # the search is always made on the original string; the
        # replacement is applied to the accumulated result
        hit = re.search(pattern, s, re.IGNORECASE)
        if hit:
            canonical = canonical.replace(
                hit.group().strip(), canonical_form)

    write_log(release_id, 'info', 'canonized item = %s', canonical)
    return canonical


def find_synonyms(self, release_id, track, reg_item):
    """
    Extend a regex item so that it also matches its synonyms.
    :param release_id: name for log file
    :param track: key used to look up self.synonyms
    :param reg_item: A regex portion
    :return: a 2-tuple:
     - a replacement for reg_item that includes all the synonyms of any
       group whose last (canonical) member is matched by reg_item;
       reg_item itself if there are none
     - a flat list of all members of those groups
    """
    write_log(release_id, 'debug', 'Finding synonyms of: %s', reg_item)
    whole_item = r'^\s*' + reg_item + r'\s*$'
    alternatives = []
    all_members = []
    for group in self.synonyms[track]:
        # the last synonym in the group is the canonical form
        if re.match(whole_item, group[-1], re.IGNORECASE):
            alternatives.extend(group[:-1])
            all_members.extend(group)
    expanded = reg_item
    if alternatives:
        expanded = '(?:' + ')|(?:'.join(alternatives + [reg_item]) + ')'

    write_log(release_id, 'info', 'new regex item = %s', expanded)
    return expanded, all_members
def listify(self, release_id, track, s):
    """
    Turn a string into a list of 'words', where words may also be phrases which
    are then 'canonized' - i.e. turned into equivalents for comparison purposes.
    Matching is done in two passes: first opus/catalogue numbers and keys,
    then (on a copy of the string with the first-pass matches blanked out)
    synonyms and the remaining words. The results of both passes are merged
    and ordered by their start position in the string.
    :param release_id: name for log file
    :param track: key used to look up self.synonyms and self.options
    :param s: string
    :return: a dict with two entries of equal length:
     's_tuple': a tuple of all the **match objects** (re words and defined phrases)
     's_test_tuple': a tuple of the matched and canonized words and phrases (i.e. a tuple of strings, not objects)
    """
    tuples = self.synonyms[track]
    # just list anything that is a synonym (with word boundary markers)
    # NOTE(review): if there are no synonyms this is '' and group 1 of
    # regex_2 below becomes an empty group, which can match the empty
    # string - confirm that a non-empty synonym list is guaranteed.
    syn_pattern = '|'.join(
        [r'(?:^|\W|\b)' + x + r'(?:$|\W)' for y in self.synonyms[track] for x in y])
    # For each building block of the opus/key regex, find_synonyms returns
    # (regex including synonyms, list of all members of the synonym groups used)
    op = self.find_synonyms(
        release_id,
        track,
        r'(?:op|no|k|kk|kv|L|B|Hob|S|D|M|\w+WV)')
    op_groups = op[0]
    op_all = op[1]
    notes = self.find_synonyms(release_id, track, r'[ABCDEFG]')
    notes_groups = notes[0]
    notes_all = notes[1]
    sharp = self.find_synonyms(release_id, track, r'sharp')
    sharp_groups = sharp[0]
    sharp_all = sharp[1]
    flat = self.find_synonyms(release_id, track, r'flat')
    flat_groups = flat[0]
    flat_all = flat[1]
    major = self.find_synonyms(release_id, track, r'major')
    major_groups = major[0]
    major_all = major[1]
    minor = self.find_synonyms(release_id, track, r'minor')
    minor_groups = minor[0]
    minor_all = minor[1]
    # NB the number and order of the capturing groups in these patterns is
    # relied on below (see the numbered list and matches_list)
    opus_pattern = r"(?:\b((?:(" + op_groups + \
        r"))\W?\s?\d+\-?\u2013?\u2014?\d*\w*)\b)"
    note_pattern = r"(\b" + notes_groups + r")"
    accent_pattern = r"(?:\-(" + sharp_groups + r")(?:\s+|\b)|\-(" + flat_groups + r")(?:\s+|\b)|\s(" + sharp_groups + \
        r")(?:\s+|\b)|\s(" + flat_groups + r")(?:\s+|\b)|\u266F(?:\s+|\b)|\u266D(?:\s+|\b)|(?:[:,.]?\s+|$|\-))"
    scale_pattern = r"(?:((" + major_groups + \
        r")|(" + minor_groups + r"))?\b)"
    key_pattern = note_pattern + accent_pattern + scale_pattern
    hyphen_split_pattern = r"(?:\b|\"|\')(\w+['’]?\w*)|(?:\b\w+\b)|(\B\&\B)"
    # treat em-dash and en-dash as hyphens
    hyphen_embed_pattern = r"(?:\b|\"|\')(\w+['’\-\u2013\u2014]?\w*)|(?:\b\w+\b)|(\B\&\B)"

    # The regex is split into two iterations as putting it all together can have unpredictable consequences
    # - may match synonyms before op's even though that is later in the string

    # First match the op's and keys
    regex_1 = opus_pattern + r"|(" + key_pattern + r")"
    matches_1 = re.finditer(regex_1, s, re.UNICODE | re.IGNORECASE)
    s_list = []
    s_test_list = []
    s_scrubbed = s
    # all_synonyms_lists[i] holds the synonyms relevant to regex group
    # matches_list[i] (sharp/flat appear twice: hyphenated and
    # non-hyphenated groups)
    all_synonyms_lists = [
        op_all,
        notes_all,
        sharp_all,
        flat_all,
        sharp_all,
        flat_all,
        major_all,
        minor_all]
    matches_list = [2, 4, 5, 6, 7, 8, 10, 11]
    for match in matches_1:
        test_a = match.group()
        match_a = []
        match_a.append(match.group())
        for j in range(1, 12):
            match_a.append(match.group(j))
        # 0. overall match
        # 1. overall opus match
        # 2. 2-char op match
        # 3. overall key match
        # 4. note match
        # 5. hyphenated sharp match
        # 6. hyphenated flat match
        # 7. non-hyphenated sharp match
        # 8. non-hyphenated flat match
        # 9. overall scale match
        # 10. major match
        # 11. minor match
        for i, all_synonyms_list in enumerate(all_synonyms_lists):
            if all_synonyms_list and match_a[matches_list[i]]:
                # text of the group as matched by each synonym pattern
                # that applies to it
                match_regex = [re.match(pattern, match_a[matches_list[i]], re.IGNORECASE).group()
                               for pattern in all_synonyms_list
                               if re.match(pattern, match_a[matches_list[i]], re.IGNORECASE)]
                if match_regex:
                    # swap the synonym for its canonical form, both in the
                    # group list and in the overall test string
                    match_a[matches_list[i]] = self.canonize_synonyms(
                        release_id, tuples, match_a[matches_list[i]])
                    test_a = re.sub(r"\b" + match_regex[0] + r"(?:\b|$|\s|\.)",
                                    match_a[matches_list[i]],
                                    test_a, flags=re.IGNORECASE)
        if match_a[1]:
            # an opus/catalogue match: canonize it within the test string
            clean_opus = test_a.strip(' ,.:;/-?"')
            test_a = re.sub(
                re.escape(clean_opus),
                self.canonize_opus(
                    release_id,
                    track,
                    clean_opus),
                test_a,
                flags=re.IGNORECASE)
        if match_a[3]:
            # a key match: canonize it within the test string
            clean_key = test_a.strip(' ,.:;/-?"')
            test_a = re.sub(
                re.escape(clean_key),
                self.canonize_key(
                    release_id,
                    track,
                    clean_key),
                test_a,
                flags=re.IGNORECASE)

        s_test_list.append(test_a)
        s_list.append(match)
        # Overwrite the matched span with '#' (one per character, so that
        # the positions found in the second pass still refer to s)
        s_scrubbed_list = list(s_scrubbed)
        for char in range(match.start(), match.end()):
            if len(s_scrubbed_list) >= match.end():  # belt and braces
                s_scrubbed_list[char] = '#'
        s_scrubbed = ''.join(s_scrubbed_list)

    # Then match the synonyms and remaining words
    if self.options[track]["cwp_split_hyphenated"]:
        regex_2 = r"(" + syn_pattern + r")|" + hyphen_split_pattern
        # allow ampersands and non-latin characters as word characters. Treat apostrophes as part of words.
        # Treat opus and catalogue entries - e.g. K. 657 or OP.5 or op. 35a or CD 144 or BWV 243a - as one word
        # also treat ranges of opus numbers (connected by dash, en dash or
        # em dash) as one word
    else:
        regex_2 = r"(" + syn_pattern + r")|" + hyphen_embed_pattern
        # as previous but also treat embedded hyphens as part of words.
    matches_2 = re.finditer(
        regex_2, s_scrubbed, re.UNICODE | re.IGNORECASE)
    for match in matches_2:
        if match.group(1) and match.group(1) == match.group():
            s_test_list.append(
                self.canonize_synonyms(
                    release_id,
                    tuples,
                    match.group(1)))  # synonym
        else:
            s_test_list.append(match.group())
        s_list.append(match)
    if s_list:
        # merge the two passes into string order, keeping each match object
        # paired with its canonized text
        s_zip = list(zip(s_list, s_test_list))
        s_list, s_test_list = zip(
            *sorted(s_zip, key=lambda tup: tup[0].start()))
    s_tuple = tuple(s_list)
    s_test_tuple = tuple(s_test_list)
    return {'s_tuple': s_tuple, 's_test_tuple': s_test_tuple}
def get_text_tuples(self, release_id, track, text_type):
    """
    Parse the option "cwp_<text_type>" into a list of synonym or
    'replacement' groups.
    Entries are separated by unescaped '/'; each entry must contain a
    bracketed group of at least two members separated by unescaped ','.
    Entries that are malformed, contain a disallowed blank member or repeat
    a member of an already accepted group are logged (and, if self.WARNING
    is set, noted in the '~cwp_warning' tag) and omitted.
    :param release_id: name for log file
    :param track: track whose options and metadata are used
    :param text_type: 'replacements' or 'synonyms'
     Note that names in this method refer to synonyms (as that was written first), but apply equally to replacements and ui_tags
    :return: list of groups, each a list of strings
    """
    tm = track.metadata
    option_text = self.options[track]["cwp_" + text_type]
    accepted = []
    for entry in re.split(r'(?<!\\)/', option_text):
        bracketed = re.search(r'\((.*)\)', entry)
        # split on commas, ignoring escaped ones
        members = re.split(r'(?<!\\),', bracketed.group(1)) if bracketed else ''

        if len(members) < 2:
            write_log(
                release_id,
                'warning',
                'Error in %s format for %s',
                text_type,
                entry)
            if self.WARNING:
                self.append_tag(
                    release_id,
                    tm,
                    '~cwp_warning',
                    '7. Error in ' + text_type + ' format for ' + entry)
            continue

        final_pos = len(members) - 1
        for pos, raw in enumerate(members):
            item = raw.strip("' ").strip('"')
            if len(item) > 4 and item[:2] == "!!" and item[-2:] == "!!":
                # we have a reg ex inside - this deals with legacy
                # replacement text where enclosure in double-shouts was
                # required
                item = item[2:-2]
            members[pos] = item

            # only the last member of a non-synonym group may be blank
            blank_forbidden = pos < final_pos or text_type == 'synonyms'
            if blank_forbidden and not item:
                write_log(
                    release_id,
                    'warning',
                    '%s: entries must not be blank - error in %s',
                    text_type,
                    entry)
                if self.WARNING:
                    self.append_tag(
                        release_id,
                        tm,
                        '~cwp_warning',
                        '7. ' + text_type + ': entries must not be blank - error in ' + entry)
                members[pos] = "**BAD**"
            elif any(item in earlier for earlier in accepted):
                write_log(
                    release_id,
                    'warning',
                    '%s: keys cannot duplicate any in existing %s - error in %s '
                    '- omitted from %s. To fix, place all %s in one tuple.',
                    text_type,
                    text_type,
                    entry,
                    text_type,
                    text_type)
                if self.WARNING:
                    self.append_tag(release_id, tm, '~cwp_warning',
                                    '7. ' + text_type + ': keys cannot duplicate any in existing ' + text_type + ' - error in ' +
                                    entry + ' - omitted from ' + text_type + '. To fix, place all ' + text_type + ' in one tuple.')
                members[pos] = "**BAD**"

        if "**BAD**" not in members:
            accepted.append(members)

    write_log(release_id, 'info', "%s: %s", text_type, accepted)
    return accepted
@staticmethod
def stencil(release_id, matches_tuple, test_string):
    """
    Split a string into the matched items AND the text in between them,
    as two lists that line up item by item.
    :param release_id: name for log file
    :param matches_tuple: tuple of regex matches (in string order)
    :param test_string: original string used in regex
    :return: dict with
     'match list' - list of matched strings,
     'gap list' - list of the strings following each matched string,
     'dummy' - True if an empty item was put at the start of 'match list'
       to pair with text that precedes the first match
    """
    write_log(
        release_id,
        'debug',
        'In fn stencil. test_string = %s. matches_tuple = %s',
        test_string,
        matches_tuple)
    words = []
    gaps = []
    padded = False
    cursor = 0  # end of the previous match
    for found in matches_tuple:
        begin = found.start()
        finish = found.end()
        if begin > cursor:
            if cursor == 0:
                # text before the first word: pair it with a null word
                # to keep the lists the same length
                words.append('')
                padded = True
            gaps.append(test_string[cursor:begin])
        elif cursor > 0:
            # shouldn't happen, but just in case there are two word
            # items with no gap
            gaps.append('')
        words.append(test_string[begin:finish])
        cursor = finish
    if matches_tuple:
        # pick up any punc items at end
        gaps.append(test_string[cursor:])
    return {
        'match list': words,
        'gap list': gaps,
        'dummy': padded}


def boil(self, release_id, s):
    """
    Remove punctuation, spaces, capitals and accents for string comparisons
    (also normalises some spellings and drops trailing "s" / apostrophes)
    :param release_id: name for log file - usually =musicbrainz_albumid
     unless called outside metadata processor
    :param s: string to be reduced
    :return: the reduced string
    """
    write_log(release_id, 'debug', "boiling %s", s)
    text = replace_roman_numerals(s.lower())
    # NB applied strictly in this order (e.g. oe-ligature -> 'oe' -> 'o')
    spelling_rules = (
        ('sch', 'sh'),
        (u'\xdf', 'ss'),
        ('sz', 'ss'),
        (u'\u0153', 'oe'),
        ('oe', 'o'),
        (u'\u00fc', 'ue'),
        ('ue', 'u'),
        (u'\u00e6', 'ae'),
        ('ae', 'a'),
        (u'\u266F', 'sharp'),
        (u'\u266D', 'flat'),
        (u'\u2013', '-'),
        (u'\u2014', '-'),
    )
    for old, new in spelling_rules:
        text = text.replace(old, new)
    # decompose accented characters and drop the combining marks
    decomposed = unicodedata.normalize('NFD', text)
    unaccented = ''.join(
        ch for ch in decomposed if unicodedata.category(ch) != 'Mn')
    # strip everything that is not an ASCII word character
    stripped = re.sub(r'\W*', '', unaccented, flags=re.ASCII)
    boiled = stripped.strip().lower().rstrip("s'")
    write_log(release_id, 'debug', "boiled result = %s", boiled)
    return boiled
################
# OPTIONS PAGE #
################

class ClassicalExtrasOptionsPage(OptionsPage):
    """
    Options page for the plugin.
    All options are described by the dicts returned by plugin_options();
    this class registers them with Picard and copies values between the
    stored settings and the widgets of Ui_ClassicalExtrasOptionsPage.
    """
    NAME = "classical_extras"
    TITLE = "Classical Extras"
    PARENT = "plugins"
    opts = plugin_options('artists') + plugin_options('tag') + plugin_options('tag_detail') +\
        plugin_options('workparts') + plugin_options('genres') + plugin_options('other')

    # highest tab index accepted when restoring the last used tab
    _MAX_TAB_INDEX = 5

    options = [
        IntOption("persist", 'ce_tab', 0)
    ]
    # custom logging for non-album-related messages is written to session.log
    for opt in opts:
        opt_type = opt.get('type')
        default = opt.get('default', "")
        if opt_type == 'Boolean':
            options.append(BoolOption("setting", opt['option'], default))
        elif opt_type in ('Text', 'Combo', 'PlainText'):
            options.append(TextOption("setting", opt['option'], default))
        elif opt_type == 'Integer':
            options.append(IntOption("setting", opt['option'], default))
        else:
            # a missing type and an unrecognised type are both reported
            write_log(
                "session",
                'error',
                "Error in setting options for option = %s",
                opt['option'])

    def __init__(self, parent=None):
        super(ClassicalExtrasOptionsPage, self).__init__(parent)
        self.ui = Ui_ClassicalExtrasOptionsPage()
        self.ui.setupUi(self)

    @staticmethod
    def _all_plugin_options():
        """Return the option definitions of every section, in page order."""
        return plugin_options('artists') + plugin_options('tag') + plugin_options('tag_detail') +\
            plugin_options('workparts') + plugin_options('genres') + plugin_options('other')

    @staticmethod
    def _ui_name(option_name):
        """Return the name of the widget that represents a given option."""
        if option_name == 'classical_work_parts':
            return 'use_cwp'
        if option_name == 'classical_extra_artists':
            return 'use_cea'
        return option_name

    def load(self):
        """
        Load the options - NB all options are set in plugin_options, so this just parses that
        :return:
        """
        opts = self._all_plugin_options()

        # To force a toggle so that signal given
        toggle_list = ['use_cwp',
                       'use_cea',
                       'cea_override',
                       'cwp_override',
                       'cea_ra_use',
                       'cea_split_lyrics',
                       'cwp_partial',
                       'cwp_arrangements',
                       'cwp_medley',
                       'cwp_use_muso_refdb',
                       'ce_show_ui_tags', ]

        # open at last used tab; fall back to the first tab if nothing
        # usable has been stored
        tab_index = 0
        if 'ce_tab' in config.persist:
            stored_index = config.persist['ce_tab'] or 0
            if 0 <= stored_index <= self._MAX_TAB_INDEX:
                tab_index = stored_index
        self.ui.tabWidget.setCurrentIndex(tab_index)

        for opt in opts:
            ui_name = self._ui_name(opt['option'])
            if ui_name in toggle_list:
                # set the opposite value first so that setting the real
                # value below always changes the state
                not_setting = not self.config.setting[opt['option']]
                self.ui.__dict__[ui_name].setChecked(not_setting)

            if opt['type'] == 'Boolean':
                self.ui.__dict__[ui_name].setChecked(
                    self.config.setting[opt['option']])
            elif opt['type'] == 'Text':
                self.ui.__dict__[ui_name].setText(
                    self.config.setting[opt['option']])
            elif opt['type'] == 'PlainText':
                self.ui.__dict__[ui_name].setPlainText(
                    self.config.setting[opt['option']])
            elif opt['type'] == 'Combo':
                self.ui.__dict__[ui_name].setEditText(
                    self.config.setting[opt['option']])
            elif opt['type'] == 'Integer':
                self.ui.__dict__[ui_name].setValue(
                    self.config.setting[opt['option']])
            else:
                write_log(
                    'session',
                    'error',
                    "Error in loading options for option = %s",
                    opt['option'])

    def save(self):
        """
        Save the widget values to the settings (and remember the current tab)
        :return:
        """
        opts = self._all_plugin_options()

        # save tab setting
        config.persist['ce_tab'] = self.ui.tabWidget.currentIndex()

        for opt in opts:
            ui_name = self._ui_name(opt['option'])
            if opt['type'] == 'Boolean':
                self.config.setting[opt['option']] = self.ui.__dict__[
                    ui_name].isChecked()
            elif opt['type'] == 'Text':
                self.config.setting[opt['option']] = str(
                    self.ui.__dict__[ui_name].text())
            elif opt['type'] == 'PlainText':
                self.config.setting[opt['option']] = str(
                    self.ui.__dict__[ui_name].toPlainText())
            elif opt['type'] == 'Combo':
                self.config.setting[opt['option']] = str(
                    self.ui.__dict__[ui_name].currentText())
            elif opt['type'] == 'Integer':
                self.config.setting[opt['option']
                                    ] = self.ui.__dict__[ui_name].value()
            else:
                write_log(
                    'session',
                    'error',
                    "Error in saving options for option = %s",
                    opt['option'])
#################
# MAIN ROUTINE  #
#################
# Everything below runs once, at module level, in this order.

# custom logging for non-album-related messages is written to session.log
write_log('session', 'basic', 'Loading ' + PLUGIN_NAME)

# SET UI COLUMNS FOR PICARD RHS
# One column is appended to MainPanel.columns per entry returned by
# get_ui_tags(); the column reads the hidden tag '~<heading>_VAL'
if config.setting['ce_show_ui_tags'] and config.setting['ce_ui_tags']:
    from picard.ui.itemviews import MainPanel
    UI_TAGS = get_ui_tags().items()
    for heading, tag_names in UI_TAGS:
        heading_tag = '~' + heading + '_VAL'
        MainPanel.columns.append((N_(heading), heading_tag))
    write_log('session', 'info', 'UI_TAGS')
    write_log('session', 'info', UI_TAGS)


# set defaults for certain options that MUST be manually changed by the
# user each time they are to be over-ridden
# (i.e. these stored settings are overwritten here on every load)
config.setting['use_cache'] = True
config.setting['ce_options_overwrite'] = False
config.setting['track_ars'] = True
config.setting['release_ars'] = True


# REFERENCE DATA
# REF_DICT is read for its 'composers', 'periods' and 'genres' entries; an
# error is logged if an option that relies on one of them is set but the
# entry is empty
REF_DICT = get_references_from_file(
    'session',
    config.setting['cwp_muso_path'],
    config.setting['cwp_muso_refdb'])
write_log('session', 'info', 'External references (Muso):')
write_log('session', 'info', REF_DICT)
COMPOSER_DICT = REF_DICT['composers']
if config.setting['cwp_muso_classical'] and not COMPOSER_DICT:
    write_log('session', 'error', 'No composer roster found')
# add lower-case copies of each composer's names and sort names
for cd in COMPOSER_DICT:
    cd['lc_name'] = [c.lower() for c in cd['name']]
    cd['lc_sort'] = [c.lower() for c in cd['sort']]
PERIOD_DICT = REF_DICT['periods']
if (config.setting['cwp_muso_dates']
        or config.setting['cwp_muso_periods']) and not PERIOD_DICT:
    write_log('session', 'error', 'No period map found')
GENRE_DICT = REF_DICT['genres']
if config.setting['cwp_muso_genres'] and not GENRE_DICT:
    write_log('session', 'error', 'No classical genre list found')

# API CALLS
# register the two track metadata processors and the options page with Picard
register_track_metadata_processor(PartLevels().add_work_info)
register_track_metadata_processor(ExtraArtists().add_artist_info)
register_options_page(ClassicalExtrasOptionsPage)

# END
write_log('session', 'basic', 'Finished intialisation')