1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2018 Mark Evens
4#
5# This program is free software; you can redistribute it and/or
6# modify it under the terms of the GNU General Public License
7# as published by the Free Software Foundation; either version 2
8# of the License, or (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13# GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program; if not, write to the Free Software
17# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18# 02110-1301, USA.
19
# Plugin identification strings.
# PLUGIN_NAME is also used by write_log() as a prefix for messages sent to the main Picard log
# and in the header of each custom log file.
# PLUGIN_DESCRIPTION is written with HTML markup (<br /> line breaks and an <a> link to the readme).
PLUGIN_NAME = u'Classical Extras'
PLUGIN_AUTHOR = u'Mark Evens'
PLUGIN_DESCRIPTION = u"""Classical Extras provides tagging enhancements for Picard and, in particular,
utilises MusicBrainz’s hierarchy of works to provide work/movement tags. All options are set through a
user interface in Picard options->plugins. This interface provides separate sections
to enhance artist/performer tags, works and parts, genres and also allows for a generalised
"tag mapping" (simple scripting).
While it is designed to cater for the complexities of classical music tagging,
it may also be useful for other music which has more than just basic song/artist/album data.
<br /><br />
The options screen provides five tabs for users to control the tags produced:
<br /><br />
1. Artists: Options as to whether artist tags will contain standard MB names, aliases or as-credited names.
Ability to include and annotate names for specialist roles (chorus master, arranger, lyricist etc.).
Ability to read lyrics tags on the file which has been loaded and assign them to track and album levels if required.
(Note: Picard will not normally process incoming file tags).
<br /><br />
2. Works and parts: The plugin will build a hierarchy of works and parts (e.g. Work -> Part -> Movement or
Opera -> Act -> Number) based on the works in MusicBrainz's database. These can then be displayed in tags in a variety
of ways according to user preferences. Furthermore partial recordings, medleys, arrangements and collections of works
are all handled according to user choices. There is a processing overhead for this at present because MusicBrainz limits
look-ups to one per second.
<br /><br />
3. Genres etc.: Options are available to customise the source and display of information relating to genres,
instruments, keys, work dates and periods. Additional capabilities are provided for users of Muso (or others who
provide the relevant XML files) to use pre-existing databases of classical genres, classical composers and classical
periods.
<br /><br />
4. Tag mapping: in some ways, this is a simple substitute for some of Picard's scripting capability. The main advantage
 is that the plugin will remember what tag mapping you use for each release (or even track).
<br /><br />
5. Advanced: Various options to control the detailed processing of the above.
<br /><br />
All user options can be saved on a per-album (or even per-track) basis so that tweaks can be used to deal with
inconsistencies in the MusicBrainz data (e.g. include English titles from the track listing where the MusicBrainz works
are in the composer's language and/or script).
Also existing file tags can be processed (not possible in native Picard).
<br /><br />
See the readme file <a href="https://github.com/MetaTunes/picard-plugins/tree/metabrainz/2.0/plugins/classical_extras">
on GitHub here</a> for full details.
"""
61
62########################
63# DEVELOPERS NOTES: ####
64########################
65#  This plugin contains 3 classes:
66#
67# I. ("EXTRA ARTISTS") Create sorted fields for all performers. Creates a number of variables with alternative values
68# for "artists" and "artist".
69# Creates an ensemble variable for all ensemble-type performers.
70# Also creates matching sort fields for artist and artists.
71# Additionally create tags for artist types which are not normally created in Picard - particularly for classical music
72#  (notably instrument arrangers).
73#
# II. ("PART LEVELS" [aka Work Parts]) Create tags for the hierarchy of works which contain a given track recording
# - particularly for classical music.
76# Variables provided for each work level, with implied part names
77# Mixed metadata provided including work and title elements
78#
79# III. ("OPTIONS") Allows the user to set various options including what tags will be written
80# (otherwise the classes above will just write outputs to "hidden variables")
81#
82# The main control routine is at the end of the module
83
# Version and licence metadata for the plugin.
# PLUGIN_VERSION is also written by write_log() at the top of every custom log file.
# PLUGIN_API_VERSIONS is presumably the list of Picard plugin-API versions supported - confirm
# against the Picard plugin documentation before changing it.
PLUGIN_VERSION = '2.0.12'
PLUGIN_API_VERSIONS = ["2.0", "2.1", "2.2", "2.3", "2.4"]
PLUGIN_LICENSE = "GPL-2.0"
PLUGIN_LICENSE_URL = "https://www.gnu.org/licenses/gpl-2.0.html"
88
89from picard.ui.options import register_options_page, OptionsPage
90from picard.plugins.classical_extras.ui_options_classical_extras import Ui_ClassicalExtrasOptionsPage
91import picard.plugins.classical_extras.suffixtree
92from picard import config, log
93from picard.config import ConfigSection, BoolOption, IntOption, TextOption
94from picard.util import LockableObject, uniqify
95
96# note that in 2.0 picard.webservice changed to picard.util.xml
97from picard.util.xml import XmlNode
98from picard.util import translate_from_sortname
99from picard.metadata import register_track_metadata_processor, Metadata
100from functools import partial
101from datetime import datetime
102import collections
103import re
104import unicodedata
105import json
106import copy
107import os
108from PyQt5.QtCore import QXmlStreamReader
109from picard.const import USER_DIR
110import operator
111import ast
112import picard.plugins.classical_extras.const
113
114
115
116##########################
117# MODULE-WIDE COMPONENTS #
118##########################
# CONSTANTS
# N.B. Constants with long definitions are set in const.py
# Presumably the separator placed between the components of a date - its users are outside
# this section, so confirm before changing.
DATE_SEP = '-'

# COMMONLY USED REGEX
# A roman numeral starting at a word boundary and followed by one of . - : ; , whitespace or
# end of string. The look-ahead (?=[MDCLXVI]) requires at least one numeral character, because
# every part of the numeral pattern itself is optional. Group 1 captures the numeral.
ROMAN_NUMERALS = r'\b((?=[MDCLXVI])(M{0,4}(CM|CD|D?)?C{0,3}(XC|XL|L?)?X{0,3}(IX|IV|V?)?I{0,3}))(?:\.|\-|:|;|,|\s|$)'
# As above, but only at the start of the string (leading non-word characters are allowed)
ROMAN_NUMERALS_AT_START = r'^\W*' + ROMAN_NUMERALS
# Both compiled patterns are case-insensitive
RE_ROMANS = re.compile(ROMAN_NUMERALS, re.IGNORECASE)
RE_ROMANS_AT_START = re.compile(ROMAN_NUMERALS_AT_START, re.IGNORECASE)
# KEYS
# Group 1: a single note letter A-G at the start of a word
RE_NOTES = r'(\b[ABCDEFG])'
# Group 2: the accidental - "-sharp", "-flat", " sharp", " flat", U+266F (sharp sign) or
# U+266D (flat sign) - or, if there is none, just the separator after the note
# (optional : , or . followed by whitespace; end of string; or a hyphen)
RE_ACCENTS = r'(\-sharp(?:\s+|\b)|\-flat(?:\s+|\b)|\ssharp(?:\s+|\b)|\sflat(?:\s+|\b)|\u266F(?:\s+|\b)|\u266D(?:\s+|\b)|(?:[:,.]?\s+|$|\-))'
# Group 3: optional "major" or "minor", ending at a word boundary or end of string
RE_SCALES = r'(major|minor)?(?:\b|$)'
# Complete key pattern (note + accidental + scale), case-insensitive
RE_KEYS = re.compile(
    RE_NOTES + RE_ACCENTS + RE_SCALES,
    re.UNICODE | re.IGNORECASE)
135
# LOGGING

# If logging occurs before any album is loaded, the startup log file will
# be written
# log_files keeps track of which custom log files are open: the keys are release-ids (or
# 'session' for the session log) and write_log() stores the open file object as the value.
# close_log() closes and removes the entry for a release.
log_files = collections.defaultdict(dict)
# release_status holds the processing state of each release, keyed by release-id. Because it is
# a defaultdict, merely indexing it with a new release-id creates an (empty) entry.
release_status = collections.defaultdict(dict)
# release_status[release_id]['works'] = True indicates that we are still processing works for release_id
# & similarly for 'artists'
# release_status[release_id]['start'] holds start time of release processing
# release_status[release_id]['name'] holds the album name
# release_status[release_id]['lookups'] holds number of lookups for this release
# release_status[release_id]['file_objects'] holds a cumulative list of file objects (tagger seems a bit unreliable)
# release_status[release_id]['file_found'] = False indicates that "No file
# with matching trackid" has (yet) been found
# release_status[release_id]['done-lookups'], ['artists-done'] and ['works-done'] are read by close_log()
# and have 'start' subtracted from them to give the lookup, artists and works times
# release_status[release_id]['errors'], ['warnings'] and ['debug'] are lists of messages collected by
# write_log() and copied to the session log by close_log()
151
152
def write_log(release_id, log_type, message, *args):
    """
    Custom logging function - if log_info is set, all messages will be written to a custom file in a 'Classical_Extras'
    subdirectory in the same directory as the main Picard log. A different file is used for each album,
    to aid in debugging - the log file is release_id.log. Any startup messages (i.e. before a release has been loaded)
    are written to session.log. Summary information for each release is also written to session.log even if log_info
    is not set.
    Debug, warning and error messages are additionally sent to the main Picard log (prefixed with the plugin name)
    and collected in release_status[release_id] for the end-of-release summary, provided that the corresponding
    log_debug / log_warning / log_error option is set.
    :param release_id: name for log file - usually =musicbrainz_albumid
        unless called outside metadata processor
    :param log_type: 'error', 'warning', 'debug', 'info' or 'basic' ('basic' is always written to the custom log)
    :param message: string, e.g. 'error message for workid: %s'. Any other object is logged using its repr.
    :param args: arguments for parameters in string, e.g. if workId then str(workId) will replace %s in the above
    :return: None
    """
    options = config.setting
    # Non-string messages (lists, dicts etc.) are logged via their repr. The same template is used
    # for the custom log and for the main Picard log, so that neither fails on a non-string message.
    template = message if isinstance(message, str) else repr(message)
    msg = template % args if args else template

    if options["log_info"] or log_type == "basic":
        # if log_info is True, all log messages will be written to the custom log, regardless of other log_... settings
        # basic session log will always be written (summary of releases and
        # processing times)
        filename = release_id + ".log"
        if release_id not in log_files:
            log_dir = os.path.join(USER_DIR, "Classical_Extras")
            try:
                # exist_ok avoids a race between testing for the directory and creating it
                os.makedirs(log_dir, exist_ok=True)
                # buffering=1 (line buffered) so that session log (low volume) is up to date
                # even if not closed; default buffering for the per-release logs, for speed
                log_file = open(
                    os.path.join(log_dir, filename),
                    'w',
                    encoding='utf8',
                    buffering=1 if release_id == 'session' else -1)
                log_files[release_id] = log_file
                log_file.write(PLUGIN_NAME + ' Version:' + PLUGIN_VERSION + '\n')
                if release_id == 'session':
                    log_file.write('session' + '\n')
                else:
                    log_file.write('Release id: ' + release_id + '\n')
                    if release_id in release_status and 'name' in release_status[release_id]:
                        log_file.write(
                            'Album name: ' + release_status[release_id]['name'] + '\n')
            except IOError:
                log.error('Unable to open file %s for writing log', filename)
                return
        else:
            log_file = log_files[release_id]
        try:
            log_file.write(
                log_type[0].upper() + ': ' + str(datetime.now()) + ' : ' + msg + "\n")
        except IOError:
            log.error('Unable to write to log file %s', filename)
            return

    # Only debug, warning and error messages will be written to the main
    # Picard log, if those options have been set
    # log_type -> (option which enables it, release_status key collecting the messages, Picard log function)
    picard_loggers = {
        'debug': ('log_debug', 'debug', log.debug),
        'warning': ('log_warning', 'warnings', log.warning),
        'error': ('log_error', 'errors', log.error),
    }
    if log_type not in picard_loggers:
        return  # 'info' and 'basic' items only go to the custom log
    option_name, status_key, emit = picard_loggers[log_type]
    if not options[option_name]:
        return
    # release_status is a defaultdict, so this creates the entry for a new release_id
    status = release_status[release_id]
    if status_key in status:
        add_list_uniquely(status[status_key], msg)
    else:
        status[status_key] = [msg]
    emit(PLUGIN_NAME + ': ' + template, *args)
259
260
def close_log(release_id, caller):
    """
    Close the custom log file for a release, if we are done, and write its summary to the session log.
    :param release_id: the release whose log is to be closed ('session' is ignored)
    :param caller: 'works' or 'artists' marks that part of the processing as finished; the log is only
        closed once the other part is no longer flagged as in progress in release_status.
        Any other value closes the log without that check.
    :return: None
    """
    if release_id == 'session':
        # shouldn't happen but, just in case, don't close the session log
        return
    if caller in ('works', 'artists'):
        release_status[release_id][caller] = False
        counterpart = 'artists' if caller == 'works' else 'works'
        if release_status[release_id][counterpart]:
            # the other part of the processing is still running
            return

    # Defaults, used if nothing is recorded for this release
    duration = lookups = 'N/A'
    artists_time = works_time = lookup_time = album_process_time = 0
    if release_id in release_status:
        timings = release_status[release_id]
        began = timings['start']
        duration = datetime.now() - began
        lookups = timings['lookups']
        lookup_time = timings['done-lookups'] - began
        album_process_time = duration - lookup_time
        artists_time = timings['artists-done'] - began
        works_time = timings['works-done'] - began
        # the timing entries are used only once
        for used_key in ('start', 'lookups', 'done-lookups', 'artists-done', 'works-done'):
            del timings[used_key]

    # Finish off the release's own log file
    if release_id in log_files:
        write_log(release_id, 'info', 'Duration = %s. Number of lookups = %s.', duration, lookups)
        write_log(release_id, 'info', 'Closing log file for %s', release_id)
        log_files[release_id].close()
        del log_files[release_id]

    # Summarise the release in the session log
    if 'session' in log_files and release_id in release_status:
        summary = release_status[release_id]
        write_log(
            'session',
            'basic',
            '\n Completed processing release id %s. Details below:-',
            release_id)
        if 'name' in summary:
            write_log('session', 'basic', 'Album name %s', summary['name'])
        collected = (
            ('errors', '-------------------- Errors --------------------'),
            ('warnings', '-------------------- Warnings --------------------'),
            ('debug', '-------------------- Debug log --------------------'),
        )
        for status_key, banner in collected:
            if status_key not in summary:
                continue
            write_log('session', 'basic', banner)
            for entry in summary[status_key]:
                write_log('session', 'basic', entry)
            del summary[status_key]
        write_log(
            'session',
            'basic',
            'Duration = %s. Artists time = %s. Works time = %s. Of which: Lookup time = %s. '
            'Album-process time = %s. Number of lookups = %s.',
            duration,
            artists_time,
            works_time,
            lookup_time,
            album_process_time,
            lookups)

    # Forget everything about the release
    if release_id in release_status:
        del release_status[release_id]
348
349
350# FILE READING AND OBJECT PARSING
351
352_node_name_re = re.compile('[^a-zA-Z0-9]')
353
354
355def _node_name(n):
356    return _node_name_re.sub('_', str(n))
357
358
def _read_xml(stream):
    """
    Read an XML stream into a tree of XmlNode objects.
    :param stream: a stream reader offering atEnd(), readNext(), isStartElement(), isEndElement(),
        isCharacters(), attributes(), name() and text() (this module passes a QXmlStreamReader)
    :return: the document XmlNode. Elements are appended as children under their sanitised names
        (see _node_name), attributes are stored in attribs and character data is accumulated in text.
    """
    root = XmlNode()
    ancestors = []  # nodes still open above the one being filled
    cursor = root
    while not stream.atEnd():
        stream.readNext()
        if stream.isStartElement():
            child = XmlNode()
            attributes = stream.attributes()
            for index in range(attributes.count()):
                attribute = attributes.at(index)
                child.attribs[_node_name(attribute.name())] = str(attribute.value())
            cursor.append_child(_node_name(stream.name()), child)
            # descend into the new element
            ancestors.append(cursor)
            cursor = child
            continue
        if stream.isEndElement():
            # element finished - go back up to its parent
            cursor = ancestors.pop()
            continue
        if stream.isCharacters():
            cursor.text += str(stream.text())
    return root
379
380
def parse_data(release_id, obj, response_list, *match):
    """
    This function takes any XmlNode object, or list thereof, or a JSON object
    and extracts a list of all objects exactly matching the hierarchy listed in match.
    match should contain list of each node in hierarchical sequence, with no gaps in the sequence
     of nodes, to lowest level required.
    :param release_id: name for log file - usually =musicbrainz_albumid
        unless called outside metadata processor
    :param obj: an XmlNode or JSON object, list or dictionary containing nodes
    :param response_list: working memory for recursive calls - matching items are appended to this list
        (normally pass []). Include the string '!log' in it to turn on logging of the search, which is
        normally off as it can be VERY wordy (the marker is not removed from the list).
    :param match: list of items to search for in node (see detailed notes below)
    :return: response_list, i.e. a list of matching items (always a list, even if only one item)

    Insert attribs.attribname:attribvalue in the list to select only branches where attribname
     is attribvalue. (Omit the attribs prefix if the obj is JSON)
    Insert childname.text:childtext in the list to select only branches where
     a sibling with childname has text childtext.
      (Note: childname can be a dot-list if the text is more than one level down - e.g. child1.child2
      # TODO - Check this works fully )
    The value to compare is everything after the first colon, so it may itself contain colons.
    The values 'True' and 'False' also match the corresponding Booleans (which are stored as such,
    not as strings, in JSON); no other value is compared with Booleans.
    """
    # Normally logging is off as it can be VERY wordy
    # It can be turned on by using !log in the call
    verbose = '!log' in response_list

    # XmlNode instances are not iterable, so need to convert to dict
    if isinstance(obj, XmlNode):
        obj = obj.__dict__
    if verbose:
        write_log(release_id, 'debug', 'Parsing data - looking for %s', match)
        write_log(release_id, 'info', 'Looking in object: %s', obj)

    if isinstance(obj, list):
        # Search every item of the list, accumulating the results in response_list
        objlen = len(obj)
        for i, item in enumerate(obj):
            if isinstance(item, XmlNode):
                item = item.__dict__
            if verbose:
                write_log(
                    release_id,
                    'info',
                    'Getting response for list item no.%s of %s - object is: %s',
                    i + 1,
                    objlen,
                    item)
            parse_data(release_id, item, response_list, *match)
            if verbose:
                write_log(
                    release_id,
                    'info',
                    'response_list for list item no.%s of %s is %s',
                    i + 1,
                    objlen,
                    response_list)
        return response_list

    if not isinstance(obj, dict):
        if verbose:
            write_log(
                release_id,
                'info',
                'response_list (obj is not a list or dict): %s',
                response_list)
        return response_list

    if match[0] in obj:
        if len(match) == 1:
            # Lowest level required - this is a result
            response = obj[match[0]]
            if response is not None:  # To prevent adding NoneTypes to list
                response_list.append(response)
            if verbose:
                write_log(
                    release_id,
                    'info',
                    'response_list (last match item): %s',
                    response_list)
        else:
            # Go down one level with the remainder of the hierarchy
            parse_data(release_id, obj[match[0]], response_list, *match[1:])
            if verbose:
                write_log(
                    release_id,
                    'info',
                    'response_list (passing up): %s',
                    response_list)
        return response_list

    if ':' in match[0]:
        # Value test: "path.to.item:value" - keep this branch only if the item has that value
        test_path, _, wanted = match[0].partition(':')
        test_data = parse_data(release_id, obj, [], *test_path.split('.'))
        if verbose:
            write_log(
                release_id,
                'info',
                'Value comparison - looking in %s for value %s',
                test_data,
                wanted)
        found = wanted in test_data
        if not found and wanted in ('True', 'False'):
            # Booleans are stored as such, not as strings, in JSON.
            # Only do this for Boolean values - otherwise any other value would match a stored False.
            found = (wanted == 'True') in test_data
        if found:
            if len(match) == 1:
                response_list.append(obj)
            else:
                parse_data(release_id, obj, response_list, *match[1:])
        if verbose:
            write_log(
                release_id,
                'info',
                'response_list (from value look-up): %s',
                response_list)
        return response_list

    # No direct match at this level - look in the children (XmlNode structure)
    if 'children' in obj:
        parse_data(release_id, obj['children'], response_list, *match)
    if verbose:
        write_log(
            release_id,
            'info',
            'response_list (from children): %s',
            response_list)
    return response_list
515
516
def create_dict_from_ref_list(options, release_id, ref_list, keys, tags):
    """
    Turn lists of reference nodes into a flat list of dictionaries.
    :param options: not used in this function
    :param release_id: name for log file (passed on to parse_data)
    :param ref_list: a list of lists of reference nodes
    :param keys: the dictionary keys to use, in the same order as tags
    :param tags: the child node names whose 'text' is to be extracted for each key
    :return: a list with one dictionary per reference node; each value is the list returned by
        parse_data for the corresponding tag
    """
    def as_record(ref):
        # extract the text of every tag, then pair the results up with the keys
        values = [parse_data(release_id, ref, [], tag, 'text') for tag in tags]
        return dict(zip(keys, values))

    return [as_record(ref) for refs in ref_list for ref in refs]
530
531
def get_references_from_file(release_id, path, filename):
    """
    Lookup Muso Reference.xml or similar
    :param release_id: name of log file
    :param path: Reference file path
    :param filename: Reference file name
    :return: a dictionary with keys 'composers', 'periods' and 'genres', each holding a list of
        dictionaries (empty if the file could not be read, in which case an error is logged if any
        of the cwp_muso_... options which need the file is set)
    """
    options = config.setting
    references = {'composers': [], 'periods': [], 'genres': []}
    # (result key, element under ReferenceDB, dictionary keys, matching child tags)
    layouts = (
        ('composers',
         'Composer',
         ['name', 'sort', 'birth', 'death', 'country', 'core'],
         ['Name', 'Sort', 'Birth', 'Death', 'CountryCode', 'Core']),
        ('periods',
         'ClassicalPeriod',
         ['name', 'start', 'end'],
         ['Name', 'Start_x0020_Date', 'End_x0020_Date']),
        ('genres',
         'ClassicalGenre',
         ['name'],
         ['Name']),
    )
    try:
        with open(os.path.join(path, filename), encoding="utf8") as xml_file:
            reply = xml_file.read()
        document = _read_xml(QXmlStreamReader(reply))
        for result_key, element, keys, tags in layouts:
            nodes = parse_data(release_id, document, [], 'ReferenceDB', element)
            references[result_key] = create_dict_from_ref_list(
                options, release_id, nodes, keys, tags)
    except (IOError, FileNotFoundError, UnicodeDecodeError):
        # Only complain if the user has asked for something which needs the file
        muso_options = (
            'cwp_muso_genres',
            'cwp_muso_classical',
            'cwp_muso_dates',
            'cwp_muso_periods')
        if any(options[name] for name in muso_options):
            write_log(
                release_id,
                'error',
                'File %s does not exist or is corrupted',
                os.path.join(path, filename))
    return references
596
597# OPTIONS
598
599
def get_preserved_tags():
    """
    Get the "preserved_tags" setting as a list.
    :return: the setting itself if it is not a string; otherwise the comma-separated string
        split into a list, with surrounding whitespace stripped from each item
    """
    setting = config.setting["preserved_tags"]
    if not isinstance(setting, str):
        return setting
    return [tag.strip() for tag in setting.split(',')]
605
606
607def get_options(release_id, album, track):
608    """
609    Get the saved options from a release and use them according to flags set on the "advanced" tab
610    :param release_id: name for log file - usually =musicbrainz_albumid
611        unless called outside metadata processor
612    :param album: current release
613    :param track: current track
614    :return: None (result is passed via tm)
615    A common function for both Artist and Workparts, so that the first class to process a track will execute
616    this function so that the results are available to both (via a track metadata item)
617    """
618    release_status[release_id]['done'] = False
619    set_options = collections.defaultdict(dict)
620    main_sections = ['artists', 'workparts']
621    all_sections = ['artists', 'tag', 'workparts', 'genres']
622    parent_sections = {
623        'artists': 'artists',
624        'tag': 'artists',
625        'workparts': 'workparts',
626        'genres': 'workparts'}
627    # The above needs to be done for legacy reasons - there  are only two tags which store options - artists and workparts
628    # This dates from when there were only two sections
629    # To split these now will create compatibility issues
630    override = {
631        'artists': 'cea_override',
632        'tag': 'ce_tagmap_override',
633        'workparts': 'cwp_override',
634        'genres': 'ce_genres_override'}
635    sect_text = {'artists': 'Artists', 'workparts': 'Works'}
636    prefix = {'artists': 'cea', 'workparts': 'cwp'}
637
638    if album.tagger.config.setting['ce_options_overwrite'] and all(
639            album.tagger.config.setting[override[sect]] for sect in main_sections):
640        set_options[track] = album.tagger.config.setting  # mutable
641    else:
642        set_options[track] = option_settings(
643            album.tagger.config.setting)  # make a copy
644        if set_options[track]["log_info"]:
645            write_log(
646                release_id,
647                'info',
648                'Default (i.e. per UI) options for track %s are %r',
649                track,
650                set_options[track])
651
652    # As we use some of the main Picard options and may over-write them, save them here
653    # set_options[track]['translate_artist_names'] = config.setting['translate_artist_names']
654    # set_options[track]['standardize_artists'] = config.setting['standardize_artists']
655    # (not sure this is needed - TODO reconsider)
656
657    options = set_options[track]
658    tm = track.metadata
659    new_metadata = None
660    orig_metadata = None
661    # Only look up files if needed
662    file_options = {}
663    music_file = ''
664    music_file_found = None
665    release_status[release_id]['file_found'] = False
666    start = datetime.now()
667    if options["log_info"]:
668        write_log(release_id, 'info', 'Clock start at %s', start)
669    trackno = tm['tracknumber']
670    discno = tm['discnumber']
671
672    album_filenames = album.tagger.get_files_from_objects([album])
673    if options["log_info"]:
674        write_log(
675            release_id,
676            'info',
677            'No. of album files found = %s',
678            len(album_filenames))
679    # Note that sometimes Picard fails to get all the file objects, even if they are there (network issues)
680    # so we will cache whatever we can get!
681    if release_id in release_status and 'file_objects' in release_status[release_id]:
682        add_list_uniquely(
683            release_status[release_id]['file_objects'],
684            album_filenames)
685    else:
686        release_status[release_id]['file_objects'] = album_filenames
687    if options["log_info"]:
688        write_log(release_id, 'info', 'No. of album files cached = %s',
689                  len(release_status[release_id]['file_objects']))
690    track_file = None
691    for album_file in release_status[release_id]['file_objects']:
692        if options["log_info"]:
693            write_log(release_id,
694                      'info',
695                      'Track file = %s, tracknumber = %s, discnumber = %s. Metadata trackno = %s, discno = %s',
696                      album_file.filename,
697                      str(album_file.tracknumber),
698                      str(album_file.discnumber),
699                      trackno,
700                      discno)
701        if str(
702                album_file.tracknumber) == trackno and str(
703                album_file.discnumber) == discno:
704            if options["log_info"]:
705                write_log(
706                    release_id,
707                    'info',
708                    'Track file found = %r',
709                    album_file.filename)
710            track_file = album_file.filename
711            break
712
713    # Note: It would have been nice to do a rough check beforehand of total tracks,
714    # but ~totalalbumtracks is not yet populated
715    if not track_file:
716        album_fullnames = [
717            x.filename for x in release_status[release_id]['file_objects']]
718        if options["log_info"]:
719            write_log(
720                release_id,
721                'info',
722                'Album files found = %r',
723                album_fullnames)
724        for music_file in album_fullnames:
725            new_metadata = album.tagger.files[music_file].metadata
726
727            if 'musicbrainz_trackid' in new_metadata and 'musicbrainz_trackid' in tm:
728                if new_metadata['musicbrainz_trackid'] == tm['musicbrainz_trackid']:
729                    track_file = music_file
730                    break
731        # Nothing found...
732        if new_metadata and 'musicbrainz_trackid' not in new_metadata:
733            if options['log_warning']:
734                write_log(
735                    release_id,
736                    'warning',
737                    'No trackid in file %s',
738                    music_file)
739        if 'musicbrainz_trackid' not in tm:
740            if options['log_warning']:
741                write_log(
742                    release_id,
743                    'warning',
744                    'No trackid in track %s',
745                    track)
746    #
747    # Note that, on initial load, new_metadata == orig_metadata; but, after refresh, new_metadata will have
748    # the same track metadata as tm (plus the file metadata as per orig_metadata), so a trackid match
749    # is then possible for files that do not have musicbrainz_trackid in orig_metadata. That is why
750    # new_metadata is used in the above test, rather than orig_metadata, but orig_metadata is then used below
751    # to get the saved options.
752    #
753
754    # Find the tag with the options:-
755    if track_file:
756        orig_metadata = album.tagger.files[track_file].orig_metadata
757        music_file_found = track_file
758        if options['log_info']:
759            write_log(
760                release_id,
761                'info',
762                'orig_metadata for file %s is',
763                music_file)
764            write_log(release_id, 'info', orig_metadata)
765        for child_section in all_sections:
766            section = parent_sections[child_section]
767            if options[override[child_section]]:
768                if options[prefix[section] + '_options_tag'] + ':' + \
769                        section + '_options' in orig_metadata:
770                    file_options[section] = interpret(
771                        orig_metadata[options[prefix[section] + '_options_tag'] + ':' + section + '_options'])
772                elif options[prefix[section] + '_options_tag'] in orig_metadata:
773                    options_tag_contents = orig_metadata[options[prefix[section] + '_options_tag']]
774                    if isinstance(options_tag_contents, list):
775                        options_tag_contents = options_tag_contents[0]
776                    combined_options = ''.join(options_tag_contents.split(
777                        '(workparts_options)')).split('(artists_options)')
778                    for i, _ in enumerate(combined_options):
779                        combined_options[i] = interpret(
780                            combined_options[i].lstrip('; '))
781                        if isinstance(
782                                combined_options[i],
783                                dict) and 'Classical Extras' in combined_options[i]:
784                            if sect_text[section] + \
785                                    ' options' in combined_options[i]['Classical Extras']:
786                                file_options[section] = combined_options[i]
787                else:
788                    for om in orig_metadata:
789                        if ':' + section + '_options' in om:
790                            file_options[section] = interpret(
791                                orig_metadata[om])
792                if section not in file_options or not file_options[section]:
793                    if options['log_error']:
794                        write_log(
795                            release_id,
796                            'error',
797                            'Saved ' +
798                            section +
799                            ' options cannot be read for file %s. Using current settings',
800                            music_file)
801                    append_tag(
802                        release_id,
803                        tm,
804                        '~' +
805                        prefix[section] +
806                        '_error',
807                        '1. Saved ' +
808                        section +
809                        ' options cannot be read. Using current settings')
810
811        release_status[release_id]['file_found'] = True
812
813    end = datetime.now()
814    if options['log_info']:
815        write_log(release_id, 'info', 'Clock end at %s', end)
816        write_log(release_id, 'info', 'Duration = %s', end - start)
817
818    if not release_status[release_id]['file_found']:
819        if options['log_warning']:
820            write_log(
821                release_id,
822                'warning',
823                "No file with matching trackid for track %s. IF THERE SHOULD BE ONE, TRY 'REFRESH'",
824                track)
825        append_tag(
826            release_id,
827            tm,
828            "002_important_warning",
829            "No file with matching trackid - IF THERE SHOULD BE ONE, TRY 'REFRESH' - "
830            "(unable to process any saved options, lyrics or 'keep' tags)")
831        # Nothing else is done with this info as yet - ideally we need to refresh and re-run
832        # for all releases where, say, release_status[release_id]['file_prob']
833        # == True  TODO?
834
835    else:
836        if options['log_info']:
837            write_log(
838                release_id,
839                'info',
840                'Found music file: %r',
841                music_file_found)
842        for section in all_sections:
843            if options[override[section]]:
844                parent_section = parent_sections[section]
845                if parent_section in file_options and file_options[parent_section]:
846                    try:
847                        options_dict = file_options[parent_section]['Classical Extras'][sect_text[parent_section] + ' options']
848                    except TypeError as err:
849                        if options['log_error']:
850                            write_log(
851                                release_id,
852                                'error',
853                                'Error: %s. Saved ' +
854                                section +
855                                ' options cannot be read for file %s. Using current settings',
856                                err,
857                                music_file)
858                        append_tag(
859                            release_id,
860                            tm,
861                            '~' +
862                            prefix[parent_section] +
863                            '_error',
864                            '1. Saved ' +
865                            parent_section +
866                            ' options cannot be read. Using current settings')
867                        break
868                    for opt in options_dict:
869                        if isinstance(
870                                options_dict[opt],
871                                dict) and options[override['tag']]:  # for tag line options
872                            # **NB tag mapping lines are the only entries of type dict**
873                            opt_list = []
874                            for opt_item in options_dict[opt]:
875                                opt_list.append(
876                                    {opt + '_' + opt_item: options_dict[opt][opt_item]})
877                        else:
878                            opt_list = [{opt: options_dict[opt]}]
879                        for opt_dict in opt_list:
880                            for opt_det in opt_dict:
881                                opt_value = opt_dict[opt_det]
882                                addn = []
883                                if section == 'artists':
884                                    addn = plugin_options('picard')
885                                if section == 'tag':
886                                    addn = plugin_options('tag_detail')
887                                for ea_opt in plugin_options(section) + addn:
888                                    displayed_option = options[ea_opt['option']]
889                                    if ea_opt['name'] == opt_det:
890                                        if 'value' in ea_opt:
891                                            if ea_opt['value'] == opt_value:
892                                                options[ea_opt['option']] = True
893                                            else:
894                                                options[ea_opt['option']
895                                                        ] = False
896                                        else:
897                                            options[ea_opt['option']
898                                                    ] = opt_value
899                                        if options[ea_opt['option']
900                                                   ] != displayed_option:
901                                            if options['log_debug'] or options['log_info']:
902                                                write_log(
903                                                    release_id,
904                                                    'info',
905                                                    'Options overridden for option %s = %s',
906                                                    ea_opt['option'],
907                                                    opt_value)
908
909                                            opt_text = str(opt_value)
910                                            append_tag(
911                                                release_id, tm, '003_information:options_overridden', str(
912                                                    ea_opt['name']) + ' = ' + opt_text)
913
914        if orig_metadata:
915            keep_list = options['cea_keep'].split(",")
916            if options['cea_split_lyrics'] and options['cea_lyrics_tag']:
917                keep_list.append(options['cea_lyrics_tag'])
918            if options['cwp_genres_use_file']:
919                if 'genre' in orig_metadata:
920                    append_tag(
921                        release_id,
922                        tm,
923                        '~cwp_candidate_genres',
924                        orig_metadata['genre'])
925                if options['cwp_genre_tag'] and options['cwp_genre_tag'] in orig_metadata:
926                    keep_list.append(options['cwp_genre_tag'])
927            really_keep_list = get_preserved_tags()[:]
928            really_keep_list.append(
929                options['cwp_options_tag'] +
930                ':workparts_options')
931            really_keep_list.append(
932                options['cea_options_tag'] +
933                ':artists_options')
934            for tagx in keep_list:
935                tag = tagx.strip()
936                really_keep_list.append(tag)
937                if tag in orig_metadata:
938                    append_tag(release_id, tm, tag, orig_metadata[tag])
939            if options['cea_clear_tags']:
940                delete_list = []
941                for tag_item in orig_metadata:
942                    if tag_item not in really_keep_list and tag_item[0] != '~':
943                        # the second condition is to ensure that (hidden) file variables are not deleted,
944                        #  as these are in orig_metadata, not track_metadata
945                        delete_list.append(tag_item)
946                # this will be used in map_tags to delete unwanted tags
947                options['delete_tags'] = delete_list
948            ## Create a "mirror" tag with the old data, for comparison purposes
949            mirror_tags = []
950            for tag_item in orig_metadata:
951                mirror_name = tag_item + '_OLD'
952                if mirror_name[0] == '~' :
953                    mirror_name.replace('~', '_')
954                mirror_name = '~' + mirror_name
955                mirror_tags.append((mirror_name, tag_item))
956                append_tag(release_id, tm, mirror_name, orig_metadata[tag_item])
957            append_tag(release_id, tm, '~ce_mirror_tags', mirror_tags)
958
959        if not isinstance(options, dict):
960            options_dict = option_settings(config.setting)
961            write_log(
962                'session',
963                'info',
964                'Using option_settings(config.setting): %s',
965                options_dict)
966        else:
967            options_dict = options
968            write_log(
969                'session',
970                'info',
971                'Using options: %s',
972                options_dict)
973        tm['~ce_options'] = str(options_dict)
974        tm['~ce_file'] = music_file_found
975
976
def plugin_options(option_type):
    """
    Look up the option definitions for one category of plugin settings.
    :param option_type: 'artists', 'tag', 'tag_detail', 'workparts', 'genres',
        'picard' or 'other'
    :return: the matching options constant from the const module, or None if
        option_type is not one of the names above
    Keeping the category -> constant mapping here means it does not have to be
    repeated elsewhere.
    """
    # (category, attribute name on const) pairs, tested in this order. The
    # attribute is only fetched from const for the category that matches.
    categories = (
        ('artists', 'ARTISTS_OPTIONS'),
        ('tag', 'TAG_OPTIONS'),
        ('tag_detail', 'TAG_DETAIL_OPTIONS'),
        ('workparts', 'WORKPARTS_OPTIONS'),
        ('genres', 'GENRE_OPTIONS'),
        ('picard', 'PICARD_OPTIONS'),
        ('other', 'OTHER_OPTIONS'),
    )
    for category, const_name in categories:
        if option_type == category:
            return getattr(const, const_name)
    return None
999
def option_settings(config_settings):
    """
    Take a snapshot of the current Classical Extras settings.
    :param config_settings: options from UI, indexable by option key
    :return: dict mapping each option key to a deep copy of its value, so that
        later changes to the returned dict do not touch config_settings
    """
    # Every option definition known to the plugin, category by category.
    definitions = (
        plugin_options('artists')
        + plugin_options('tag')
        + plugin_options('tag_detail')
        + plugin_options('workparts')
        + plugin_options('genres')
        + plugin_options('picard')
        + plugin_options('other')
    )
    return {
        definition['option']: copy.deepcopy(
            config_settings[definition['option']])
        for definition in definitions
    }
1011
1012
def get_aliases(self, release_id, album, options, releaseXmlNode):
    """
    Collect artist aliases and as-credited names for a release.

    :param release_id: name for log file - usually =musicbrainz_albumid
        unless called outside metadata processor
    :param self: the calling object, on which the results are stored
    :param album: the current album (key for the per-album results)
    :param options: the plugin options in force
    :param releaseXmlNode: all the metadata for the release
    :return: None. Data is returned via self.artist_aliases and self.artist_credits[album];
        self.release_artists_sort[album] is also set here.

    Nothing is collected unless at least one of the options 'cea_aliases' /
    'cea_aliases_composer' is on AND Picard's 'artist_locale' setting exists.

    Note regarding aliases and credited-as names:
    In a MB release, an artist can appear in one of seven contexts. Each of these is accessible in releaseXmlNode
    and the track and recording contexts are also accessible in trackXmlNode.
    The seven contexts are:
    Recording: credited-as and alias
    Release-group: credited-as and alias
    Release: credited-as and alias
    Release relationship: credited-as and (not reliably?) alias
    Recording relationship (direct): credited-as and (not reliably?) alias
    Recording relationship (via work): credited-as and (not reliably?) alias
    Track: credited-as and alias
    (The above are applied in sequence - e.g. track artist credit will over-ride release artist credit. "Recording" gets
    the lowest priority as it is more generic than the release data {may apply to multiple releases})
    This function collects all the available aliases and as-credited names once (on processing the first track).
    N.B. if more than one release is loaded in Picard, any available alias names loaded so far will be available
    and used. However, as-credited names will only be used from the current release."""

    # The locale check must guard BOTH alias options. Written as
    # "locale and aliases or aliases_composer", operator precedence let
    # 'cea_aliases_composer' alone reach the locale lookup below even when
    # the setting was absent.
    aliases_wanted = options['cea_aliases'] or options['cea_aliases_composer']
    if aliases_wanted and 'artist_locale' in config.setting:
        locale = config.setting["artist_locale"]
        lang = locale.split("_")[0]  # NB this is the Picard code in /util

        # Track and recording aliases/credits are gathered by parsing the
        # media, track and recording nodes
        # Do the recording artists first: a recording may apply to multiple
        # releases, so the release and track data (processed later) is more
        # specific.
        media = parse_data(release_id, releaseXmlNode, [], 'media')
        for m in media:
            tracks = parse_data(release_id, m, [], 'tracks')
            for track in tracks:
                for t in track:
                    # Recording artists
                    obj = parse_data(release_id, t, [], 'recording')
                    get_aliases_and_credits(
                        self,
                        options,
                        release_id,
                        album,
                        obj,
                        lang,
                        options['cea_recording_credited'])

        # Get the release data before the recording relationships and track data
        # Release group artists
        obj = parse_data(release_id, releaseXmlNode, [], 'release-group')
        get_aliases_and_credits(
            self,
            options,
            release_id,
            album,
            obj,
            lang,
            options['cea_group_credited'])

        # Release artists
        get_aliases_and_credits(
            self,
            options,
            release_id,
            album,
            releaseXmlNode,
            lang,
            options['cea_credited'])
        # Next bit needed to identify artists who are album artists
        self.release_artists_sort[album] = parse_data(
            release_id, releaseXmlNode, [], 'artist-credit', 'artist', 'sort-name')
        # Release relationship artists
        get_relation_credits(
            self,
            options,
            release_id,
            album,
            releaseXmlNode,
            lang,
            options['cea_release_relationship_credited'])

        # Now get the rest (second pass over the same media list):
        for m in media:
            tracks = parse_data(release_id, m, [], 'tracks')
            for track in tracks:
                for t in track:
                    # Recording relationship artists
                    obj = parse_data(release_id, t, [], 'recording')
                    get_relation_credits(
                        self,
                        options,
                        release_id,
                        album,
                        obj,
                        lang,
                        options['cea_recording_relationship_credited'])
                    # Track artists - processed last
                    get_aliases_and_credits(
                        self,
                        options,
                        release_id,
                        album,
                        t,
                        lang,
                        options['cea_track_credited'])

    if options['log_info']:
        write_log(release_id, 'info', 'Alias and credits info for %s', self)
        write_log(release_id, 'info', 'Aliases :%s', self.artist_aliases)
        write_log(
            release_id,
            'info',
            'Credits :%s',
            self.artist_credits[album])
1136
1137
def get_artists(options, release_id, tm, relations, relation_type):
    """
    Get artist info from XML lookup
    :param release_id: name for log file - usually =musicbrainz_albumid
        unless called outside metadata processor
    :param options: the plugin options in force (the log_* flags are read here)
    :param tm: track metadata, passed through to create_artist_data
    :param relations: the relations to be searched for artists
    :param relation_type: 'release', 'recording' or 'work' (NB 'work' does not pass a param for tm);
        used as the key into const.RELATION_TYPES
    :return: dict {'artists': [...], 'instruments': [...]} as accumulated by
        create_artist_data over every artist type for relation_type
    """
    if options['log_debug'] or options['log_info']:
        write_log(
            release_id,
            'debug',
            'In get_artists. relation_type: %s, relations: %s',
            relation_type,
            relations)
    # create_artist_data reads log_options['log_error'] when it meets an
    # artist type missing from const.ARTIST_TYPE_ORDER, so that flag has to be
    # passed along with the others (its absence caused a KeyError there).
    log_options = {
        'log_debug': options['log_debug'],
        'log_info': options['log_info'],
        'log_error': 'log_error' in options and options['log_error']}
    artists = []
    instruments = []
    # Each call extends/re-sorts the running lists and hands them back.
    for artist_type in const.RELATION_TYPES[relation_type]:
        artists, instruments = create_artist_data(release_id, options, log_options, tm, relations,
                                                  relation_type, artist_type, artists, instruments)
    return {'artists': artists, 'instruments': instruments}
1167
1168
def create_artist_data(release_id, options, log_options, tm, relations,
                       relation_type, artist_type, artists, instruments):
    """
    Update the artists and instruments
    :param release_id: the current album id
    :param options: the plugin options in force ('cea_inst_credit' is read here)
    :param log_options: dict of logging flags; 'log_info' is required, 'log_error'
        is optional (falls back to options if absent)
    :param tm: track metadata; the hidden ~cea_instruments* variables are written to it
    :param relations: the relations to be searched
    :param relation_type: 'release', 'recording' or 'work' (NB 'work' does not pass a param for tm)
    :param artist_type: from const.RELATION_TYPES[relation_type]
    :param artists: current artist list - updated with each call
    :param instruments: current instruments list - updated with each call
    :return: artists, instruments
        artists is a sorted list of tuples (artist_type, instrument_list,
        name list, sort-name list, instrument_sort, type_sort)
    """
    # Callers may supply log_options without 'log_error'; indexing it directly
    # raised KeyError for any artist type missing from ARTIST_TYPE_ORDER.
    log_error = log_options.get('log_error')
    if log_error is None:
        log_error = 'log_error' in options and options['log_error']

    type_list = parse_data(
        release_id,
        relations,
        [],
        'target-type:artist',
        'type:' +
        artist_type)
    for type_item in type_list:
        artist_name_list = parse_data(
            release_id, type_item, [], 'artist', 'name')
        artist_sort_name_list = parse_data(
            release_id, type_item, [], 'artist', 'sort-name')
        if artist_type not in [
            'instrument',
            'vocal',
            'instrument arranger',
            'vocal arranger']:
            # Only these four relationship types carry instrument attributes
            instrument_list = None
            credited_inst_list = None
        else:
            instrument_list_list = parse_data(
                release_id, type_item, [], 'attributes')
            if instrument_list_list:
                instrument_list = instrument_list_list[0]
            else:
                instrument_list = []
            # Start from the standard names, then substitute the as-credited
            # name wherever one is given
            credited_inst_list = instrument_list[:]
            credited_inst_dict_list = parse_data(
                release_id, type_item, [], 'attribute-credits')  # keyed to insts
            if credited_inst_dict_list:
                credited_inst_dict = credited_inst_dict_list[0]
            else:
                credited_inst_dict = {}
            for i, inst in enumerate(instrument_list):
                if inst in credited_inst_dict:
                    credited_inst_list[i] = credited_inst_dict[inst]

            if artist_type == 'vocal':
                # Make sure a vocal relationship always mentions vocals
                if not instrument_list:
                    instrument_list = ['vocals']
                elif not any('vocals' in x for x in instrument_list):
                    instrument_list.append('vocals')
                    credited_inst_list.append('vocals')
        # fill the hidden vars before we choose to use the as-credited
        # version
        if relation_type != 'work':
            inst_tag = []
            cred_tag = []
            if instrument_list:
                inst_tag = list(set(instrument_list))
            if credited_inst_list:
                cred_tag = list(set(credited_inst_list))
            # These attributes qualify a performance; they are not instruments
            for attrib in ['solo', 'guest', 'additional']:
                if attrib in inst_tag:
                    inst_tag.remove(attrib)
                if attrib in cred_tag:
                    cred_tag.remove(attrib)
            if inst_tag:
                if tm['~cea_instruments']:
                    tm['~cea_instruments'] = add_list_uniquely(
                        tm['~cea_instruments'], inst_tag)
                else:
                    tm['~cea_instruments'] = inst_tag
            if cred_tag:
                if tm['~cea_instruments_credited']:
                    tm['~cea_instruments_credited'] = add_list_uniquely(
                        tm['~cea_instruments_credited'], cred_tag)
                else:
                    tm['~cea_instruments_credited'] = cred_tag
            if inst_tag or cred_tag:
                if tm['~cea_instruments_all']:
                    tm['~cea_instruments_all'] = add_list_uniquely(
                        tm['~cea_instruments_all'], list(set(inst_tag + cred_tag)))
                else:
                    tm['~cea_instruments_all'] = list(
                        set(inst_tag + cred_tag))
        if '~cea_instruments' in tm and '~cea_instruments_credited' in tm and '~cea_instruments_all' in tm:
            instruments = [
                tm['~cea_instruments'],
                tm['~cea_instruments_credited'],
                tm['~cea_instruments_all']]
        if options['cea_inst_credit'] and credited_inst_list:
            instrument_list = credited_inst_list
        if instrument_list:
            # Default rank is 3; the attributes below move an artist up or down
            instrument_sort = 3
            s_key = {
                'lead vocals': 1,
                'solo': 2,
                'guest': 4,
                'additional': 5}
            for inst in s_key:
                if inst in instrument_list:
                    instrument_sort = s_key[inst]
        else:
            instrument_sort = 0

        if artist_type in const.ARTIST_TYPE_ORDER:
            type_sort = const.ARTIST_TYPE_ORDER[artist_type]
        else:
            type_sort = 99
            if log_error:
                write_log(
                    release_id,
                    'error',
                    "Error in artist type. Type '%s' is not in ARTIST_TYPE_ORDER dictionary",
                    artist_type)

        artist = (
            artist_type,
            instrument_list,
            artist_name_list,
            artist_sort_name_list,
            instrument_sort,
            type_sort)
        artists.append(artist)
        # Sorted by artist type order, then sort name, then instrument_sort,
        # then instrument list. The instrument list may be None, which cannot
        # be ordered against a list, so it is treated as empty for comparison.
        artists = sorted(
            artists, key=lambda x: (
                x[5], x[3], x[4], x[1] or []))
        if log_options['log_info']:
            write_log(release_id, 'info', 'sorted artists = %s', artists)
    return artists, instruments
1304
1305
def get_series(options, release_id, relations):
    """
    Get series info (depends on lookup having used inc=series-rel)
    :param options: not used in this function
    :param release_id: name for log file, passed to parse_data
    :param relations: the relations to be searched for 'part of' series links
    :return: None if no such relation is found, otherwise a dict
        {'name_list': ..., 'id_list': ..., 'number_list': ...}
    """
    series_relations = parse_data(
        release_id,
        relations,
        [],
        'target-type:series',
        'type:part of')
    if not series_relations:
        return None

    names, ids, numbers = [], [], []
    # NOTE(review): each pass replaces (rather than extends) the three lists,
    # so with several series relations only the last one is reported -
    # confirm whether accumulation was intended.
    for series_relation in series_relations:
        names = parse_data(
            release_id, series_relation, [], 'series', 'name')
        ids = parse_data(
            release_id, series_relation, [], 'series', 'id')
        numbers = parse_data(
            release_id, series_relation, [], 'attribute-values', 'number')
    return {
        'name_list': names,
        'id_list': ids,
        'number_list': numbers}
1361
1362
1363
def apply_artist_style(
        options,
        release_id,
        lang,
        a_list,
        name_style,
        name_tag,
        sort_tag,
        names_tag,
        names_sort_tag):
    """
    Build artist name / sort-name tags from artist credits in a chosen naming style.
    :param options: not used in this function
    :param release_id: name for log file, passed to parse_data
    :param lang: language code used to select locale aliases; if empty, no
        alias lookup is made
    :param a_list: iterable of artist-credit groups, each an iterable of
        individual credits
    :param name_style: styles to try in order, each one of 'alias', 'credit'
        or 'sort'; the first that yields any name is used, otherwise the
        credit's 'name' values are used
    :param name_tag: list, extended in place with names and join phrases
    :param sort_tag: list, extended in place with sort names and join phrases
    :param names_tag: list, extended in place with names only
    :param names_sort_tag: list, extended in place with sort names only
    :return: dict with 'artists' (names_tag), 'artists_sort' (names_sort_tag),
        'artist' (name_tag joined into one string) and 'artistsort'
        (sort_tag joined into one string)
    """
    for credit_group in a_list:
        for credit in credit_group:
            default_names = parse_data(release_id, credit, [], 'name')
            sort_names = parse_data(
                release_id, credit, [], 'artist', 'sort-name')

            # Candidate names for each style that name_style may ask for
            candidates = {}
            if lang:
                candidates['alias'] = parse_data(
                    release_id,
                    credit,
                    [],
                    'artist',
                    'aliases',
                    'locale:' + lang,
                    'primary:True',
                    'name')
            else:
                candidates['alias'] = []
            candidates['credit'] = parse_data(release_id, credit, [], 'name')
            candidates['sort'] = [
                translate_from_sortname(*name_pair)
                for name_pair in list(zip(default_names, sort_names))]

            # First style with a non-empty result wins
            chosen_names = next(
                (candidates[style]
                 for style in name_style if candidates[style]),
                default_names)
            join_phrases = parse_data(release_id, credit, [], 'joinphrase')

            if chosen_names:
                name_tag.append(chosen_names[0])
                sort_tag.append(sort_names[0])
                names_tag.append(chosen_names[0])
                names_sort_tag.append(sort_names[0])

            # Join phrases go only into the lists that become single strings
            if join_phrases:
                name_tag.append(join_phrases[0])
                sort_tag.append(join_phrases[0])

    return {
        'artists': names_tag,
        'artists_sort': names_sort_tag,
        'artist': ''.join(name_tag),
        'artistsort': ''.join(sort_tag)}
1421
1422
def set_work_artists(self, release_id, album, track, writerList, tm, count):
    """
    Write the writer-type artists of a work (as listed in writerList) into the track metadata.

    :param self: the calling object from Artists or WorkParts. This function reads self.options,
        self.lyricist_filled, self.artist_credits, self.artist_aliases and self.release_artists_sort,
        may set self.lyricist_filled[track] and calls self.append_tag
    :param release_id: passed through to write_log and self.append_tag
    :param album: the current album
    :param track: the current track
    :param writerList: format [(artist_type, [instrument_list], [name list],[sort_name list]),(.....etc]
    :param tm: track metadata
    :param count: depth count of recursion in process_work_artists (should equate to part level);
        only used for logging in this function
    :return: None - action is through setting tm contents
    """

    options = self.options[track]
    # The prefix of the hidden tags (and the caller name used in the log and in the
    # lyricist checks below) depends on whether the works-and-parts option is set
    if not options['classical_work_parts']:
        caller = 'ExtraArtists'
        pre = '~cea'
    else:
        caller = 'PartLevels'
        pre = '~cwp'
    write_log(
        release_id,
        'debug',
        'Class: %s: in set_work_artists for track %s. Count (level) is %s. Writer list is %s',
        caller,
        track,
        count,
        writerList)
    # tag strings are a tuple (Picard tag, cwp tag, Picard sort tag, cwp sort
    # tag) (NB this is modelled on set_performer)
    tag_strings = const.tag_strings(pre)
    # insertions lists artist types where names in the main Picard tags may be
    # updated for annotations
    insertions = const.INSERTIONS
    # When called for PartLevels, lyricists/librettists are only written once per track
    # (self.lyricist_filled[track] is set further down, after the first one is written)
    no_more_lyricists = False
    if caller == 'PartLevels' and self.lyricist_filled[track]:
        no_more_lyricists = True

    for writer in writerList:
        writer_type = writer[0]
        # NOTE(review): both checks below use 'break', so ALL remaining writers in
        # writerList are skipped, not just the current one - confirm this is intended
        if writer_type not in tag_strings:
            break
        if no_more_lyricists and (
                writer_type == 'lyricist' or writer_type == 'librettist'):
            break
        if writer[1]:
            inst_list = writer[1][:]
            # take a copy of the list in case (because of list
            # mutability) we need the old one
            instrument = ", ".join(inst_list)
        else:
            instrument = None
        # sub_strings is currently empty, so the 'writer_type in sub_strings' test
        # below never succeeds
        sub_strings = {  # 'instrument arranger': instrument, 'vocal arranger': instrument
        }
        # Annotation for instrument/vocal arrangers: the arranger annotation option
        # followed by the instrument(s), or whichever of the two is available
        if options['cea_arranger']:
            if instrument:
                arr_inst = options['cea_arranger'] + ' ' + instrument
            else:
                arr_inst = options['cea_arranger']
        else:
            arr_inst = instrument
        # Text (from the options) to be added in brackets after the name, by writer type
        annotations = {'writer': options['cea_writer'],
                       'lyricist': options['cea_lyricist'],
                       'librettist': options['cea_librettist'],
                       'revised by': options['cea_revised'],
                       'translator': options['cea_translator'],
                       'arranger': options['cea_arranger'],
                       'reconstructed by': options['cea_reconstructed'],
                       'orchestrator': options['cea_orchestrator'],
                       'instrument arranger': arr_inst,
                       'vocal arranger': arr_inst}
        tag = tag_strings[writer_type][0]
        sort_tag = tag_strings[writer_type][2]
        cwp_tag = tag_strings[writer_type][1]
        cwp_sort_tag = tag_strings[writer_type][3]
        # the last character of the cwp tag is dropped before adding '_names'
        # (presumably a plural 's' - verify against const.tag_strings)
        cwp_names_tag = cwp_tag[:-1] + '_names'
        cwp_instrumented_tag = cwp_names_tag + '_instrumented'
        if writer_type in sub_strings:
            if sub_strings[writer_type]:
                tag += sub_strings[writer_type]
        # Delete any existing contents of the Picard tag, but only the first time it is
        # met for this tm: the '~ce_tag_cleared_<tag>' marker records that it has been done
        if tag:
            if '~ce_tag_cleared_' + \
                    tag not in tm or not tm['~ce_tag_cleared_' + tag] == "Y":
                if tag in tm:
                    if options['log_info']:
                        write_log(release_id, 'info', 'delete tag %s', tag)
                    del tm[tag]
            tm['~ce_tag_cleared_' + tag] = "Y"
        # ... and the same for the Picard sort tag
        if sort_tag:
            if '~ce_tag_cleared_' + \
                    sort_tag not in tm or not tm['~ce_tag_cleared_' + sort_tag] == "Y":
                if sort_tag in tm:
                    del tm[sort_tag]
            tm['~ce_tag_cleared_' + sort_tag] = "Y"

        name_list = writer[2]
        for ind, name in enumerate(name_list):
            # the sort-name list (writer[3]) is indexed in parallel with the name list
            sort_name = writer[3][ind]
            no_credit = True
            write_log(
                    release_id,
                    'info',
                    'In set_work_artists. Name before changes = %s',
                    name)
            # change name to as-credited
            if options['cea_composer_credited']:
                if album in self.artist_credits and sort_name in self.artist_credits[album]:
                    no_credit = False
                    name = self.artist_credits[album][sort_name]
            # over-ride with aliases if appropriate
            if (options['cea_aliases'] or options['cea_aliases_composer']) and (
                    no_credit or options['cea_alias_overrides']):
                if sort_name in self.artist_aliases:
                    name = self.artist_aliases[sort_name]
            # fix cyrillic names if not already fixed
            if options['cea_cyrillic']:
                if not only_roman_chars(name):
                    name = remove_middle(unsort(sort_name))
                    # Only remove middle name where the existing
                    # performer is in non-latin script
            annotated_name = name
            write_log(
                    release_id,
                    'info',
                    'In set_work_artists. Name after changes = %s',
                    name)
            # add annotations and write performer tags
            if writer_type in annotations:
                if annotations[writer_type]:
                    annotated_name += ' (' + annotations[writer_type] + ')'
            if instrument:
                instrumented_name = name + ' (' + instrument + ')'
            else:
                instrumented_name = name

            # Main Picard tag: the annotated name is used for the 'insertions' types when
            # the cea_arrangers option is set; otherwise the plain name is written, but only
            # if cea_arrangers is set or the writer type is itself the name of the tag
            if writer_type in insertions and options['cea_arrangers']:
                self.append_tag(release_id, tm, tag, annotated_name)
            else:
                if options['cea_arrangers'] or writer_type == tag:
                    self.append_tag(release_id, tm, tag, name)

            # Picard sort tag (plus an explicit, non-hidden copy if the option is set
            # and the sort tag name contains '~')
            if options['cea_arrangers'] or writer_type == tag:
                if sort_tag:
                    self.append_tag(release_id, tm, sort_tag, sort_name)
                    if options['cea_tag_sort'] and '~' in sort_tag:
                        explicit_sort_tag = sort_tag.replace('~', '')
                        self.append_tag(
                            release_id, tm, explicit_sort_tag, sort_name)
            # Hidden tags are always written: annotated name, plain name and
            # (only if there is an instrument) the name with its instrument(s)
            self.append_tag(release_id, tm, cwp_tag, annotated_name)
            self.append_tag(release_id, tm, cwp_names_tag, name)
            if instrumented_name != name:
                self.append_tag(
                    release_id,
                    tm,
                    cwp_instrumented_tag,
                    instrumented_name)

            if cwp_sort_tag:
                self.append_tag(release_id, tm, cwp_sort_tag, sort_name)

            # Record that a lyricist/librettist has been written for this track, so that
            # later PartLevels calls for the same track stop at lyricists (see top of function)
            if caller == 'PartLevels' and (
                    writer_type == 'lyricist' or writer_type == 'librettist'):
                self.lyricist_filled[track] = True
                write_log(
                        release_id,
                        'info',
                        'Filled lyricist for track %s. Not looking further',
                        track)

            if writer_type == 'composer':
                # the last name is taken to be the part of the sort name before the first comma
                composerlast = sort_name.split(",")[0]
                write_log(
                        release_id,
                        'info',
                        'composerlast = %s',
                        composerlast)
                self.append_tag(
                    release_id,
                    tm,
                    pre +
                    '_composer_lastnames',
                    composerlast)
                # extra tags where the composer is also one of the release artists
                if sort_name in self.release_artists_sort[album]:
                    self.append_tag(
                        release_id, tm, '~cea_album_composers', name)
                    self.append_tag(
                        release_id, tm, '~cea_album_composers_sort', sort_name)
                    self.append_tag(
                        release_id,
                        tm,
                        '~cea_album_track_composer_lastnames',
                        composerlast)
                    composer_last_names(self, release_id, tm, album)
1615
1616
1617# Non-Latin character processing
# cache of results of is_latin, keyed by character
latin_letters = {}


def is_latin(uchr):
    """
    Test whether character is in Latin script
    :param uchr: a single unicode character
    :return: True if the unicode name of the character contains 'LATIN', otherwise False.
        Characters with no unicode name (e.g. control characters) are treated as not Latin.
    Results are cached in latin_letters.
    """
    try:
        return latin_letters[uchr]
    except KeyError:
        # supply a default name, as unicodedata.name raises ValueError
        # for characters which have no name
        return latin_letters.setdefault(
            uchr, 'LATIN' in unicodedata.name(uchr, ''))
1627
1628
def only_roman_chars(unistr):
    """
    Test whether string is in Latin script
    :param unistr: the string to test
    :return: False if any alphabetic character fails is_latin, otherwise True
        (non-alphabetic characters are ignored, so an empty string gives True)
    """
    for character in unistr:
        if character.isalpha() and not is_latin(character):
            return False
    return True
1634
1635
def get_roman(string):
    """
    Transliterate cyrillic script to Latin script
    :param string: the text to transliterate
    :return: the text with each character found in const.CYRILLIC_LOWER or const.CYRILLIC_UPPER
        replaced by its mapped value; other characters are left unchanged.
    The replacement for an upper-case character is fully capitalised unless the next character
    is a lower-case cyrillic one (an upper-case character at the end of the text has no
    following character and so is also fully capitalised).
    """
    last_index = len(string) - 1
    pieces = []
    for index, char in enumerate(string):
        if char in const.CYRILLIC_LOWER:
            char = const.CYRILLIC_LOWER[char]
        elif char in const.CYRILLIC_UPPER:
            char = const.CYRILLIC_UPPER[char]
            # check for the end of the text before looking at the next character
            if index == last_index or string[index +
                                             1] not in const.CYRILLIC_LOWER:
                char = char.upper()
        pieces.append(char)
    # fix multi-chars
    return ''.join(pieces).replace('ks', 'x').replace('iy ', 'i ')
1650
1651
def remove_middle(performer):
    """
    To remove middle names of Russian composers
    :param performer: a name
    :return: the first and last words of the name (joined by a space) if it has exactly
        three whitespace-separated words, otherwise the name unchanged
    """
    words = performer.split()
    if len(words) != 3:
        return performer
    first, _, last = words
    return first + ' ' + last
1659
1660
1661# Sorting etc.
1662
def unsort(performer):
    """
    To take a sort field and recreate the name
    Only now used for last-ditch cyrillic translation - superseded by 'translate_from_sortname'
    :param performer: a sort name, with parts separated by ', '
    :return: the parts in reverse order, separated by spaces (but with no space added after
        a part which ends in an apostrophe), stripped of leading and trailing whitespace.
    Empty parts (e.g. from an empty string or a trailing ', ') are tolerated.
    """
    parts = performer.split(', ')
    parts.reverse()
    # endswith is used (rather than indexing the last character) so that
    # an empty part does not raise IndexError
    return ''.join(
        part if part.endswith("'") else part + ' '
        for part in parts).strip()
1674
1675
1676def _reverse_sortname(sortname):
1677    """
1678    Reverse sortnames.
1679    Code is from picard/util/__init__.py
1680    """
1681
1682    chunks = [a.strip() for a in sortname.split(",")]
1683    chunk_len = len(chunks)
1684    if chunk_len == 2:
1685        return "%s %s" % (chunks[1], chunks[0])
1686    elif chunk_len == 3:
1687        return "%s %s %s" % (chunks[2], chunks[1], chunks[0])
1688    elif chunk_len == 4:
1689        return "%s %s, %s %s" % (chunks[1], chunks[0], chunks[3], chunks[2])
1690    else:
1691        return sortname.strip()
1692
1693
def stripsir(performer):
    """
    Remove honorifics from names
    Also standardize hyphens and apostrophes in names
    :param performer: a name
    :return: the name with u'\\u2010' replaced by '-' and u'\\u2019' replaced by "'" and,
        if 'Sir', 'Maestro' or 'Dame' (in any case) is found as a whole word, with that
        word and the whitespace following it removed. Only one honorific is removed
        (the last one on the line, as the leading part of the pattern is greedy).
    """
    standardized = performer.replace(u'\u2010', u'-')
    standardized = standardized.replace(u'\u2019', u"'")
    found = re.search(
        r'(.*)\b(Sir|Maestro|Dame)\b\s*(.*)',
        standardized,
        re.IGNORECASE)
    if not found:
        return standardized
    before, _, after = found.groups()
    return before + after
1706
1707
1708# def swap_prefix(performer):
1709#     """NOT CURRENTLY USED. Create sort fields for ensembles etc., by placing the prefix (see constants) at the end"""
1710#     prefix = '|'.join(prefixes)
1711#     swap = re.compile(r'^(' + prefix + r')\b\s*(.*)', re.IGNORECASE)
1712#     match = swap.search(performer)
1713#     if match:
1714#         return match.group(2) + ", " + match.group(1)
1715#     else:
1716#         return performer
1717
1718
def replace_roman_numerals(s):
    """
    Replaces roman numerals included in s by digits
    :param s: the text to process
    :return: the text with each numeral found by RE_ROMANS (the first element of each match)
        converted using from_roman; every whole-word occurrence of that numeral is replaced
    """
    for found in RE_ROMANS.findall(s):
        numeral = found[0]
        if not numeral:
            continue
        digits = str(from_roman(str(numeral)))
        s = re.sub(r'\b' + numeral + r'\b', digits, s)
    return s
1729
1730
def from_roman(s):
    """
    Convert a roman numeral to an integer
    :param s: the numeral, in upper or lower case
    :return: the total value of the symbols recognised from the start of s
        (0 if none are recognised).
    All upper-case symbols are tried (largest first) before any lower-case ones, and no
    validation is carried out: conversion just stops adding at the first unrecognised text.
    """
    upper_case_values = (
        ('M', 1000), ('CM', 900), ('D', 500), ('CD', 400),
        ('C', 100), ('XC', 90), ('L', 50), ('XL', 40),
        ('X', 10), ('IX', 9), ('V', 5), ('IV', 4), ('I', 1))
    # the lower-case symbols follow on, in the same order, after the upper-case ones
    symbol_values = upper_case_values + tuple(
        (symbol.lower(), value) for symbol, value in upper_case_values)
    total = 0
    position = 0
    for symbol, value in symbol_values:
        width = len(symbol)
        while s[position:position + width] == symbol:
            total += value
            position += width
    return total
1765
1766
def turbo_lcs(release_id, multi_list):
    """
    Picks the best longest common string method to use
    Works with a list of lists or a list of strings
    :param release_id: passed through to write_log
    :param multi_list: a list of strings or a list of lists
    :return: longest common substring/list; None if multi_list is not a list,
        [] if it is empty and its only member if it has just one
    """
    write_log(release_id, 'debug', 'In turbo_lcs')
    if not isinstance(multi_list, list):
        return None
    total_length = sum(len(member) for member in multi_list)
    member_count = len(multi_list)
    if member_count == 0:
        return []
    if member_count == 1:
        return multi_list[0]  # Nothing to do!
    # for big matches, use the generalised suffix tree method
    # heuristic: may need to tweak the 1000 in the light of results
    if ((total_length / member_count) ** 2) * member_count > 1000:
        # NB suffixtree may be shown as an unresolved reference in the IDE,
        # but it should work provided it is included in the package
        gst_result = suffixtree.multi_lcs(multi_list)
        # If suffix tree fails, write errors to log before proceeding with
        # alternative
        if "error" in gst_result:
            write_log(
                release_id,
                'error',
                'Suffix tree failure for release %s. Error message: %s. Using standard lcs algo instead',
                release_id,
                gst_result['error'])
        elif "response" in gst_result:
            write_log(
                release_id,
                'info',
                'Longest common string was returned from suffix tree algo')
            return gst_result['response']
        else:
            write_log(
                release_id,
                'error',
                'Suffix tree failure for release %s. Error unknown. Using standard lcs algo instead',
                release_id)
    # otherwise, or if gst fails, use the standard algorithm:
    # successively reduce the first member to what it has in common with each other member
    common = multi_list[0]
    for member in multi_list[1:]:
        common = longest_common_substring(member, common)['string']
    write_log(release_id, 'debug', 'LCS returned from standard algo')
    return common
1826
1827
def longest_common_substring(s1, s2):
    """
    Standard lcs algo for short strings, or if suffix tree does not work
    :param s1: substring 1
    :param s2: substring 2
    :return: {'string': the longest common substring,
        'start': the start position in s1,
        'length': the length of the common substring}
    NB this also works on list arguments - i.e. it will find the longest common sub-list
    If there is more than one common substring of the longest length, the one which
    ends first in s1 is returned
    """
    row_width = len(s2) + 1
    # only the previous row of the table of run lengths is needed at each step
    previous_row = [0] * row_width
    best_length = 0
    best_end = 0
    for end_1, item_1 in enumerate(s1, start=1):
        current_row = [0] * row_width
        for end_2, item_2 in enumerate(s2, start=1):
            if item_1 == item_2:
                # extend the common run which ended at the preceding items
                run_length = previous_row[end_2 - 1] + 1
                current_row[end_2] = run_length
                if run_length > best_length:
                    best_length, best_end = run_length, end_1
        previous_row = current_row
    best_start = best_end - best_length
    return {'string': s1[best_start:best_end],
            'start': best_start,
            'length': best_length}
1851
1852
def longest_common_sequence(list1, list2, minstart=0, maxstart=0):
    """
    Find the longest sequence which occurs at the same position in both lists
    :param list1: list 1
    :param list2: list 2
    :param minstart: the earliest point to start looking for a match
    :param maxstart: the latest point to start looking for a match
    :return: {'sequence': the common subsequence, 'length': length of subsequence}
        If nothing is found (or if maxstart < minstart) then this is
        {'sequence': None, 'length': 0}, so the result can always be used as a dict.
    maxstart must be >= minstart. If they are equal then the start point is fixed.
    Note that this only finds subsequences starting at the same position
    Use longest_common_substring for the more general problem
    """
    if maxstart < minstart:
        return {'sequence': None, 'length': 0}
    min_len = min(len(list1), len(list2))
    longest = 0
    seq = None
    last_start = min(maxstart, min_len)
    for k in range(minstart, last_start + 1):
        for i in range(k + 1, min_len + 1):
            if list1[k:i] == list2[k:i]:
                if i - k > longest:
                    longest = i - k
                    seq = list1[k:i]
            else:
                # once the slices differ, longer slices from the same start
                # will differ too
                break
    return {'sequence': seq, 'length': longest}
1876
1877
def substart_finder(mylist, pattern):
    """
    Find the first position at which a pattern occurs within a list
    :param mylist: the list to be searched
    :param pattern: the sub-list to look for (should be of the same type as mylist)
    :return: the index in mylist of the start of the first occurrence of pattern,
        or len(mylist) if it does not occur.
        An empty pattern is treated as occurring at the start, i.e. 0 is returned.
    """
    width = len(pattern)
    if width == 0:
        # nothing to compare (and there is no first item to test against)
        return 0
    first_item = pattern[0]
    for i, list_item in enumerate(mylist):
        # the cheap test of the first item avoids slicing at most positions
        if list_item == first_item and mylist[i:i + width] == pattern:
            return i
    return len(mylist)  # if nothing found
1883
1884
def get_ui_tags():
    """
    Determine tags for display in ui
    :return: a dict built from the 'ce_ui_tags' setting, in which each key is a column heading
        and each value is a tuple of tag names.
    The setting is split into columns at '/'; within a column, the heading is the text before
    the first ':' and the (comma-separated) tag names are the text between the first ':' and
    any second one. Opening brackets, and closing brackets and spaces at either end of a column,
    are removed. Columns with no ':' are ignored.
    """
    raw_setting = config.setting['ce_ui_tags']
    columns = {}
    for raw_column in raw_setting.split('/'):
        column = raw_column.replace('(', '').strip(') ')
        if ':' not in column:
            continue
        fields = [field.strip() for field in column.split(':')]
        columns[fields[0]] = tuple(
            tag_name.strip() for tag_name in fields[1].split(','))
    return columns
1899
1900
1901def map_tags(options, release_id, album, tm):
1902    """
1903    Do the common tag processing - including for the genres and tag-mapping sections
1904    :param release_id: name for log file - usually =musicbrainz_albumid
1905        unless called outside metadata processor
1906    :param options: options passed from either Artists or Workparts
1907    :param album:
1908    :param tm: track metadata
1909    :return: None - action is through setting tm contents
1910    This is a common function for Artists and Workparts which should only run after both sections have completed for
1911    a given track. If, say, Artists calls it and Workparts is not done,
1912    then it will not execute until Workparts calls it (and vice versa).
1913    """
1914
1915    write_log(release_id, 'debug', 'In map_tags, checking readiness...')
1916    if (options['classical_extra_artists'] and '~cea_artists_complete' not in tm) or (
1917            options['classical_work_parts'] and '~cea_works_complete' not in tm):
1918        write_log(release_id, 'info', '...not ready')
1919        return
1920    write_log(release_id, 'debug', '... processing tag mapping')
1921
1922    # blank tags
1923    blank_tags = options['cea_blank_tag'].split(
1924        ",") + options['cea_blank_tag_2'].split(",")
1925    if 'artists_sort' in [x.strip() for x in blank_tags]:
1926        blank_tags.append('~artists_sort')
1927    for tag in blank_tags:
1928        if tag.strip() in tm:
1929            # place blanked tags into hidden variables available for
1930            # re-use
1931            tm['~cea_' + tag.strip()] = tm[tag.strip()]
1932            del tm[tag.strip()]
1933
1934    # album
1935    if tm['~cea_album_composer_lastnames']:
1936        last_names = str_to_list(tm['~cea_album_composer_lastnames'])
1937        if options['cea_composer_album']:
1938            # save it as a list to prevent splitting when appending tag
1939            tm['~cea_release'] = [tm['album']]
1940            new_last_names = []
1941            for last_name in last_names:
1942                last_name = last_name.strip()
1943                new_last_names.append(last_name)
1944            if len(new_last_names) > 0:
1945                tm['album'] = "; ".join(new_last_names) + ": " + tm['album']
1946
1947    # remove lyricists if no vocals, according to option set
1948    if options['cea_no_lyricists'] and not any(
1949            [x for x in str_to_list(tm['~cea_performers']) if 'vocals' in x]):
1950        if 'lyricist' in tm:
1951            del tm['lyricist']
1952        for lyricist_tag in ['lyricists', 'librettists', 'translators']:
1953            if '~cwp_' + lyricist_tag in tm:
1954                del tm['~cwp_' + lyricist_tag]
1955
1956    # genres
1957    if config.setting['folksonomy_tags'] and 'genre' in tm:
1958        candidate_genres = str_to_list(tm['genre'])
1959        append_tag(release_id, tm, '~cea_candidate_genres', candidate_genres)
1960        # to avoid confusion as it will contain unmatched folksonomy tags
1961        del tm['genre']
1962    else:
1963        candidate_genres = []
1964    is_classical = False
1965    composers_not_found = []
1966    composer_found = False
1967    composer_born_list = []
1968    composer_died_list = []
1969    arrangers_not_found = []
1970    arranger_found = False
1971    arranger_born_list = []
1972    arranger_died_list = []
1973    no_composer_in_metadata = False
1974    if options['cwp_use_muso_refdb'] and options['cwp_muso_classical'] or options['cwp_muso_dates']:
1975        if COMPOSER_DICT:
1976            composersort_list = []
1977            if '~cwp_composer_names' in tm:
1978                composer_list = str_to_list(tm['~cwp_composer_names'])
1979            else:
1980                # maybe there were no works linked,
1981                # but it might still a classical track (based on composer name)
1982                no_composer_in_metadata = True
1983                composer_list = str_to_list(tm['artists'])
1984                composersort_list = str_to_list(tm['~artists_sort'])
1985                write_log(release_id, 'info', "No composer metadata for track %s. Using artists %r", tm['title'],
1986                          composer_list)
1987            lc_composer_list = [c.lower() for c in composer_list]
1988            for ind, composer in enumerate(lc_composer_list):
1989                for classical_composer in COMPOSER_DICT:
1990                    if composer in classical_composer['lc_name']:
1991                        if options['cwp_muso_classical']:
1992                            candidate_genres.append('Classical')
1993                            is_classical = True
1994                        if options['cwp_muso_dates']:
1995                            composer_born_list = classical_composer['birth']
1996                            composer_died_list = classical_composer['death']
1997                        composer_found = True
1998                        if no_composer_in_metadata:
1999                            composersort = composersort_list[ind]
2000                            append_tag(release_id, tm, 'composer', composer_list[ind])
2001                            append_tag(release_id, tm, '~cwp_composer_names', composer_list[ind])
2002                            append_tag(release_id, tm, 'composersort', composersort)
2003                            append_tag(release_id, tm, '~cwp_composers_sort', composersort)
2004                            append_tag(release_id, tm, '~cwp_composer_lastnames', composersort.split(', ')[0])
2005                        break
2006                if not composer_found:
2007                    composer_index = lc_composer_list.index(composer)
2008                    orig_composer = composer_list[composer_index]
2009                    composers_not_found.append(orig_composer)
2010                    append_tag(
2011                        release_id,
2012                        tm,
2013                        '~cwp_unrostered_composers',
2014                        orig_composer)
2015            if composers_not_found:
2016                append_tag(
2017                    release_id,
2018                    tm,
2019                    '003_information:composers',
2020                    'Composer(s) ' +
2021                    list_to_str(composers_not_found) +
2022                    ' not found in reference database of classical composers')
2023
2024            # do the same for arrangers, if required
2025            if options['cwp_genres_arranger_as_composer'] or options['cwp_periods_arranger_as_composer']:
2026                arranger_list = str_to_list(
2027                    tm['~cea_arranger_names']) + str_to_list(tm['~cwp_arranger_names'])
2028                lc_arranger_list = [c.lower() for c in arranger_list]
2029                for arranger in lc_arranger_list:
2030                    for classical_arranger in COMPOSER_DICT:
2031                        if arranger in classical_arranger['lc_name']:
2032                            if options['cwp_muso_classical'] and options['cwp_genres_arranger_as_composer']:
2033                                candidate_genres.append('Classical')
2034                                is_classical = True
2035                            if options['cwp_muso_dates'] and options['cwp_periods_arranger_as_composer']:
2036                                arranger_born_list = classical_arranger['birth']
2037                                arranger_died_list = classical_arranger['death']
2038                            arranger_found = True
2039                            break
2040                    if not arranger_found:
2041                        arranger_index = lc_arranger_list.index(arranger)
2042                        orig_arranger = arranger_list[arranger_index]
2043                        arrangers_not_found.append(orig_arranger)
2044                        append_tag(
2045                            release_id,
2046                            tm,
2047                            '~cwp_unrostered_arrangers',
2048                            orig_arranger)
2049                if arrangers_not_found:
2050                    append_tag(
2051                        release_id,
2052                        tm,
2053                        '003_information:arrangers',
2054                        'Arranger(s) ' +
2055                        list_to_str(arrangers_not_found) +
2056                        ' not found in reference database of classical composers')
2057
2058        else:
2059            append_tag(
2060                release_id,
2061                tm,
2062                '001_errors:8',
2063                '8. No composer reference file. Check log for error messages re path name.')
2064
2065    if options['cwp_use_muso_refdb'] and options['cwp_muso_genres'] and GENRE_DICT:
2066        main_classical_genres_list = [list_to_str(
2067            mg['name']).strip() for mg in GENRE_DICT]
2068    else:
2069        main_classical_genres_list = [
2070            sg.strip() for sg in options['cwp_genres_classical_main'].split(',')]
2071    sub_classical_genres_list = [
2072        sg.strip() for sg in options['cwp_genres_classical_sub'].split(',')]
2073    main_other_genres_list = [
2074        sg.strip() for sg in options['cwp_genres_other_main'].split(',')]
2075    sub_other_genres_list = [sg.strip()
2076                             for sg in options['cwp_genres_other_sub'].split(',')]
2077    main_classical_genres = []
2078    sub_classical_genres = []
2079    main_other_genres = []
2080    sub_other_genres = []
2081    if '~cea_work_type' in tm:
2082        candidate_genres += str_to_list(tm['~cea_work_type'])
2083    if '~cwp_candidate_genres' in tm:
2084        candidate_genres += str_to_list(tm['~cwp_candidate_genres'])
2085    write_log(release_id, 'info', "Candidate genres: %r", candidate_genres)
2086    untagged_genres = []
2087    if candidate_genres:
2088        main_classical_genres = [
2089            val for val in main_classical_genres_list if val.lower() in [
2090                genre.lower() for genre in candidate_genres]]
2091        sub_classical_genres = [
2092            val for val in sub_classical_genres_list if val.lower() in [
2093                genre.lower() for genre in candidate_genres]]
2094
2095        if main_classical_genres or sub_classical_genres or options['cwp_genres_classical_all']:
2096            is_classical = True
2097            main_classical_genres.append('Classical')
2098            candidate_genres.append('Classical')
2099            write_log(release_id, 'info', "Main classical genres for track %s: %r", tm['title'], main_classical_genres)
2100            candidate_genres += str_to_list(tm['~cea_work_type_if_classical'])
2101            # next two are repeated statements, but a separate fn would be
2102            # clumsy too!
2103            main_classical_genres = [
2104                val for val in main_classical_genres_list if val.lower() in [
2105                    genre.lower() for genre in candidate_genres]]
2106            sub_classical_genres = [
2107                val for val in sub_classical_genres_list if val.lower() in [
2108                    genre.lower() for genre in candidate_genres]]
2109        if options['cwp_genres_classical_exclude']:
2110            main_classical_genres = [
2111                g for g in main_classical_genres if g.lower() != 'classical']
2112
2113        main_other_genres = [
2114            val for val in main_other_genres_list if val.lower() in [
2115                genre.lower() for genre in candidate_genres]]
2116        sub_other_genres = [
2117            val for val in sub_other_genres_list if val.lower() in [
2118                genre.lower() for genre in candidate_genres]]
2119        all_genres = main_classical_genres + sub_classical_genres + \
2120            main_other_genres + sub_other_genres
2121        untagged_genres = [
2122            un for un in candidate_genres if un.lower() not in [
2123                genre.lower() for genre in all_genres]]
2124
2125    if options['cwp_genre_tag']:
2126        if not options['cwp_genres_filter']:
2127            append_tag(
2128                release_id,
2129                tm,
2130                options['cwp_genre_tag'],
2131                candidate_genres)
2132        else:
2133            append_tag(
2134                release_id,
2135                tm,
2136                options['cwp_genre_tag'],
2137                main_classical_genres +
2138                main_other_genres)
2139    if options['cwp_subgenre_tag'] and options['cwp_genres_filter']:
2140        append_tag(
2141            release_id,
2142            tm,
2143            options['cwp_subgenre_tag'],
2144            sub_classical_genres +
2145            sub_other_genres)
2146    if is_classical and options['cwp_genres_flag_text'] and options['cwp_genres_flag_tag']:
2147        tm[options['cwp_genres_flag_tag']] = options['cwp_genres_flag_text']
2148    if not (
2149            main_classical_genres +
2150            main_other_genres)and options['cwp_genres_filter']:
2151        if options['cwp_genres_default']:
2152            append_tag(
2153                release_id,
2154                tm,
2155                options['cwp_genre_tag'],
2156                options['cwp_genres_default'])
2157        else:
2158            if options['cwp_genre_tag'] in tm:
2159                del tm[options['cwp_genre_tag']]
2160    if untagged_genres and options['cwp_genres_filter']:
2161        append_tag(
2162            release_id,
2163            tm,
2164            '003_information:genres',
2165            'Candidate genres found but not matched: ' +
2166            list_to_str(untagged_genres))
2167        append_tag(release_id, tm, '~cwp_untagged_genres', untagged_genres)
2168
2169    # instruments and keys
2170    if options['cwp_instruments_MB_names'] and options['cwp_instruments_credited_names'] and tm['~cea_instruments_all']:
2171        instruments = str_to_list(tm['~cea_instruments_all'])
2172    elif options['cwp_instruments_MB_names'] and tm['~cea_instruments']:
2173        instruments = str_to_list(tm['~cea_instruments'])
2174    elif options['cwp_instruments_credited_names'] and tm['~cea_instruments_credited']:
2175        instruments = str_to_list(tm['~cea_instruments_credited'])
2176    else:
2177        instruments = None
2178    if instruments and options['cwp_instruments_tag']:
2179        append_tag(release_id, tm, options['cwp_instruments_tag'], instruments)
2180        # need to append rather than over-write as it may be the same as
2181        # another tag (e.g. genre)
2182    if tm['~cwp_keys'] and options['cwp_key_tag']:
2183        append_tag(release_id, tm, options['cwp_key_tag'], tm['~cwp_keys'])
2184    # dates
2185    if options['cwp_workdate_annotate']:
2186        comp = ' (composed)'
2187        publ = ' (published)'
2188        prem = ' (premiered)'
2189    else:
2190        comp = ''
2191        publ = ''
2192        prem = ''
2193    tm[options['cwp_workdate_tag']] = ''
2194    earliest_date = 9999
2195    latest_date = -9999
2196    found = False
2197    if tm['~cwp_composed_dates']:
2198        composed_dates_list = str_to_list(tm['~cwp_composed_dates'])
2199        if len(composed_dates_list) > 1:
2200            composed_dates_list = str_to_list(
2201                composed_dates_list[0])  # use dates of lowest-level work
2202        earliest_date = min([int(dates.split(DATE_SEP)[0].strip())
2203                             for dates in composed_dates_list])
2204        append_tag(
2205            release_id,
2206            tm,
2207            options['cwp_workdate_tag'],
2208            list_to_str(composed_dates_list) +
2209            comp)
2210        found = True
2211    if tm['~cwp_published_dates'] and (
2212            not found or options['cwp_workdate_use_all']):
2213        if not found:
2214            published_dates_list = str_to_list(tm['~cwp_published_dates'])
2215            if len(published_dates_list) > 1:
2216                published_dates_list = str_to_list(
2217                    published_dates_list[0])  # use dates of lowest-level work
2218            earliest_date = min([int(dates.split(DATE_SEP)[0].strip())
2219                                 for dates in published_dates_list])
2220            append_tag(
2221                release_id,
2222                tm,
2223                options['cwp_workdate_tag'],
2224                list_to_str(published_dates_list) +
2225                publ)
2226            found = True
2227    if tm['~cwp_premiered_dates'] and (
2228            not found or options['cwp_workdate_use_all']):
2229        if not found:
2230            premiered_dates_list = str_to_list(tm['~cwp_premiered_dates'])
2231            if len(premiered_dates_list) > 1:
2232                premiered_dates_list = str_to_list(
2233                    premiered_dates_list[0])  # use dates of lowest-level work
2234            earliest_date = min([int(dates.split(DATE_SEP)[0].strip())
2235                                 for dates in premiered_dates_list])
2236            append_tag(
2237                release_id,
2238                tm,
2239                options['cwp_workdate_tag'],
2240                list_to_str(premiered_dates_list) +
2241                prem)
2242
2243    # periods
2244    PERIODS = {}
2245    if options['cwp_period_map']:
2246        if options['cwp_use_muso_refdb'] and options['cwp_muso_periods'] and PERIOD_DICT:
2247            for p_item in PERIOD_DICT:
2248                if 'start' not in p_item or p_item['start'] == []:
2249                    p_item['start'] = [u'-9999']
2250                if 'end' not in p_item or p_item['end'] == []:
2251                    p_item['end'] = [u'2525']
2252                if 'name' not in p_item or p_item['name'] == []:
2253                    p_item['name'] = ['NOT SPECIFIED']
2254            PERIODS = {list_to_str(mp['name']).strip(): (
2255                list_to_str(mp['start']),
2256                list_to_str(mp['end']))
2257                for mp in PERIOD_DICT}
2258            for period in PERIODS:
2259                if PERIODS[period][0].lstrip(
2260                        '-').isdigit() and PERIODS[period][1].lstrip('-').isdigit():
2261                    PERIODS[period] = (int(PERIODS[period][0]),
2262                                       int(PERIODS[period][1]))
2263                else:
2264                    PERIODS[period] = (
2265                        9999,
2266                        'ERROR - start and/or end of ' +
2267                        period +
2268                        ' are not integers')
2269
2270        else:
2271            periods = [p.strip() for p in options['cwp_period_map'].split(';')]
2272            for p in periods:
2273                p = p.split(',')
2274                if len(p) == 3:
2275                    period = p[0].strip()
2276                    start = p[1].strip()
2277                    end = p[2].strip()
2278                    if start.lstrip(
2279                            '-').isdigit() and end.lstrip('-').isdigit():
2280                        PERIODS[period] = (int(start), int(end))
2281                    else:
2282                        PERIODS[period] = (
2283                            9999,
2284                            'ERROR - start and/or end of ' +
2285                            period +
2286                            ' are not integers')
2287                else:
2288                    PERIODS[p[0]] = (
2289                        9999, 'ERROR in period map - each item must contain 3 elements')
2290    if options['cwp_period_tag'] and PERIODS:
2291        if earliest_date == 9999:  # i.e. no work date found
2292            if options['cwp_use_muso_refdb'] and options['cwp_muso_dates']:
2293                for composer_born in composer_born_list + arranger_born_list:
2294                    if composer_born and composer_born.isdigit():
2295                        birthdate = int(composer_born)
2296                        # productive age is taken as 20->death as per Muso
2297                        earliest_date = min(earliest_date, birthdate + 20)
2298                        for composer_died in composer_died_list + arranger_died_list:
2299                            if composer_died and composer_died.isdigit():
2300                                deathdate = int(composer_died)
2301                                latest_date = max(latest_date, deathdate)
2302                            else:
2303                                latest_date = datetime.now().year
2304        # sort into start date order before writing tags
2305        sorted_periods = collections.OrderedDict(
2306            sorted(PERIODS.items(), key=lambda t: t[1]))
2307        for period in sorted_periods:
2308            if isinstance(
2309                    sorted_periods[period][1],
2310                    str) and 'ERROR' in sorted_periods[period][1]:
2311                tm[options['cwp_period_tag']] = ''
2312                append_tag(
2313                    release_id,
2314                    tm,
2315                    '001_errors:9',
2316                    '9. ' +
2317                    sorted_periods[period])
2318                break
2319            if earliest_date < 9999:
2320                if sorted_periods[period][0] <= earliest_date <= sorted_periods[period][1]:
2321                    append_tag(
2322                        release_id,
2323                        tm,
2324                        options['cwp_period_tag'],
2325                        period)
2326            if latest_date > -9999:
2327                if sorted_periods[period][0] <= latest_date <= sorted_periods[period][1]:
2328                    append_tag(
2329                        release_id,
2330                        tm,
2331                        options['cwp_period_tag'],
2332                        period)
2333
2334    # generic tag mapping
2335    sort_tags = options['cea_tag_sort']
2336    if sort_tags:
2337        tm['artists_sort'] = str_to_list(tm['~artists_sort'])
2338    for i in range(0, 16):
2339        tagline = options['cea_tag_' + str(i + 1)].split(",")
2340        source_group = options['cea_source_' + str(i + 1)].split(",")
2341        conditional = options['cea_cond_' + str(i + 1)]
2342        for item, tagx in enumerate(tagline):
2343            tag = tagx.strip()
2344            sort = sort_suffix(tag)
2345            if not conditional or tm[tag] == "":
2346                for source_memberx in source_group:
2347                    source_member = source_memberx.strip()
2348                    sourceline = source_member.split("+")
2349                    if len(sourceline) > 1:
2350                        source = "\\"
2351                        for source_itemx in sourceline:
2352                            source_item = source_itemx.strip()
2353                            source_itema = source_itemx.lstrip()
2354                            write_log(
2355                                    release_id, 'info', "Source_item: %s", source_item)
2356                            if "~cea_" + source_item in tm:
2357                                si = tm['~cea_' + source_item]
2358                            elif "~cwp_" + source_item in tm:
2359                                si = tm['~cwp_' + source_item]
2360                            elif source_item in tm:
2361                                si = tm[source_item]
2362                            elif len(source_itema) > 0 and source_itema[0] == "\\":
2363                                si = source_itema[1:]
2364                            else:
2365                                si = ""
2366                            if si != "" and source != "":
2367                                source = source + si
2368                            else:
2369                                source = ""
2370                    else:
2371                        source = sourceline[0]
2372                    no_names_source = re.sub('(_names)$', 's', source)
2373                    source_sort = sort_suffix(source)
2374                    write_log(
2375                            release_id,
2376                            'info',
2377                            "Tag mapping: Line: %s, Source: %s, Tag: %s, no_names_source: %s, sort: %s, item %s",
2378                            i +
2379                            1,
2380                            source,
2381                            tag,
2382                            no_names_source,
2383                            sort,
2384                            item)
2385                    if '~cea_' + source in tm or '~cwp_' + source in tm:
2386                        for prefix in ['~cea_', '~cwp_']:
2387                            if prefix + source in tm:
2388                                write_log(release_id, 'info', prefix)
2389                                append_tag(release_id, tm, tag,
2390                                           tm[prefix + source], ['; '])
2391                                if sort_tags:
2392                                    if prefix + no_names_source + source_sort in tm:
2393                                        write_log(
2394                                                release_id, 'info', prefix + " sort")
2395                                        append_tag(release_id, tm, tag + sort,
2396                                                   tm[prefix + no_names_source + source_sort], ['; '])
2397                    elif source in tm or '~' + source in tm:
2398                        write_log(release_id, 'info', "Picard")
2399                        for p in ['', '~']:
2400                            if p + source in tm:
2401                                append_tag(release_id, tm, tag,
2402                                           tm[p + source], ['; ', '/ '])
2403                        if sort_tags:
2404                            if "~" + source + source_sort in tm:
2405                                source = "~" + source
2406                            if source + source_sort in tm:
2407                                write_log(
2408                                        release_id, 'info', "Picard sort")
2409                                append_tag(release_id, tm, tag + sort,
2410                                           tm[source + source_sort], ['; ', '/ '])
2411                    elif len(source) > 0 and source[0] == "\\":
2412                        append_tag(release_id, tm, tag,
2413                                   source[1:], ['; ', '/ '])
2414                    else:
2415                        pass
2416
2417    # write error messages to tags
2418    if options['log_error'] and "~cea_error" in tm:
2419        for error in str_to_list(tm['~cea_error']):
2420            ecode = error[0]
2421            append_tag(release_id, tm, '001_errors:' + ecode, error)
2422    if options['log_warning'] and "~cea_warning" in tm:
2423        for warning in str_to_list(tm['~cea_warning']):
2424            wcode = warning[0]
2425            append_tag(release_id, tm, '002_warnings:' + wcode, warning)
2426
2427    # delete unwanted tags
2428    if not options['log_debug']:
2429        if '~cea_works_complete' in tm:
2430            del tm['~cea_works_complete']
2431        if '~cea_artists_complete' in tm:
2432            del tm['~cea_artists_complete']
2433        del_list = []
2434        for t in tm:
2435            if 'ce_tag_cleared' in t:
2436                del_list.append(t)
2437        for t in del_list:
2438            del tm[t]
2439
2440    # create hidden tags to flag differences
2441    if options['ce_show_ui_tags'] and options['ce_ui_tags']:
2442        for heading_name, tag_tuple in UI_TAGS:  # UI_TAGS is already iterated in main routine, so no need for .items() method here
2443            heading_tag = '~' + heading_name + '_VAL'
2444            for tag in tag_tuple:
2445                if tag[-5:] != '_DIFF':
2446                    append_tag(release_id, tm, heading_tag, tm[tag])
2447                else:
2448                    tag = '~' + tag
2449                    mirror_tags = str_to_list((tm['~ce_mirror_tags']))
2450                    for mirror_tag in mirror_tags:
2451                        mt = interpret(mirror_tag)
2452                        st = str_to_list(mt)
2453                        (old_tag, new_tag) = tuple(st)
2454                        diff_name = old_tag.replace('OLD', 'DIFF')
2455                        if diff_name == tag and  tm[old_tag] != tm[new_tag]:
2456                            tm[diff_name] = '*****'
2457                            append_tag(release_id, tm, heading_tag, '*****')
2458                            break
2459
2460    # if options over-write enabled, remove it after processing one album
2461    options['ce_options_overwrite'] = False
2462    config.setting['ce_options_overwrite'] = False
2463    # so that options are not retained (in case of refresh with different
2464    # options)
2465    if '~ce_options' in tm:
2466        del tm['~ce_options']
2467
2468    # remove any unwanted file tags
2469    if '~ce_file' in tm and tm['~ce_file'] != "None":
2470        music_file = tm['~ce_file']
2471        orig_metadata = album.tagger.files[music_file].orig_metadata
2472        if 'delete_tags' in options and options['delete_tags']:
2473            warn = []
2474            for delete_item in options['delete_tags']:
2475                if delete_item not in tm:  # keep the original for comparison if we have a new version
2476                    if delete_item in orig_metadata:
2477                        del orig_metadata[delete_item]
2478                        if delete_item != '002_warnings:7':  # to avoid circularity!
2479                            warn.append(delete_item)
2480            if warn and options['log_warning']:
2481                append_tag(
2482                    release_id,
2483                    tm,
2484                    '002_warnings:7',
2485                    '7. Deleted tags: ' +
2486                    ', '.join(warn))
2487                write_log(
2488                    release_id,
2489                    'warning',
2490                    'Deleted tags: ' +
2491                    ', '.join(warn))
2492
2493
def sort_suffix(tag):
    """
    Gives the suffix which turns a tag name into the name of its sort tag
    :param tag: tag name
    :return: "sort" for the tags listed below (the suffix follows the name
        directly), "_sort" for every other tag
    """
    joined_directly = (
        "composer",
        "artist",
        "albumartist",
        "trackartist",
        "~cea_MB_artist",
    )
    return "sort" if tag in joined_directly else "_sort"
2501
2502
def append_tag(release_id, tm, tag, source, separators=None):
    """
    Add value(s) to a tag, leaving out values the tag already contains
    :param release_id: name for log file - usually =musicbrainz_albumid
        unless called outside metadata processor
    :param tm: track metadata
    :param tag: name of the tag to be appended to (nothing is done if it is empty)
    :param source: item(s) to append to tag. A string is split on the separators
        (if there are any); anything else which is not a list is wrapped in a list
    :param separators: list of regex alternatives used to split a string into a list
        (any match will be a split point)
    :return: None. Action is on tm
    """
    separators = separators or []
    if not tag:
        return

    def split_up(text):
        # the pattern is built only at the point where a split is really needed
        return re.split('|'.join(separators), text)

    if config.setting['log_info']:
        write_log(
            release_id,
            'info',
            'Appending source: %r to tag: %s (source is type %s) ...',
            source,
            tag,
            type(source))
        if tag in tm:
            write_log(
                release_id,
                'info',
                '... existing tag contents = %r',
                tm[tag])

    if not (source and len(source) > 0):
        return

    if isinstance(source, str):
        source = split_up(source) if separators else [source]
    if not isinstance(source, list):
        # typically for dict items such as saved options
        source = [source]

    if not all(isinstance(entry, str) for entry in source):
        # source items are not all strings, so just replace the tag
        tm[tag] = source
        return

    starting_afresh = tag not in tm
    if starting_afresh and tag == 'artists_sort':
        # There is no artists_sort tag in Picard - just a hidden var
        # ~artists_sort, so pick up those values into the new tag
        hidden = tm['~artists_sort']
        if not isinstance(hidden, list):
            if separators:
                hidden = [piece.strip() for piece in split_up(hidden)]
            else:
                hidden = [hidden]
        source = add_list_uniquely(source, hidden)

    # typographic hyphens and quotes are written as their plain equivalents
    plain_punctuation = str.maketrans({
        u'\u2010': u'-',
        u'\u2011': u'-',
        u'\u2019': u"'",
        u'\u2018': u"'",
        u'\u201c': u'"',
        u'\u201d': u'"',
    })

    for value in source:
        if isinstance(value, str):
            value = value.translate(plain_punctuation)
        if starting_afresh:
            # the first value creates the tag; later ones are appended to it
            tm[tag] = [value]
            starting_afresh = False
            continue
        existing = tm[tag]
        if isinstance(existing, list):
            tag_list = existing
        elif separators:
            tag_list = [piece.strip() for piece in split_up(existing)]
        else:
            tag_list = [existing]
        # NB the test is made against the stored value, not against tag_list
        if value not in existing:
            tag_list.append(value)
            # NB tag_list is used as metadata object will convert
            # single-item lists to strings
            tm[tag] = tag_list
2587
def get_artist_credit(options, release_id, obj):
    """
    Collects the artist credits found in an XmlNode
    :param release_id: name for log file - usually =musicbrainz_albumid
        unless called outside metadata processor
    :param options: not used by this function
    :param obj: an XmlNode
    :return: a list with one tuple (credited name, artist name, artist sort-name)
        per credit which has a credited name; each member of the tuple is the
        result which parse_data gave for it. The list is empty (never None)
        if there are no artist credits.
    """
    credit_list = []
    name_credit_list = parse_data(release_id, obj, [], 'artist-credit')
    if not name_credit_list:
        # always hand back a list, so that callers can iterate over the result
        return credit_list
    for name_credits in name_credit_list:
        for name_credit in name_credits:
            credited_artist = parse_data(
                release_id, name_credit, [], 'name')
            if not credited_artist:
                continue
            name = parse_data(
                release_id, name_credit, [], 'artist', 'name')
            sort_name = parse_data(
                release_id, name_credit, [], 'artist', 'sort-name')
            credit_list.append((credited_artist, name, sort_name))
    return credit_list
2611
2612
def get_aliases_and_credits(
        self,
        options,
        release_id,
        album,
        obj,
        lang,
        credited):
    """
    Records primary aliases (in the chosen language) and, if wanted, as-credited
    names for the artists in the artist-credit of an XmlNode
    :param release_id: name for log file - usually =musicbrainz_albumid
        unless called outside metadata processor
    :param album:
    :param self: This relates to the object in the class which called this function
    :param options: not used by this function
    :param obj: an XmlNode
    :param lang: The language selected in the Picard metadata options
    :param credited: The options item to determine what as-credited names are being sought
    :return: None. Sets self.artist_aliases and self.artist_credits[album],
        both keyed by artist sort-name
    """
    name_credit_list = parse_data(release_id, obj, [], 'artist-credit')
    artist_list = parse_data(release_id, name_credit_list, [], 'artist')
    for artist in artist_list:
        sort_names = parse_data(release_id, artist, [], 'sort-name')
        if not sort_names:
            continue
        aliases = parse_data(release_id, artist, [], 'aliases', 'locale:' +
                             lang, 'primary:True', 'name')
        if aliases:
            self.artist_aliases[sort_names[0]] = aliases[0]
    # Only the first artist-credit entry is examined for as-credited names.
    # The test on name_credit_list avoids an IndexError when obj has no
    # artist-credit at all.
    if credited and name_credit_list:
        for name_credit in name_credit_list[0]:
            credited_artist = parse_data(release_id, name_credit, [], 'name')
            if not credited_artist:
                continue
            sort_name = parse_data(
                release_id, name_credit, [], 'artist', 'sort-name')
            if sort_name:
                self.artist_credits[album][sort_name[0]] = credited_artist[0]
2650
2651
def get_relation_credits(
        self,
        options,
        release_id,
        album,
        obj,
        lang,
        credited):
    """
    Records as-credited names and primary aliases for the artists found in the
    relationships of an XmlNode
    :param release_id: name for log file - usually =musicbrainz_albumid
        unless called outside metadata processor
    :param self:
    :param options: UI options (not used by this function)
    :param album: current album
    :param obj: Xmlnode
    :param lang: language
    :param credited: credited-as name
    :return: None. Sets self.artist_credits[album] and self.artist_aliases,
        both keyed by artist sort-name
    Note that direct recording relationships will over-ride indirect ones (via work)
    """
    # The indirect (via work) path is dealt with first, so that anything found
    # on the direct path over-writes it.
    relation_paths = (
        ('relations', 'target-type:work',
         'work', 'relations', 'target-type:artist'),
        ('relations', 'target-type:artist'),
    )
    for path in relation_paths:
        for artist in parse_data(release_id, obj, [], *path):
            sort_names = parse_data(
                release_id, artist, [], 'artist', 'sort-name')
            if not sort_names:
                continue
            credited_artists = parse_data(
                release_id, artist, [], 'target-credit')
            if credited_artists and credited_artists[0] != '' and credited:
                self.artist_credits[album][sort_names[0]] = credited_artists[0]
            aliases = parse_data(
                release_id,
                artist,
                [],
                'artist',
                'aliases',
                'locale:' + lang,
                'primary:True',
                'name')
            if aliases:
                self.artist_aliases[sort_names[0]] = aliases[0]
2717
2718
def composer_last_names(self, release_id, tm, album):
    """
    Adds the length of a track to the running total kept for each composer
    last name of the album, in self.album_artists[album]['composer_lastnames']
    :param release_id: name for log file - usually =musicbrainz_albumid
        unless called outside metadata processor
    :param self:
    :param tm: track metadata
    :param album:
    :return: None
    Sets composer last names for album prefixing
    """
    names_tag = '~cea_album_track_composer_lastnames'
    if names_tag not in tm:
        write_log(
            release_id,
            'warning',
            "No _cea_album_track_composer_lastnames variable available for recording \"%s\".",
            tm['title'])
        if 'composer' in tm:
            message = '1. Composer for this track is not in album artists and will not be available to prefix album'
        else:
            message = '1. No composer for this track, but checking parent work.'
        # NOTE(review): release_id appears twice in this call - confirm
        # against the signature of self.append_tag
        self.append_tag(
            release_id,
            release_id,
            tm,
            '~cea_warning',
            message)
        return

    raw_names = tm[names_tag]
    if isinstance(raw_names, list):
        atc_list = str_to_list(raw_names)
    else:
        atc_list = re.split('|'.join(self.SEPARATORS), raw_names)

    for atc_item in atc_list:
        last_name = atc_item.strip()
        if '~length' in tm and tm['~length']:
            track_length = time_to_secs(tm['~length'])
        else:
            track_length = 0
        # one look-up serves every case: a missing album or a missing
        # 'composer_lastnames' entry is handled (or not) by the container itself
        totals = self.album_artists[album]['composer_lastnames']
        if last_name in totals:
            totals[last_name]['length'] += track_length
        else:
            totals[last_name] = {'length': track_length}
2776
2777
def add_list_uniquely(list_to, list_from):
    """
    Extends list_to with those items of list_from which it does not yet contain
    When both args have content, an arg which is not a list is first passed
    through str_to_list, and list_to (if it is a list) is extended in place
    :param list_to:
    :param list_from:
    :return: list_to with the new items added. If list_from is empty, list_to is
        returned as it is; if only list_to is empty, list_from is returned as it is
    """
    if not list_from:
        return list_to
    if not list_to:
        return list_from
    if not isinstance(list_to, list):
        list_to = str_to_list(list_to)
    if not isinstance(list_from, list):
        list_from = str_to_list(list_from)
    for candidate in list_from:
        if candidate not in list_to:
            list_to.append(candidate)
    return list_to
2799
2800
def str_to_list(s):
    """
    Convert a value to a list.
    :param s: list (returned as is), string (split on '; ') or any other object
    :return: list from string using '; ' as separator; for other objects list(s)
    if that is possible, otherwise an empty list
    """
    if isinstance(s, list):
        return s
    if isinstance(s, str):
        # an empty string gives an empty list, not ['']
        return s.split('; ') if s != '' else []
    try:
        return list(s)
    except TypeError:
        # not iterable (e.g. None or a number)
        return []
2818
2819
def list_to_str(l):
    """
    Join a list into a single string.
    :param l: list of strings (anything that is not a list is passed through untouched)
    :return: string from list using '; ' as separator
    """
    if isinstance(l, list):
        return '; '.join(l)
    return l
2829
2830
def interpret(tag):
    """
    Evaluate a string holding a Python literal without running arbitrary code.
    :param tag: string to be evaluated (any non-string is returned unchanged)
    :return: safe form of eval(tag); if the text is not a valid literal, the
    whitespace-stripped text itself is returned
    """
    if not isinstance(tag, str):
        return tag
    tag = tag.strip(' \n\t')
    try:
        return ast.literal_eval(tag)
    except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError):
        # literal_eval is documented to raise any of these on malformed input
        # (e.g. TypeError for an unhashable dict key) - treat them all as "plain text"
        return tag
2844
2845
def time_to_secs(a):
    """
    converts string times to seconds
    :param a: string x:x:x (colon-separated, least significant field last)
    :return: seconds, or 0 if any field is not purely digits
    """
    total = 0
    multiplier = 1
    # walk from the right-most field so each step is worth 60 times the previous one
    for field in reversed(a.split(':')):
        if not field.isdigit():
            return 0
        total += int(field) * multiplier
        multiplier *= 60
    return total
2861
2862
def seq_last_names(self, album):
    """
    Sequences composer last names for album prefix by the total lengths of their tracks
    :param self: object holding the album_artists collection
    :param album: key of the album in self.album_artists
    :return: list of last names, longest total length first; empty list if the album
    has no composer last names or if any name has no 'length' recorded
    """
    if album not in self.album_artists:
        return []
    album_data = self.album_artists[album]
    if 'composer_lastnames' not in album_data:
        return []
    lengths_by_name = album_data['composer_lastnames']
    pairs = []
    for name in lengths_by_name:
        details = lengths_by_name[name]
        if 'length' not in details:
            return []
        pairs.append((name, details['length']))
    # ascending stable sort followed by a reversal (so ties come out in reverse
    # insertion order)
    pairs.sort(key=lambda pair: pair[1])
    pairs.reverse()
    return [name for name, _ in pairs]
2881
2882
def year(date):
    """
    Return YYYY portion of date(s) in YYYY-MM-DD format (may be incomplete, string or list)
    :param date: a single date (string or None) or a list of them
    :return: list of YYYY strings - one per date supplied (a single date gives a
    one-element list)
    """
    # treat a single date as a one-item list so both cases share the same path
    dates = date if isinstance(date, list) else [date]
    return [blank_if_none(item).split('-')[0] for item in dates]
2895
2896
def blank_if_none(val):
    """
    Make NoneTypes strings
    :param val: str or None
    :return: val itself if it is truthy, otherwise an empty string
    """
    return val or ''
2907
2908
def strip_excess_punctuation(s):
    """
    remove orphan punctuation, unmatched quotes and brackets
    The clean-up rules are applied repeatedly until the text stops changing
    (or a safety limit on the number of passes is reached).
    :param s: string (an empty string or None is returned unchanged)
    :return: string
    """
    if s:
        match_chars = [("(", ")"), ("[", "]"), ("{", "}")]
        s_prev = ''
        counter = 0
        while s != s_prev:
            if counter > 100:
                break  # safety valve
            s_prev = s
            s = s.replace('  ', ' ')
            # orphan punctuation at either end
            s = s.strip("&.-:;, ")
            s = s.lstrip("!)]}")
            s = s.rstrip("([{")
            s = s.lstrip(u"\u2019") # Right single quote
            s = s.lstrip(u"\u201D") # Right double quote
            if s.count(u"\u201E") == 0: # u201E is lower double quote (German etc.)
                s = s.rstrip(u"\u201C") # Left double quote - only strip if there is no German-style lower quote present
            s = s.rstrip(u"\u2018") # Left single quote
            # an odd number of straight quotes means one of them is unmatched
            if s.count('"') % 2 != 0:
                s = s.strip('"')
            if s.count("'") % 2 != 0:
                s = s.strip("'")
            # typographic quotes at the ends with no partner anywhere in the text
            if len(s) > 0 and s[0] == u"\u201C" and s.count(u"\u201D") == 0:
                s = s.lstrip(u"\u201C")
            if len(s) > 0 and s[-1] == u"\u201D" and s.count(u"\u201C") == 0 and s.count(u"\u201E") == 0:  # only strip if there is no German-style lower quote present
                s = s.rstrip(u"\u201D")
            if len(s) > 0 and s[0] == u"\u2018" and s.count(u"\u2019") == 0:
                s = s.lstrip(u"\u2018")
            if len(s) > 0 and s[-1] == u"\u2019" and s.count(u"\u2018") == 0:
                s = s.rstrip(u"\u2019")
            if s:
                if s.count("\"") == 1:
                    s = s.replace('"', '')
                if s.count("\'") == 1:
                    s = s.replace(" '", " ")
                    # s = s.replace("' ", " ") # removed to prevent removal of genuine apostrophes
                # brackets of one kind with no partner of the other kind
                if "(" in s and ")" not in s:
                    s = s.replace("(", "")
                if ")" in s and "(" not in s:
                    s = s.replace(")", "")
                if "[" in s and "]" not in s:
                    s = s.replace("[", "")
                if "]" in s and "[" not in s:
                    s = s.replace("]", "")
                if "{" in s and "}" not in s:
                    s = s.replace("{", "")
                if "}" in s and "{" not in s:
                    s = s.replace("}", "")
            # remove a bracket pair that encloses the whole text
            for opener, closer in match_chars:
                # s may have been shortened (even emptied) by the previous pair, so
                # test the current first/last characters rather than a saved index
                if s and s[0] == opener and s[-1] == closer:
                    s = s.lstrip(opener).rstrip(closer)
            counter += 1
    return s
2969
2970
2971#################
2972#################
2973# EXTRA ARTISTS #
2974#################
2975#################
2976
2977
2978class ExtraArtists():
2979
2980    # CONSTANTS
2981    def __init__(self):
2982        self.album_artists = collections.defaultdict(
2983            lambda: collections.defaultdict(dict))
2984        # collection of artists to be applied at album level
2985
2986        self.track_listing = collections.defaultdict(list)
2987        # collection of tracks - format is {album: [track 1,
2988        # track 2, ...]}
2989
2990        self.options = collections.defaultdict(dict)
2991        # collection of Classical Extras options
2992
2993        self.globals = collections.defaultdict(dict)
2994        # collection of global variables for this class
2995
2996        self.album_performers = collections.defaultdict(
2997            lambda: collections.defaultdict(dict))
2998        # collection of performers who have release relationships, not track
2999        # relationships
3000
3001        self.album_instruments = collections.defaultdict(
3002            lambda: collections.defaultdict(dict))
3003        # collection of instruments which have release relationships, not track
3004        # relationships
3005
3006        self.artist_aliases = {}
3007        # collection of alias names - format is {sort_name: alias_name, ...}
3008
3009        self.artist_credits = collections.defaultdict(dict)
3010        # collection of credited-as names - format is {album: {sort_name: credit_name,
3011        # ...}, ...}
3012
3013        self.release_artists_sort = collections.defaultdict(list)
3014        # collection of release artists - format is {album: [sort_name_1,
3015        # sort_name_2, ...]}
3016
3017        self.lyricist_filled = collections.defaultdict(dict)
3018        # Boolean for each track to indicate if lyricist has been found (don't
3019        # want to add more from higher levels)
3020        # NB this last one is for completeness - not actually used by
3021        # ExtraArtists, but here to remove pep8 error
3022
3023        self.album_series_list = collections.defaultdict(dict)
3024        # series relationships - format is {'name_list': series names, 'id_list': series ids, 'number_list': number within series}
3025
    def add_artist_info(
            self,
            album,
            track_metadata,
            trackXmlNode,
            releaseXmlNode):
        """
        Main routine run for each track of release
        Reads the saved options for the track, applies the chosen naming style to the
        track artists and recording artists, gathers release-level and recording-level
        performers and, when the track is the last one of the release, calls
        process_album to finish off the album.
        :param album: Current release
        :param track_metadata: track metadata dictionary
        :param trackXmlNode: Everything in the track node downwards
        :param releaseXmlNode: Everything in the release node downwards (so includes all track nodes)
        :return: None (results are written to the track metadata and to the collections on self)
        """
        release_id = track_metadata['musicbrainz_albumid']
        # set up the status record for this release the first time it is seen
        if 'start' not in release_status[release_id]:
            release_status[release_id]['start'] = datetime.now()
        if 'lookups' not in release_status[release_id]:
            release_status[release_id]['lookups'] = 0
        release_status[release_id]['name'] = track_metadata['album']
        release_status[release_id]['artists'] = True
        if config.setting['log_debug'] or config.setting['log_info']:
            write_log(
                release_id,
                'debug',
                'STARTING ARTIST PROCESSING FOR ALBUM %s, DISC %s, TRACK %s',
                track_metadata['album'],
                track_metadata['discnumber'],
                track_metadata['tracknumber'] +
                ' ' +
                track_metadata['title'])
        # write_log(release_id, 'info', 'trackXmlNode = %s', trackXmlNode) # NB can crash Picard
        # write_log('info', 'releaseXmlNode = %s', releaseXmlNode) # NB can crash Picard
        # Jump through hoops to get track object!!
        track = album._new_tracks[-1]
        tm = track.metadata

        # OPTIONS - OVER-RIDE IF REQUIRED
        # the options are read back from the '~ce_options' tag; if the tag is not yet
        # present, get_options is called first
        if '~ce_options' not in tm:
            if config.setting['log_debug'] or config.setting['log_info']:
                write_log(release_id, 'debug', 'Artists gets track first...')
            get_options(release_id, album, track)
        options = interpret(tm['~ce_options'])
        if not options:
            if config.setting["log_error"]:
                write_log(
                    release_id,
                    'error',
                    'Artists. Failure to read saved options for track %s. options = %s',
                    track,
                    tm['~ce_options'])
            # fall back to the current Picard settings
            options = option_settings(config.setting)
        self.options[track] = options

        # CONSTANTS
        # NB these are re-set on self for every track from that track's options
        self.ERROR = options["log_error"]
        self.WARNING = options["log_warning"]
        self.ORCHESTRAS = options["cea_orchestras"].split(',')
        self.CHOIRS = options["cea_choirs"].split(',')
        self.GROUPS = options["cea_groups"].split(',')
        self.ENSEMBLE_TYPES = self.ORCHESTRAS + self.CHOIRS + self.GROUPS
        self.SEPARATORS = ['; ', '/ ', ';', '/']

        # continue?
        if not options["classical_extra_artists"]:
            return
        # album_files is not used - this is just for logging
        album_files = album.tagger.get_files_from_objects([album])
        if options['log_info']:
            write_log(
                release_id,
                'info',
                'ALBUM FILENAMES for album %r = %s',
                album,
                album_files)

        # process the track unless the 'ce_no_run' option is set and the track has no
        # file recorded in '~ce_file' (empty or the text "None")
        if not (
            options["ce_no_run"] and (
                not tm['~ce_file'] or tm['~ce_file'] == "None")):
            # continue
            write_log(
                    release_id,
                    'debug',
                    "ExtraArtists - add_artist_info")
            if album not in self.track_listing or track not in self.track_listing[album]:
                self.track_listing[album].append(track)
            # fix odd hyphens in names for consistency
            # (u2010 is replaced by the plain ASCII hyphen)
            field_types = ['~albumartists', '~albumartists_sort']
            for field_type in field_types:
                if field_type in tm:
                    field = tm[field_type]
                    if isinstance(field, list):
                        for x, it in enumerate(field):
                            field[x] = it.replace(u'\u2010', u'-')
                    elif isinstance(field, str):
                        field = field.replace(u'\u2010', u'-')
                    else:
                        pass
                    tm[field_type] = field

            # first time for this album (reloads each refresh)
            if tm['discnumber'] == '1' and tm['tracknumber'] == '1':
                # get artist aliases - these are cached so can be re-used across
                # releases, but are reloaded with each refresh
                get_aliases(self, release_id, album, options, releaseXmlNode)

                # xml_type = 'release'
                # get performers etc who are related at the release level
                relation_list = parse_data(
                    release_id, releaseXmlNode, [], 'relations')
                # NOTE(review): get_artists is called twice with identical arguments,
                # once for its 'artists' entry and once for its 'instruments' entry
                album_performerList = get_artists(
                    options, release_id, tm, relation_list, 'release')['artists']
                self.album_performers[album] = album_performerList
                album_instrumentList = get_artists(
                    options, release_id, tm, relation_list, 'release')['instruments']
                self.album_instruments[album] = album_instrumentList

                # get series information
                # NOTE(review): this is stored on self without an album key, so it
                # replaces whatever was held before - confirm this is intended
                self.album_series_list = get_series(
                    options, release_id, relation_list)

            else:
                # later tracks re-use what was stored when the first track was processed
                if album in self.album_performers:
                    album_performerList = self.album_performers[album]
                else:
                    album_performerList = []
                if album in self.album_instruments and self.album_instruments[album]:
                    tm['~cea_instruments'] = self.album_instruments[album][0]
                    tm['~cea_instruments_credited'] = self.album_instruments[album][1]
                    tm['~cea_instruments_all'] = self.album_instruments[album][2]
                    # Should be OK to initialise these here as recording artists
                    # yet to be processed

            # Fill release info not given by vanilla Picard
            if self.album_series_list:
                tm['series'] = self.album_series_list['name_list'] if 'name_list' in self.album_series_list else None
                tm['musicbrainz_seriesid'] = self.album_series_list['id_list'] if 'id_list' in self.album_series_list else None
                tm['series_number'] = self.album_series_list['number_list'] if 'number_list' in self.album_series_list else None
                ## TODO add label id too
            # recording-level series: within this method the result is only logged
            recording_relation_list = parse_data(
                release_id, trackXmlNode, [], 'recording', 'relations')
            recording_series_list = get_series(
                options, release_id, recording_relation_list)
            write_log(
                release_id,
                'info',
                'Recording_series_list = %s',
                recording_series_list)

            # TRACK ARTISTS
            track_artist_list = parse_data(
                release_id, trackXmlNode, [], 'artist-credit')
            if track_artist_list:
                track_artist = []
                track_artistsort = []
                track_artists = []
                track_artists_sort = []
                locale = config.setting["artist_locale"]
                # NB this is the Picard code in /util
                lang = locale.split("_")[0]

                # Set naming option
                # Put naming style into preferential list

                # naming as for vanilla Picard for track artists

                if options['translate_artist_names'] and lang:
                    name_style = ['alias', 'sort']
                    # documentation indicates that processing should be as below,
                    # but processing above appears to reflect what vanilla Picard actually does
                    # if options['standardize_artists']:
                    #     name_style = ['alias', 'sort']
                    # else:
                    #     name_style = ['alias', 'credit', 'sort']
                else:
                    if not options['standardize_artists']:
                        name_style = ['credit']
                    else:
                        name_style = []
                write_log(
                        release_id,
                        'info',
                        'Priority order of naming style for track artists = %s',
                        name_style)
                styled_artists = apply_artist_style(
                    options,
                    release_id,
                    lang,
                    track_artist_list,
                    name_style,
                    track_artist,
                    track_artistsort,
                    track_artists,
                    track_artists_sort)
                tm['artists'] = styled_artists['artists']
                tm['~artists_sort'] = styled_artists['artists_sort']
                tm['artist'] = styled_artists['artist']
                tm['artistsort'] = styled_artists['artistsort']

            # RECORDING ARTISTS, PERFORMERS AND WORK ARTISTS
            if 'recording' in trackXmlNode:
                self.globals[track]['is_recording'] = True
                write_log(release_id, 'debug', 'Getting recording details')
                recording = trackXmlNode['recording']
                # a single recording is wrapped in a list so that the loop handles both cases
                if not isinstance(recording, list):
                    recording = [recording]
                for record in recording:
                    rec_type = type(record)
                    write_log(release_id, 'info', 'rec-type = %s', rec_type)
                    write_log(release_id, 'info', record)
                    # Note that the lists below reflect https://musicbrainz.org/relationships/artist-recording
                    # Any changes to that DB structure will require changes
                    # here

                    # get recording artists data
                    recording_artist_list = parse_data(
                        release_id, record, [], 'artist-credit')
                    if recording_artist_list:
                        recording_artist = []
                        recording_artistsort = []
                        recording_artists = []
                        recording_artists_sort = []
                        locale = config.setting["artist_locale"]
                        # NB this is the Picard code in /util
                        lang = locale.split("_")[0]

                        # Set naming option
                        # Put naming style into preferential list

                        # naming as for vanilla Picard for track artists (per
                        # documentation rather than actual?)
                        if options['cea_ra_trackartist']:
                            if options['translate_artist_names'] and lang:
                                if options['standardize_artists']:
                                    name_style = ['alias', 'sort']
                                else:
                                    name_style = ['alias', 'credit', 'sort']
                            else:
                                if not options['standardize_artists']:
                                    name_style = ['credit']
                                else:
                                    name_style = []
                        # naming as for performers in classical extras
                        elif options['cea_ra_performer']:
                            if options['cea_aliases']:
                                if options['cea_alias_overrides']:
                                    name_style = ['alias', 'credit']
                                else:
                                    name_style = ['credit', 'alias']
                            else:
                                name_style = ['credit']

                        else:
                            name_style = []
                        write_log(
                                release_id,
                                'info',
                                'Priority order of naming style for recording artists = %s',
                                name_style)

                        styled_artists = apply_artist_style(
                            options,
                            release_id,
                            lang,
                            recording_artist_list,
                            name_style,
                            recording_artist,
                            recording_artistsort,
                            recording_artists,
                            recording_artists_sort)
                        self.append_tag(
                            release_id,
                            tm,
                            '~cea_recording_artists',
                            styled_artists['artists'])
                        self.append_tag(
                            release_id,
                            tm,
                            '~cea_recording_artists_sort',
                            styled_artists['artists_sort'])
                        self.append_tag(
                            release_id,
                            tm,
                            '~cea_recording_artist',
                            styled_artists['artist'])
                        self.append_tag(
                            release_id,
                            tm,
                            '~cea_recording_artistsort',
                            styled_artists['artistsort'])

                    else:
                        # no recording artist credit - blank the recording artist tags
                        tm['~cea_recording_artists'] = ''
                        tm['~cea_recording_artists_sort'] = ''
                        tm['~cea_recording_artist'] = ''
                        tm['~cea_recording_artistsort'] = ''

                    # use recording artist options
                    # keep a copy of the track artist tags before they are
                    # (optionally) replaced or merged below
                    tm['~cea_MB_artist'] = str_to_list(tm['artist'])
                    tm['~cea_MB_artistsort'] = str_to_list(tm['artistsort'])
                    tm['~cea_MB_artists'] = str_to_list(tm['artists'])
                    tm['~cea_MB_artists_sort'] = str_to_list(tm['~artists_sort'])

                    if options['cea_ra_use']:
                        if options['cea_ra_replace_ta']:
                            # replace track artists by recording artists
                            if tm['~cea_recording_artist']:
                                tm['artist'] = str_to_list(tm['~cea_recording_artist'])
                                tm['artistsort'] = str_to_list(tm['~cea_recording_artistsort'])
                                tm['artists'] = str_to_list(tm['~cea_recording_artists'])
                                tm['~artists_sort'] = str_to_list(tm['~cea_recording_artists_sort'])
                            elif not options['cea_ra_noblank_ta']:
                                # no recording artist and blanking is permitted
                                tm['artist'] = ''
                                tm['artistsort'] = ''
                                tm['artists'] = ''
                                tm['~artists_sort'] = ''
                        elif options['cea_ra_merge_ta']:
                            # merge recording artists into track artists
                            if tm['~cea_recording_artist']:
                                tm['artists'] = add_list_uniquely(
                                    tm['artists'], tm['~cea_recording_artists'])
                                tm['~artists_sort'] = add_list_uniquely(
                                    tm['~artists_sort'], tm['~cea_recording_artists_sort'])
                                if tm['artist'] != tm['~cea_recording_artist']:
                                    tm['artist'] = tm['artist'] + \
                                        ' (' + tm['~cea_recording_artist'] + ')'
                                    tm['artistsort'] = tm['artistsort'] + \
                                        ' (' + tm['~cea_recording_artistsort'] + ')'

                    # xml_type = 'recording'
                    relation_list = parse_data(
                        release_id, record, [], 'relations')
                    # release-level performers come first, then those of this recording
                    performerList = album_performerList + \
                        get_artists(options, release_id, tm, relation_list, 'recording')['artists']
                    # returns
                    # [(artist type, instrument or None, artist name, artist sort name, instrument sort, type sort)]
                    # where instrument sort places solo ahead of additional etc.
                    # and type sort applies a custom sequencing to the artist
                    # types
                    if performerList:
                        write_log(
                                release_id, 'info', "Performers: %s", performerList)
                        self.set_performer(
                            release_id, album, track, performerList, tm)
                    if not options['classical_work_parts']:
                        work_artist_list = parse_data(
                            release_id,
                            record,
                            [],
                            'relations',
                            'target-type:work',
                            'type:performance',
                            'work',
                            'relations',
                            'target-type:artist')
                        work_artists = get_artists(
                            options, release_id, tm, work_artist_list, 'work')['artists']
                        set_work_artists(
                            self, release_id, album, track, work_artists, tm, 0)
                    # otherwise composers etc. will be set in work parts
            else:
                self.globals[track]['is_recording'] = False
        else:
            tm['000_major_warning'] = "WARNING: Classical Extras not run for this track as no file present - " \
                "deselect the option on the advanced tab to run. If there is a file, then try 'Refresh'."
        # once the last track of the last disc has been reached, finish the album
        if track_metadata['tracknumber'] == track_metadata['totaltracks'] and track_metadata[
                'discnumber'] == track_metadata['totaldiscs']:  # last track
            self.process_album(release_id, album)
            release_status[release_id]['artists-done'] = datetime.now()
            close_log(release_id, 'artists')
3392
3393    # Checks for ensembles
3394    def ensemble_type(self, performer):
3395        """
3396        Returns ensemble types
3397        :param performer:
3398        :return:
3399        """
3400        for ensemble_name in self.ORCHESTRAS:
3401            ensemble = re.compile(
3402                r'(.*)\b' +
3403                ensemble_name +
3404                r'\b(.*)',
3405                re.IGNORECASE)
3406            if ensemble.search(performer):
3407                return 'Orchestra'
3408        for ensemble_name in self.CHOIRS:
3409            ensemble = re.compile(
3410                r'(.*)\b' +
3411                ensemble_name +
3412                r'\b(.*)',
3413                re.IGNORECASE)
3414            if ensemble.search(performer):
3415                return 'Choir'
3416        for ensemble_name in self.GROUPS:
3417            ensemble = re.compile(
3418                r'(.*)\b' +
3419                ensemble_name +
3420                r'\b(.*)',
3421                re.IGNORECASE)
3422            if ensemble.search(performer):
3423                return 'Group'
3424        return False
3425
3426    def process_album(self, release_id, album):
3427        """
3428        Perform final processing after all tracks read
3429        :param release_id: name for log file - usually =musicbrainz_albumid
3430        unless called outside metadata processor
3431        :param album:
3432        :return:
3433        """
3434        write_log(
3435                release_id,
3436                'debug',
3437                'ExtraArtists: Starting process_album')
3438        # process lyrics tags
3439        write_log(release_id, 'debug', 'Starting lyrics processing')
3440        common = []
3441        tmlyrics_dict = {}
3442        tmlyrics_sort = []
3443        options = {}
3444        for track in self.track_listing[album]:
3445            options = self.options[track]
3446            if options['cea_split_lyrics'] and options['cea_lyrics_tag']:
3447                tm = track.metadata
3448                lyrics_tag = options['cea_lyrics_tag']
3449                if tm[lyrics_tag]:
3450                    # turn text into word lists to speed processing
3451                    tmlyrics_dict[track] = tm[lyrics_tag].split()
3452        if tmlyrics_dict:
3453            tmlyrics_sort = sorted(
3454                tmlyrics_dict.items(),
3455                key=operator.itemgetter(1))
3456            prev = None
3457            first_track = None
3458            unique_lyrics = []
3459            ref_track = {}
3460            for lyric_tuple in tmlyrics_sort:  # tuple is (track, lyrics)
3461                if lyric_tuple[1] != prev:
3462                    unique_lyrics.append(lyric_tuple[1])
3463                    first_track = lyric_tuple[0]
3464                ref_track[lyric_tuple[0]] = first_track
3465                prev = lyric_tuple[1]
3466            common = turbo_lcs(
3467                release_id,
3468                unique_lyrics)
3469
3470        if common:
3471            unique = []
3472            for tup in tmlyrics_sort:
3473                track = tup[0]
3474                ref = ref_track[track]
3475                if track == ref:
3476                    start = substart_finder(tup[1], common)
3477                    length = len(common)
3478                    end = min(start + length, len(tup[1]))
3479                    unique = tup[1][:start] + tup[1][end:]
3480
3481                options = self.options[track]
3482                if options['cea_split_lyrics'] and options['cea_lyrics_tag']:
3483                    tm = track.metadata
3484                    if unique:
3485                        tm['~cea_track_lyrics'] = ' '.join(unique)
3486                    tm['~cea_album_lyrics'] = ' '.join(common)
3487                    if options['cea_album_lyrics']:
3488                        tm[options['cea_album_lyrics']] = tm['~cea_album_lyrics']
3489                    if unique and options['cea_track_lyrics']:
3490                        tm[options['cea_track_lyrics']] = tm['~cea_track_lyrics']
3491        else:
3492            for track in self.track_listing[album]:
3493                options = self.options[track]
3494                if options['cea_split_lyrics'] and options['cea_lyrics_tag']:
3495                    tm['~cea_track_lyrics'] = tm[options['cea_lyrics_tag']]
3496                    if options['cea_track_lyrics']:
3497                        tm[options['cea_track_lyrics']] = tm['~cea_track_lyrics']
3498        write_log(release_id, 'debug', 'Ending lyrics processing')
3499
3500        for track in self.track_listing[album]:
3501            self.write_metadata(release_id, options, album, track)
3502        self.track_listing[album] = []
3503        write_log(
3504                release_id,
3505                'info',
3506                "FINISHED Classical Extra Artists. Album: %s",
3507                album)
3508
3509
3510    def write_metadata(self, release_id, options, album, track):
3511        """
3512        Write the metadata for this track
3513        :param release_id:
3514        :param options:
3515        :param album:
3516        :param track:
3517        :return:
3518        """
3519        options = self.options[track]
3520        tm = track.metadata
3521        tm['~cea_version'] = PLUGIN_VERSION
3522
3523        # set inferred genres before any tags are blanked
3524        if options['cwp_genres_infer']:
3525            self.infer_genres(release_id, options, track, tm)
3526
3527        # album
3528        if not options['classical_work_parts']:
3529            if 'composer_lastnames' in self.album_artists[album]:
3530                last_names = seq_last_names(self, album)
3531                self.append_tag(
3532                    release_id,
3533                    tm,
3534                    '~cea_album_composer_lastnames',
3535                    last_names)
3536        # otherwise this is done in the workparts class, which has all
3537        # composer info
3538
3539        # process tag mapping
3540        tm['~cea_artists_complete'] = "Y"
3541        map_tags(options, release_id, album, tm)
3542
3543        # write out options and errors/warnings to tags
3544        if options['cea_options_tag'] != "":
3545            self.cea_options = collections.defaultdict(
3546                lambda: collections.defaultdict(
3547                    lambda: collections.defaultdict(dict)))
3548
3549            for opt in plugin_options(
3550                    'artists') + plugin_options('tag') + plugin_options('picard'):
3551                if 'name' in opt:
3552                    if 'value' in opt:
3553                        if options[opt['option']]:
3554                            self.cea_options['Classical Extras']['Artists options'][opt['name']] = opt['value']
3555                    else:
3556                        self.cea_options['Classical Extras']['Artists options'][opt['name']
3557                        ] = options[opt['option']]
3558
3559            for opt in plugin_options('tag_detail'):
3560                if opt['option'] != "":
3561                    name_list = opt['name'].split("_")
3562                    self.cea_options['Classical Extras']['Artists options'][name_list[0]
3563                    ][name_list[1]] = options[opt['option']]
3564
3565            if options['ce_version_tag'] and options['ce_version_tag'] != "":
3566                self.append_tag(release_id, tm, options['ce_version_tag'], str(
3567                    'Version ' + tm['~cea_version'] + ' of Classical Extras'))
3568            if options['cea_options_tag'] and options['cea_options_tag'] != "":
3569                self.append_tag(
3570                    release_id,
3571                    tm,
3572                    options['cea_options_tag'] +
3573                    ':artists_options',
3574                    json.loads(
3575                        json.dumps(
3576                            self.cea_options)))
3577
3578
3579    def infer_genres(self, release_id, options, track, tm):
3580        """
3581        Infer a genre from the artist/instrument metadata
3582        :param release_id:
3583        :param options:
3584        :param track:
3585        :param tm: track metadata
3586        :return:
3587        """
3588        # Note that this is now mixed in with other sources of genres in def map_tags
3589        # ~cea_work_type_if_classical is used for types that are specifically classical
3590        # and is only applied in map_tags if the track is deemed to be
3591        # classical
3592        if (self.globals[track]['is_recording'] and options['classical_work_parts']
3593                and '~artists_sort' in tm and 'composersort' in tm
3594                and any(x in tm['~artists_sort'] for x in tm['composersort'])
3595                and 'writer' not in tm
3596                and not any(x in tm['~artists_sort'] for x in tm['~cea_performers_sort'])):
3597            self.append_tag(
3598                release_id, tm, '~cea_work_type', 'Classical')
3599
3600        if isinstance(tm['~cea_soloists'], str):
3601            soloists = re.split(
3602                '|'.join(
3603                    self.SEPARATORS),
3604                tm['~cea_soloists'])
3605        else:
3606            soloists = tm['~cea_soloists']
3607        if '~cea_vocalists' in tm:
3608            if isinstance(tm['~cea_vocalists'], str):
3609                vocalists = re.split(
3610                    '|'.join(
3611                        self.SEPARATORS),
3612                    tm['~cea_vocalists'])
3613            else:
3614                vocalists = tm['~cea_vocalists']
3615        else:
3616            vocalists = []
3617
3618        if '~cea_ensembles' in tm:
3619            large = False
3620            if 'performer:orchestra' in tm:
3621                large = True
3622                self.append_tag(
3623                    release_id, tm, '~cea_work_type_if_classical', 'Orchestral')
3624                if '~cea_soloists' in tm:
3625                    if 'vocals' in tm['~cea_instruments_all']:
3626                        self.append_tag(
3627                            release_id, tm, '~cea_work_type', 'Vocal')
3628                    if len(soloists) == 1:
3629                        if soloists != vocalists:
3630                            self.append_tag(
3631                                release_id, tm, '~cea_work_type_if_classical', 'Concerto')
3632                        else:
3633                            self.append_tag(
3634                                release_id, tm, '~cea_work_type_if_classical', 'Aria')
3635                    elif len(soloists) == 2:
3636                        self.append_tag(
3637                            release_id, tm, '~cea_work_type_if_classical', 'Duet')
3638                        if not vocalists:
3639                            self.append_tag(
3640                                release_id, tm, '~cea_work_type_if_classical', 'Concerto')
3641                    elif len(soloists) == 3:
3642                        self.append_tag(
3643                            release_id, tm, '~cea_work_type_if_classical', 'Trio')
3644                    elif len(soloists) == 4:
3645                        self.append_tag(
3646                            release_id, tm, '~cea_work_type_if_classical', 'Quartet')
3647
3648            if 'performer:choir' in tm or 'performer:choir vocals' in tm:
3649                large = True
3650                self.append_tag(
3651                    release_id, tm, '~cea_work_type_if_classical', 'Choral')
3652                self.append_tag(
3653                    release_id, tm, '~cea_work_type', 'Vocal')
3654            else:
3655                if large and 'soloists' in tm and tm['soloists'].count(
3656                        'vocals') > 1:
3657                    self.append_tag(
3658                        release_id, tm, '~cea_work_type_if_classical', 'Opera')
3659            if not large:
3660                if '~cea_soloists' not in tm:
3661                    self.append_tag(
3662                        release_id, tm, '~cea_work_type_if_classical', 'Chamber music')
3663                else:
3664                    if vocalists:
3665                        self.append_tag(
3666                            release_id, tm, '~cea_work_type', 'Song')
3667                        self.append_tag(
3668                            release_id, tm, '~cea_work_type', 'Vocal')
3669                    else:
3670                        self.append_tag(
3671                            release_id, tm, '~cea_work_type_if_classical', 'Chamber music')
3672        else:
3673            if len(soloists) == 1:
3674                if vocalists != soloists:
3675                    self.append_tag(
3676                        release_id, tm, '~cea_work_type', 'Instrumental')
3677                else:
3678                    self.append_tag(
3679                        release_id, tm, '~cea_work_type', 'Song')
3680                    self.append_tag(
3681                        release_id, tm, '~cea_work_type', 'Vocal')
3682            elif len(soloists) == 2:
3683                self.append_tag(
3684                    release_id, tm, '~cea_work_type_if_classical', 'Duet')
3685            elif len(soloists) == 3:
3686                self.append_tag(
3687                    release_id, tm, '~cea_work_type_if_classical', 'Trio')
3688            elif len(soloists) == 4:
3689                self.append_tag(
3690                    release_id, tm, '~cea_work_type_if_classical', 'Quartet')
3691            else:
3692                if not vocalists:
3693                    self.append_tag(
3694                        release_id, tm, '~cea_work_type_if_classical', 'Chamber music')
3695                else:
3696                    self.append_tag(
3697                        release_id, tm, '~cea_work_type', 'Song')
3698                    self.append_tag(
3699                        release_id, tm, '~cea_work_type', 'Vocal')
3700
3701
3702    def append_tag(self, release_id, tm, tag, source):
3703        """
3704        :param release_id: name for log file - usually =musicbrainz_albumid
3705        unless called outside metadata processor
3706        :param tm:
3707        :param tag:
3708        :param source:
3709        :return:
3710        """
3711        write_log(
3712                release_id,
3713                'info',
3714                "Extra Artists - appending %s to %s",
3715                source,
3716                tag)
3717        append_tag(release_id, tm, tag, source, self.SEPARATORS)
3718
    def set_performer(self, release_id, album, track, performerList, tm):
        """
        Sets the performer-related tags

        Works in two passes over performerList: the first builds, for each
        instrument / vocal / performing orchestra entry, the combined
        instrument description keyed by the performer's sort names; the second
        writes the Picard and ~cea_* tags for every name of every entry.
        All existing tags whose name contains 'performer' are deleted first.
        :param release_id: name for log file - usually =musicbrainz_albumid
        unless called outside metadata processor
        :param album: key into self.artist_credits
        :param track: key into self.options (also used in log messages)
        :param performerList: see below. Note that the instrument lists inside
        it are modified in place ('solo', 'guest', 'additional' may be removed)
        :param tm: track metadata, updated in place
        :return: None (also returns early, abandoning the remaining performers,
        if an artist type is not in the tag strings)
        """
        # performerList is in format [(artist_type, [instrument list],[name list],[sort_name list],
        # instrument_sort, type_sort),(.....etc]
        # Sorted by type_sort then sort name then instrument_sort
        write_log(release_id, 'debug', "Extra Artists - set_performer")
        write_log(release_id, 'info', "Performer list is:")
        write_log(release_id, 'info', performerList)
        options = self.options[track]
        # tag strings are a tuple (Picard tag, cea tag, Picard sort tag, cea
        # sort tag)
        tag_strings = const.tag_strings('~cea')
        # insertions lists artist types where names in the main Picard tags may be updated for annotations
        # (not for performer types as Picard will write performer:inst as Performer name (inst) )
        insertions = const.INSERTIONS

        # First remove all existing performer tags
        # (keys are collected first so that tm is not changed while it is
        # being iterated)
        del_list = []
        for meta in tm:
            if 'performer' in meta:
                del_list.append(meta)
        for del_item in del_list:
            del tm[del_item]
        # State carried between consecutive entries of the first pass, used to
        # group instruments by artist and to spot duplicated entries
        last_artist = []
        last_inst_list = []
        last_instrument = None
        artist_inst = []
        # {tuple of sort names: combined instrument string (or None)}
        artist_inst_list = {}
        # FIRST PASS - build artist_inst_list
        for performer in performerList:
            artist_type = performer[0]
            if artist_type not in tag_strings:
                # NOTE(review): this leaves the whole method, not just this
                # entry - confirm that is intended
                return None
            if artist_type in ['instrument', 'vocal', 'performing orchestra']:
                if performer[1]:
                    inst_list = performer[1]
                    attrib_list = []
                    # Separate the attributes from the instruments. This
                    # removes them from performer[1] itself, so the second
                    # pass sees the list without them
                    for attrib in ['solo', 'guest', 'additional']:
                        if attrib in inst_list:
                            inst_list.remove(attrib)
                            attrib_list.append(attrib)
                    attribs = " ".join(attrib_list)
                    instrument = ", ".join(inst_list)
                    if not options['cea_no_solo'] and attrib_list:
                        instrument = attribs + " " + instrument
                    if performer[3] == last_artist:
                        # same artist as the previous entry - accumulate
                        if instrument != last_instrument:
                            artist_inst.append(instrument)
                        else:
                            if inst_list == last_inst_list:
                                write_log(
                                    release_id, 'warning', 'Duplicated performer information for %s'
                                                           ' (may be in Release Relationship as well as Track Relationship).'
                                                           ' Duplicates have been ignored.', performer[3])
                                if self.WARNING:
                                    self.append_tag(
                                        release_id,
                                        tm,
                                        '~cea_warning',
                                        '2. Duplicated performer information for "' +
                                        '; '.join(
                                            performer[3]) +
                                        '" (may be in Release Relationship as well as Track Relationship).'
                                        ' Duplicates have been ignored.')
                    else:
                        # different artist - start a new instrument group
                        artist_inst = [instrument]
                        last_artist = performer[3]
                        last_inst_list = inst_list
                        last_instrument = instrument

                    instrument = ", ".join(artist_inst)
                else:
                    instrument = None
                if artist_type == 'performing orchestra':
                    instrument = 'orchestra'
                # a later entry for the same sort names overwrites an earlier one
                artist_inst_list[tuple(performer[3])] = instrument
        # SECOND PASS - write the tags
        for performer in performerList:
            artist_type = performer[0]
            if artist_type not in tag_strings:
                return None
            # the arranger-type roles are treated as non-performing
            performing_artist = False if artist_type in [
                'arranger', 'instrument arranger', 'orchestrator', 'vocal arranger'] else True
            if True and artist_type in [
                'instrument',
                'vocal',
                    'performing orchestra']:  # There may be an option here (to replace 'True')
                # Currently groups instruments by artist - alternative has been
                # tested if required
                instrument = artist_inst_list[tuple(performer[3])]
            else:
                if performer[1]:
                    inst_list = performer[1]
                    if options['cea_no_solo']:
                        for attrib in ['solo', 'guest', 'additional']:
                            if attrib in inst_list:
                                inst_list.remove(attrib)
                    instrument = " ".join(inst_list)
                else:
                    instrument = None
                # NOTE(review): cannot be true here while the condition above
                # starts with 'True', as 'performing orchestra' always takes
                # the first branch
                if artist_type == 'performing orchestra':
                    instrument = 'orchestra'
            # sub_strings are appended to the Picard tag name for these types
            sub_strings = {'instrument': instrument,
                           'vocal': instrument  # ,
                           # 'instrument arranger': instrument,
                           # 'vocal arranger': instrument
                           }
            for typ in ['concertmaster']:
                if options['cea_' + typ] and options['cea_arrangers']:
                    sub_strings[typ] = ':' + options['cea_' + typ]

            if options['cea_arranger']:
                if instrument:
                    arr_inst = options['cea_arranger'] + ' ' + instrument
                else:
                    arr_inst = options['cea_arranger']
            else:
                arr_inst = instrument
            # annotations are the text added in brackets after the name
            annotations = {'instrument': instrument,
                           'vocal': instrument,
                           'performing orchestra': instrument,
                           'chorus master': options['cea_chorusmaster'],
                           'concertmaster': options['cea_concertmaster'],
                           'arranger': options['cea_arranger'],
                           'instrument arranger': arr_inst,
                           'orchestrator': options['cea_orchestrator'],
                           'vocal arranger': arr_inst}
            tag = tag_strings[artist_type][0]
            cea_tag = tag_strings[artist_type][1]
            sort_tag = tag_strings[artist_type][2]
            cea_sort_tag = tag_strings[artist_type][3]
            # the last character of the cea tag is dropped (presumably a plural
            # 's' - verify against const.tag_strings)
            cea_names_tag = cea_tag[:-1] + '_names'
            cea_instrumented_tag = cea_names_tag + '_instrumented'
            if artist_type in sub_strings:
                if sub_strings[artist_type]:
                    tag += sub_strings[artist_type]
                else:
                    write_log(
                            release_id,
                            'warning',
                            'No instrument/sub-key available for artist_type %s. Performer = %s. Track is %s',
                            artist_type,
                            performer[2],
                            track)

            # Delete any existing value of the Picard tag / sort tag the first
            # time it is met; '~ce_tag_cleared_<tag>' = "Y" records that this
            # has been done so that values appended below are not deleted again
            if tag:
                if '~ce_tag_cleared_' + \
                        tag not in tm or not tm['~ce_tag_cleared_' + tag] == "Y":
                    if tag in tm:
                        write_log(release_id, 'info', 'delete tag %s', tag)
                        del tm[tag]
                tm['~ce_tag_cleared_' + tag] = "Y"
            if sort_tag:
                if '~ce_tag_cleared_' + \
                        sort_tag not in tm or not tm['~ce_tag_cleared_' + sort_tag] == "Y":
                    if sort_tag in tm:
                        del tm[sort_tag]
                tm['~ce_tag_cleared_' + sort_tag] = "Y"

            name_list = performer[2]
            for ind, name in enumerate(name_list):
                performer_type = ''
                # sort names are matched to names by position
                sort_name = performer[3][ind]
                no_credit = True
                # change name to as-credited
                if (performing_artist and options['cea_performer_credited'] or
                        not performing_artist and options['cea_composer_credited']):
                    if sort_name in self.artist_credits[album]:
                        no_credit = False
                        name = self.artist_credits[album][sort_name]
                # over-ride with aliases and use standard MB name (not
                # as-credited) if no alias
                if (options['cea_aliases'] or not performing_artist and options['cea_aliases_composer']) and (
                        no_credit or options['cea_alias_overrides']):
                    if sort_name in self.artist_aliases:
                        name = self.artist_aliases[sort_name]
                # fix cyrillic names if not already fixed
                if options['cea_cyrillic']:
                    if not only_roman_chars(name):
                        name = remove_middle(unsort(sort_name))
                        # Only remove middle name where the existing
                        # performer is in non-latin script
                # annotated_name = name (annotation);
                # instrumented_name = name (instrument)
                annotated_name = name
                if instrument:
                    instrumented_name = name + ' (' + instrument + ')'
                else:
                    instrumented_name = name
                # add annotations and write performer tags
                if artist_type in annotations:
                    if annotations[artist_type]:
                        annotated_name += ' (' + annotations[artist_type] + ')'
                    else:
                        write_log(
                                release_id,
                                'warning',
                                'No annotation (instrument) available for artist_type %s.'
                                ' Performer = %s. Track is %s',
                                artist_type,
                                performer[2],
                                track)
                # main Picard tag: annotated name for the insertion types,
                # plain name otherwise
                if artist_type in insertions and options['cea_arrangers']:
                    self.append_tag(release_id, tm, tag, annotated_name)
                else:
                    if options['cea_arrangers'] or artist_type == tag:
                        self.append_tag(release_id, tm, tag, name)

                if options['cea_arrangers'] or artist_type == tag:
                    if sort_tag:
                        self.append_tag(release_id, tm, sort_tag, sort_name)
                        # optionally also write the hidden sort tag under its
                        # name without the '~'
                        if options['cea_tag_sort'] and '~' in sort_tag:
                            explicit_sort_tag = sort_tag.replace('~', '')
                            self.append_tag(
                                release_id, tm, explicit_sort_tag, sort_name)

                self.append_tag(release_id, tm, cea_tag, annotated_name)
                self.append_tag(release_id, tm, cea_names_tag, name)
                if instrumented_name != name:
                    self.append_tag(
                        release_id,
                        tm,
                        cea_instrumented_tag,
                        instrumented_name)

                if cea_sort_tag:
                    self.append_tag(release_id, tm, cea_sort_tag, sort_name)

                # differentiate soloists etc and write related tags
                if artist_type == 'performing orchestra' or (
                        instrument and instrument in self.ENSEMBLE_TYPES) or self.ensemble_type(name):
                    performer_type = 'ensembles'
                    self.append_tag(
                        release_id, tm, '~cea_ensembles', instrumented_name)
                    self.append_tag(
                        release_id, tm, '~cea_ensemble_names', name)
                    self.append_tag(
                        release_id, tm, '~cea_ensembles_sort', sort_name)
                elif artist_type in ['performer', 'instrument', 'vocal']:
                    performer_type = 'soloists'
                    self.append_tag(
                        release_id, tm, '~cea_soloists', instrumented_name)
                    self.append_tag(release_id, tm, '~cea_soloist_names', name)
                    self.append_tag(
                        release_id, tm, '~cea_soloists_sort', sort_name)
                    # soloists are further split into vocalists,
                    # instrumentalists and others (no instrument)
                    if artist_type == "vocal":
                        self.append_tag(
                            release_id, tm, '~cea_vocalists', instrumented_name)
                        self.append_tag(
                            release_id, tm, '~cea_vocalist_names', name)
                        self.append_tag(
                            release_id, tm, '~cea_vocalists_sort', sort_name)
                    elif instrument:
                        self.append_tag(
                            release_id, tm, '~cea_instrumentalists', instrumented_name)
                        self.append_tag(
                            release_id, tm, '~cea_instrumentalist_names', name)
                        self.append_tag(
                            release_id, tm, '~cea_instrumentalists_sort', sort_name)
                    else:
                        self.append_tag(
                            release_id, tm, '~cea_other_soloists', instrumented_name)
                        self.append_tag(
                            release_id, tm, '~cea_other_soloist_names', name)
                        self.append_tag(
                            release_id, tm, '~cea_other_soloists_sort', sort_name)

                # set album artists
                if performer_type or artist_type == 'conductor':
                    # album tag names are derived from the cea tag names by
                    # text replacement
                    cea_album_tag = cea_tag.replace(
                        'cea', 'cea_album').replace(
                        'performers', performer_type)
                    cea_album_sort_tag = cea_sort_tag.replace(
                        'cea', 'cea_album').replace(
                        'performers', performer_type)
                    if stripsir(name) in tm['~albumartists'] or stripsir(
                            sort_name) in tm['~albumartists_sort']:
                        self.append_tag(release_id, tm, cea_album_tag, name)
                        self.append_tag(
                            release_id, tm, cea_album_sort_tag, sort_name)
                    else:
                        # not an album artist - record as a supporting
                        # performer (conductors are not recorded here)
                        if performer_type:
                            self.append_tag(
                                release_id, tm, '~cea_support_performers', instrumented_name)
                            self.append_tag(
                                release_id, tm, '~cea_support_performer_names', name)
                            self.append_tag(
                                release_id, tm, '~cea_support_performers_sort', sort_name)
4012
4013##############
4014##############
4015# WORK PARTS #
4016##############
4017##############
4018
4019
4020class PartLevels():
4021    # QUEUE-HANDLING
4022    class WorksQueue(LockableObject):
4023        """Object for managing the queue of lookups"""
4024
4025        def __init__(self):
4026            LockableObject.__init__(self)
4027            self.queue = {}
4028
4029        def __contains__(self, name):
4030            return name in self.queue
4031
4032        def __iter__(self):
4033            return self.queue.__iter__()
4034
4035        def __getitem__(self, name):
4036            self.lock_for_read()
4037            value = self.queue[name] if name in self.queue else None
4038            self.unlock()
4039            return value
4040
4041        def __setitem__(self, name, value):
4042            self.lock_for_write()
4043            self.queue[name] = value
4044            self.unlock()
4045
4046        def append(self, name, value):
4047            self.lock_for_write()
4048            if name in self.queue:
4049                self.queue[name].append(value)
4050                value = False
4051            else:
4052                self.queue[name] = [value]
4053                value = True
4054            self.unlock()
4055            return value
4056
4057        def remove(self, name):
4058            self.lock_for_write()
4059            value = None
4060            if name in self.queue:
4061                value = self.queue[name]
4062                del self.queue[name]
4063            self.unlock()
4064            return value
4065
4066        # INITIALISATION
4067
4068    def __init__(self):
4069        self.works_cache = {}
4070        # maintains list of parent of each workid, or None if no parent found,
4071        # so that XML lookup need only executed if no existing record
4072
4073        self.partof = collections.defaultdict(dict)
4074        # the inverse of the above (immediate children of each parent)
4075        # but note that this is specific to the album as children may vary between albums
4076        # so format is {album1{parent1: child1, parent2:, child2},
4077        # album2{....}}
4078
4079        self.works_queue = self.WorksQueue()
4080        # lookup queue - holds track/album pairs for each queued workid (may be
4081        # more than one pair per id, especially for higher-level parts)
4082
4083        self.parts = collections.defaultdict(
4084            lambda: collections.defaultdict(dict))
4085        # metadata collection for all parts - structure is {workid: {name: ,
4086        # parent: , (track,album): {part_levels}}, etc}
4087
4088        self.top_works = collections.defaultdict(dict)
4089        # metadata collection for top-level works for (track, album) -
4090        # structure is {(track, album): {workId: }, etc}
4091
4092        self.trackback = collections.defaultdict(
4093            lambda: collections.defaultdict(dict))
4094        # hierarchical iterative work structure - {album: {id: , children:{id:
4095        # , children{}, id: etc}, id: etc} }
4096
4097        self.child_listing = collections.defaultdict(list)
4098        # contains list of workIds which are descendants of a given workId, to
4099        # prevent recursion when adding new ids
4100
4101        self.work_listing = collections.defaultdict(list)
4102        # contains list of workIds for each album
4103
4104        self.top = collections.defaultdict(list)
4105        # self.top[album] = list of work Ids which are top-level works in album
4106
4107        self.options = collections.defaultdict(dict)
4108        # active Classical Extras options for current track
4109
4110        self.synonyms = collections.defaultdict(dict)
4111        # active synonym options for current track
4112
4113        self.replacements = collections.defaultdict(dict)
4114        # active synonym options for current track
4115
4116        self.file_works = collections.defaultdict(list)
4117        # list of works derived from SongKong-style file tags
4118        # structure is {(album, track): [{workid: , name: }, {workid: ....}}
4119
4120        self.album_artists = collections.defaultdict(
4121            lambda: collections.defaultdict(dict))
4122        # collection of artists to be applied at album level
4123
4124        self.artist_aliases = {}
4125        # collection of alias names - format is {sort_name: alias_name, ...}
4126
4127        self.artist_credits = collections.defaultdict(dict)
4128        # collection of credited-as names - format is {album: {sort_name: credit_name,
4129        # ...}, ...}
4130
4131        self.release_artists_sort = collections.defaultdict(list)
4132        # collection of release artists - format is {album: [sort_name_1,
4133        # sort_name_2, ...]}
4134
4135        self.lyricist_filled = collections.defaultdict(dict)
4136        # Boolean for each track to indicate if lyricist has been found (don't
4137        # want to add more from higher levels)
4138
4139        self.orphan_tracks = collections.defaultdict(list)
4140        # To keep a list for each album of tracks which do not have works -
4141        # format is {album: [track1, track2, ...], etc}
4142
4143        self.tracks = collections.defaultdict(
4144            lambda: collections.defaultdict(dict))
4145        # To keep a list of all tracks for the album - format is {album:
4146        # {track1: {movement-group: movementgroup, movement-number: movementnumber},
4147        #  track2: {}, ..., etc}, album2: etc}
4148
4149    ########################################
4150    # SECTION 1 - Initial track processing #
4151    ########################################
4152
4153    def add_work_info(
4154            self,
4155            album,
4156            track_metadata,
4157            trackXmlNode,
4158            releaseXmlNode):
4159        """
4160        Main Routine - run for each track
4161        :param album:
4162        :param track_metadata:
4163        :param trackXmlNode:
4164        :param releaseXmlNode:
4165        :return:
4166        """
4167        release_id = track_metadata['musicbrainz_albumid']
4168        if 'start' not in release_status[release_id]:
4169            release_status[release_id]['start'] = datetime.now()
4170        if 'lookups' not in release_status[release_id]:
4171            release_status[release_id]['lookups'] = 0
4172        release_status[release_id]['name'] = track_metadata['album']
4173        release_status[release_id]['works'] = True
4174        if config.setting['log_debug'] or config.setting['log_info']:
4175            write_log(
4176                release_id,
4177                'debug',
4178                'STARTING WORKS PROCESSING FOR ALBUM %s, DISC %s, TRACK %s',
4179                track_metadata['album'],
4180                track_metadata['discnumber'],
4181                track_metadata['tracknumber'] +
4182                ' ' +
4183                track_metadata['title'])
4184        # clear the cache if required (if this is not done, then queue count may get out of sync)
4185        # Jump through hoops to get track object!!
4186        track = album._new_tracks[-1]
4187        tm = track.metadata
4188        if config.setting['log_debug'] or config.setting['log_info']:
4189            write_log(
4190                release_id,
4191                'debug',
4192                'Cache setting for track %s is %s',
4193                track,
4194                config.setting['use_cache'])
4195
4196        # OPTIONS - OVER-RIDE IF REQUIRED
4197        if '~ce_options' not in tm:
4198            if config.setting['log_debug'] or config.setting['log_info']:
4199                write_log(release_id, 'debug', 'Workparts gets track first...')
4200            get_options(release_id, album, track)
4201        options = interpret(tm['~ce_options'])
4202
4203        if not options:
4204            if config.setting['log_error']:
4205                write_log(
4206                    release_id,
4207                    'error',
4208                    'Work Parts. Failure to read saved options for track %s. options = %s',
4209                    track,
4210                    tm['~ce_options'])
4211            options = option_settings(config.setting)
4212        self.options[track] = options
4213
4214        # CONSTANTS
4215        write_log(release_id, 'basic', 'Options: %s' ,options)
4216        self.ERROR = options["log_error"]
4217        self.WARNING = options["log_warning"]
4218        self.SEPARATORS = ['; ']
4219        self.EQ = "EQ_TO_BE_REVERSED"  # phrase to indicate that a synonym has been used
4220
4221        self.get_sk_tags(release_id, album, track, tm, options)
4222        self.synonyms[track] = self.get_text_tuples(
4223            release_id, track, 'synonyms')  # a list of tuples
4224        self.replacements[track] = self.get_text_tuples(
4225            release_id, track, 'replacements')  # a list of tuples
4226
4227        # Continue?
4228        if not options["classical_work_parts"]:
4229            return
4230
4231        # OPTION-DEPENDENT CONSTANTS:
4232        # Maximum number of XML- lookup retries if error returned from server
4233        self.MAX_RETRIES = options["cwp_retries"]
4234        self.USE_CACHE = options["use_cache"]
4235        if options["cwp_partial"] and options["cwp_partial_text"] and options["cwp_level0_works"]:
4236            options["cwp_removewords_p"] = options["cwp_removewords"] + \
4237                ", " + options["cwp_partial_text"] + ' '
4238        else:
4239            options["cwp_removewords_p"] = options["cwp_removewords"]
4240        # Explanation:
4241        # If "Partial" is selected then the level 0 work name will have PARTIAL_TEXT appended to it.
4242        # If a recording is split across several tracks then each sub-part (quasi-movement) will have the same name
4243        # (with the PARTIAL_TEXT added). If level 0 is used to source work names then the level 1 work name will be
4244        # changed to be this repeated name and will therefore also include PARTIAL_TEXT.
4245        # So we need to add PARTIAL_TEXT to the prefixes list to ensure it is excluded from the level 1 work name.
4246        #
4247        write_log(
4248                release_id,
4249                'debug',
4250                "PartLevels - LOAD NEW TRACK: :%s",
4251                track)
4252        # write_log(release_id, 'info', "trackXmlNode:") # warning - may break Picard
4253
4254        # first time for this album (reloads each refresh)
4255        if tm['discnumber'] == '1' and tm['tracknumber'] == '1':
4256            # get artist aliases - these are cached so can be re-used across
4257            # releases, but are reloaded with each refresh
4258            get_aliases(self, release_id, album, options, releaseXmlNode)
4259
4260        # fix titles which include composer name
4261        composersort =[]
4262        if 'compposersort' in tm:
4263            composersort = str_to_list(['composersort'])
4264        composerlastnames = []
4265        for composer in composersort:
4266            lname = re.compile(r'(.*),')
4267            match = lname.search(composer)
4268            if match:
4269                composerlastnames.append(match.group(1))
4270            else:
4271                composerlastnames.append(composer)
4272        title = track_metadata['title']
4273        colons = title.count(":")
4274        if colons > 0:
4275            title_split = title.split(': ', 1)
4276            test = title_split[0]
4277            if test in composerlastnames:
4278                track_metadata['~cwp_title'] = title_split[1]
4279
4280        # now process works
4281        write_log(
4282                release_id,
4283                'info',
4284                'PartLevels - add_work_info - metadata load = %r',
4285                track_metadata)
4286        workIds = []
4287        if 'musicbrainz_workid' in tm:
4288            workIds = str_to_list(tm['musicbrainz_workid'])
4289        if workIds and not (options["ce_no_run"] and (
4290                not tm['~ce_file'] or tm['~ce_file'] == "None")):
4291            self.build_work_info(release_id, options, trackXmlNode, album, track, track_metadata, workIds)
4292
4293        else:  # no work relation
4294            write_log(
4295                    release_id,
4296                    'warning',
4297                    "WARNING - no works for this track: \"%s\"",
4298                    title)
4299            self.append_tag(
4300                release_id,
4301                track_metadata,
4302                '~cwp_warning',
4303                '3. No works for this track')
4304            if album in self.orphan_tracks:
4305                if track not in self.orphan_tracks[album]:
4306                    self.orphan_tracks[album].append(track)
4307            else:
4308                self.orphan_tracks[album] = [track]
4309            # Don't publish metadata yet until all album is processed
4310
4311        # last track
4312        write_log(
4313                release_id,
4314                'debug',
4315                'Check for last track. Requests = %s, Tracknumber = %s, Totaltracks = %s,'
4316                ' Discnumber = %s, Totaldiscs = %s',
4317                album._requests,
4318                track_metadata['tracknumber'],
4319                track_metadata['totaltracks'],
4320                track_metadata['discnumber'],
4321                track_metadata['totaldiscs'])
4322        if album._requests == 0 and track_metadata['tracknumber'] == track_metadata[
4323                'totaltracks'] and track_metadata['discnumber'] == track_metadata['totaldiscs']:
4324            self.process_album(release_id, album)
4325            release_status[release_id]['works-done'] = datetime.now()
4326            close_log(release_id, 'works')
4327
4328
4329    def build_work_info(self, release_id, options, trackXmlNode, album, track, track_metadata, workIds):
4330        """
4331        Construct the work metadata, taking into account partial recordings and medleys
4332        :param release_id:
4333        :param options:
4334        :param trackXmlNode: JSON returned by the webservice
4335        :param album:
4336        :param track:
4337        :param track_metadata:
4338        :param workIds: work ids for this track
4339        :return:
4340        """
4341        work_list_info = []
4342        keyed_workIds = {}
4343        for i, workId in enumerate(workIds):
4344
4345            # sort by ordering_key, if any
4346            match_tree = [
4347                'recording',
4348                'relations',
4349                'target-type:work',
4350                'work',
4351                'id:' + workId]
4352            rels = parse_data(release_id, trackXmlNode, [], *match_tree)
4353            # for recordings which are ordered within track:-
4354            match_tree_1 = [
4355                'ordering-key']
4356            # for recordings of works which are ordered as part of parent
4357            # (may be duplicated by top-down check later):-
4358            match_tree_2 = [
4359                'relations',
4360                'target-type:work',
4361                'type:parts',
4362                'direction:backward',
4363                'ordering-key']
4364            parse_result = parse_data(release_id,
4365                                      rels,
4366                                      [],
4367                                      *match_tree_1) + parse_data(release_id,
4368                                                                  rels,
4369                                                                  [],
4370                                                                  *match_tree_2)
4371            write_log(
4372                release_id,
4373                'info',
4374                'multi-works - ordering key: %s',
4375                parse_result)
4376            if parse_result:
4377                if isinstance(parse_result[0], int):
4378                    key = parse_result[0]
4379                elif isinstance(parse_result[0], str) and parse_result[0].isdigit():
4380                    key = int(parse_result[0])
4381                else:
4382                    key = 100 + i
4383            else:
4384                key = 100 + i
4385            keyed_workIds[key] = workId
4386        partial = False
4387        for key in sorted(keyed_workIds):
4388            workId = keyed_workIds[key]
4389            work_rels = parse_data(
4390                release_id,
4391                trackXmlNode,
4392                [],
4393                'recording',
4394                'relations',
4395                'target-type:work',
4396                'work.id:' + workId)
4397            write_log(release_id, 'info', 'work_rels: %s', work_rels)
4398            work_attributes = parse_data(
4399                release_id, work_rels, [], 'attributes')[0]
4400            write_log(
4401                release_id,
4402                'info',
4403                'work_attributes: %s',
4404                work_attributes)
4405            work_titles = parse_data(
4406                release_id, work_rels, [], 'work', 'title')
4407            work_list_info_item = {
4408                'id': workId,
4409                'attributes': work_attributes,
4410                'titles': work_titles}
4411            work_list_info.append(work_list_info_item)
4412            work = []
4413            for title in work_titles:
4414                work.append(title)
4415            if options['cwp_partial']:
4416                # treat the recording as work level 0 and the work of which it
4417                # is a partial recording as work level 1
4418                if 'partial' in work_attributes:
4419                    partial = True
4420                    parentId = workId
4421                    workId = track_metadata['musicbrainz_recordingid']
4422
4423                    works = []
4424                    for w in work:
4425                        partwork = w
4426                        works.append(partwork)
4427
4428                    write_log(
4429                        release_id,
4430                        'info',
4431                        "Id %s is PARTIAL RECORDING OF id: %s, name: %s",
4432                        workId,
4433                        parentId,
4434                        work)
4435                    work_list_info_item = {
4436                        'id': workId,
4437                        'attributes': [],
4438                        'titles': works,
4439                        'parent': parentId}
4440                    work_list_info.append(work_list_info_item)
4441        write_log(
4442            release_id,
4443            'info',
4444            'work_list_info: %s',
4445            work_list_info)
4446        # we now have a list of items, where the id of each is a work id for the track or
4447        #  (multiple instances of) the recording id (for partial works)
4448        # we need to turn this into a usable hierarchy - i.e. just one item
4449        workId_list = []
4450        work_list = []
4451        parent_list = []
4452        attribute_list = []
4453        workId_list_p = []
4454        work_list_p = []
4455        attribute_list_p = []
4456        for w in work_list_info:
4457            if 'partial' not in w['attributes'] or not options[
4458                'cwp_partial']:  # just do the bottom-level 'works' first
4459                workId_list.append(w['id'])
4460                work_list += w['titles']
4461                attribute_list += w['attributes']
4462                if 'parent' in w:
4463                    if w['parent'] not in parent_list:  # avoid duplicating parents!
4464                        parent_list.append(w['parent'])
4465            else:
4466                workId_list_p.append(w['id'])
4467                work_list_p += w['titles']
4468                attribute_list_p += w['attributes']
4469        # de-duplicate work names
4470        # list(set()) won't work as need to retain order
4471        work_list = list(collections.OrderedDict.fromkeys(work_list))
4472        work_list_p = list(collections.OrderedDict.fromkeys(work_list_p))
4473
4474        workId_tuple = tuple(workId_list)
4475        workId_tuple_p = tuple(workId_list_p)
4476        if workId_tuple not in self.work_listing[album]:
4477            self.work_listing[album].append(workId_tuple)
4478        if workId_tuple not in self.parts or not self.USE_CACHE:
4479            self.parts[workId_tuple]['name'] = work_list
4480            if parent_list:
4481                if workId_tuple in self.works_cache:
4482                    self.works_cache[workId_tuple] += parent_list
4483                    self.parts[workId_tuple]['parent'] += parent_list
4484                else:
4485                    self.works_cache[workId_tuple] = parent_list
4486                    self.parts[workId_tuple]['parent'] = parent_list
4487                self.parts[workId_tuple_p]['name'] = work_list_p
4488                if workId_tuple_p not in self.work_listing[album]:
4489                    self.work_listing[album].append(workId_tuple_p)
4490
4491            if 'medley' in attribute_list_p:
4492                self.parts[workId_tuple_p]['medley'] = True
4493
4494            if 'medley' in attribute_list:
4495                self.parts[workId_tuple]['medley'] = True
4496
4497            if partial:
4498                self.parts[workId_tuple]['partial'] = True
4499
4500        self.trackback[album][workId_tuple]['id'] = workId_list
4501        if 'meta' in self.trackback[album][workId_tuple]:
4502            if (track,
4503                album) not in self.trackback[album][workId_tuple]['meta']:
4504                self.trackback[album][workId_tuple]['meta'].append(
4505                    (track, album))
4506        else:
4507            self.trackback[album][workId_tuple]['meta'] = [(track, album)]
4508        write_log(
4509            release_id,
4510            'info',
4511            "Trackback for %s is %s. Partial = %s",
4512            track,
4513            self.trackback[album][workId_tuple],
4514            partial)
4515
4516        if workId_tuple in self.works_cache and (
4517                self.USE_CACHE or partial):
4518            write_log(
4519                release_id,
4520                'debug',
4521                "GETTING WORK METADATA FROM CACHE, for work %s",
4522                workId_tuple)
4523            if workId_tuple not in self.work_listing[album]:
4524                self.work_listing[album].append(workId_tuple)
4525            not_in_cache = self.check_cache(
4526                track_metadata, album, track, workId_tuple, [])
4527        else:
4528            if partial:
4529                not_in_cache = [workId_tuple_p]
4530            else:
4531                not_in_cache = [workId_tuple]
4532        for workId_tuple in not_in_cache:
4533            if not self.USE_CACHE:
4534                if workId_tuple in self.works_cache:
4535                    del self.works_cache[workId_tuple]
4536            self.work_not_in_cache(release_id, album, track, workId_tuple)
4537
4538
4539    def get_sk_tags(self, release_id, album, track, tm, options):
4540        """
4541        Get file tags which are consistent with SongKong's metadata usage
4542        :param release_id: name for log file - usually =musicbrainz_albumid
4543        unless called outside metadata processor
4544        :param album:
4545        :param track:
4546        :param tm:
4547        :param options:
4548        :return:
4549        """
4550        if options["cwp_use_sk"]:
4551            if '~ce_file' in tm and interpret(tm['~ce_file']):
4552                music_file = tm['~ce_file']
4553                orig_metadata = album.tagger.files[music_file].orig_metadata
4554                if 'musicbrainz_work_composition_id' in orig_metadata and 'musicbrainz_workid' in orig_metadata:
4555                    if 'musicbrainz_work_composition' in orig_metadata:
4556                        if 'musicbrainz_work' in orig_metadata:
4557                            if orig_metadata['musicbrainz_work_composition_id'] == orig_metadata[
4558                                'musicbrainz_workid'] \
4559                                    and orig_metadata['musicbrainz_work_composition'] != orig_metadata[
4560                                        'musicbrainz_work']:
4561                                # Picard may have overwritten SongKong tag (top
4562                                # work id) with bottom work id
4563                                write_log(
4564                                    release_id,
4565                                    'warning',
4566                                    'File tag musicbrainz_workid incorrect? id = %s. Sourcing from MB',
4567                                    orig_metadata['musicbrainz_workid'])
4568                                if self.WARNING:
4569                                    self.append_tag(
4570                                        release_id,
4571                                        tm,
4572                                        '~cwp_warning',
4573                                        '4. File tag musicbrainz_workid incorrect? id = ' +
4574                                        orig_metadata['musicbrainz_workid'] +
4575                                        '. Sourcing from MB')
4576                                return None
4577                        write_log(
4578                                release_id,
4579                                'info',
4580                                'Read from file tag: musicbrainz_work_composition_id: %s',
4581                                orig_metadata['musicbrainz_work_composition_id'])
4582                        self.file_works[(album, track)].append({
4583                            'workid': orig_metadata['musicbrainz_work_composition_id'].split('; '),
4584                            'name': orig_metadata['musicbrainz_work_composition']})
4585                    else:
4586                        wid = orig_metadata['musicbrainz_work_composition_id']
4587                        write_log(
4588                            release_id,
4589                            'error',
4590                            "No matching work name for id tag %s",
4591                            wid)
4592                        if self.ERROR:
4593                            self.append_tag(
4594                                release_id,
4595                                tm,
4596                                '~cwp_error',
4597                                '2. No matching work name for id tag ' +
4598                                wid)
4599                        return None
4600                    n = 1
4601                    while 'musicbrainz_work_part_level' + \
4602                            str(n) + '_id' in orig_metadata:
4603                        if 'musicbrainz_work_part_level' + \
4604                                str(n) in orig_metadata:
4605                            self.file_works[(album, track)].append({
4606                                'workid': orig_metadata[
4607                                    'musicbrainz_work_part_level' + str(n) + '_id'].split('; '),
4608                                'name': orig_metadata['musicbrainz_work_part_level' + str(n)]})
4609                            n += 1
4610                        else:
4611                            wid = orig_metadata['musicbrainz_work_part_level' +
4612                                                str(n) + '_id']
4613                            write_log(
4614                                release_id, 'error', "No matching work name for id tag %s", wid)
4615                            if self.ERROR:
4616                                self.append_tag(
4617                                    release_id,
4618                                    tm,
4619                                    '~cwp_error',
4620                                    '2. No matching work name for id tag ' +
4621                                    wid)
4622                            break
4623                    if orig_metadata['musicbrainz_work_composition_id'] != orig_metadata[
4624                            'musicbrainz_workid']:
4625                        if 'musicbrainz_work' in orig_metadata:
4626                            self.file_works[(album, track)].append({
4627                                'workid': orig_metadata['musicbrainz_workid'].split('; '),
4628                                'name': orig_metadata['musicbrainz_work']})
4629                        else:
4630                            wid = orig_metadata['musicbrainz_workid']
4631                            write_log(
4632                                release_id, 'error', "No matching work name for id tag %s", wid)
4633                            if self.ERROR:
4634                                self.append_tag(
4635                                    release_id,
4636                                    tm,
4637                                    '~cwp_error',
4638                                    '2. No matching work name for id tag ' +
4639                                    wid)
4640                            return None
4641                    file_work_levels = len(self.file_works[(album, track)])
4642                    write_log(release_id,
4643                                  'debug',
4644                                  'Loaded works from file tags for track %s. Works: %s: ',
4645                                  track,
4646                                  self.file_works[(album,
4647                                                   track)])
4648                    for i, work in enumerate(self.file_works[(album, track)]):
4649                        workId = tuple(work['workid'])
4650                        if workId not in self.works_cache:  # Use cache in preference to file tags
4651                            if workId not in self.work_listing[album]:
4652                                self.work_listing[album].append(workId)
4653                            self.parts[workId]['name'] = [work['name']]
4654                            parentId = None
4655                            parent = ''
4656                            if i < file_work_levels - 1:
4657                                parentId = self.file_works[(
4658                                    album, track)][i + 1]['workid']
4659                                parent = self.file_works[(
4660                                    album, track)][i + 1]['name']
4661
4662                            if parentId:
4663                                self.works_cache[workId] = parentId
4664                                self.parts[workId]['parent'] = parentId
4665                                self.parts[tuple(parentId)]['name'] = [parent]
4666                            else:
4667                                # so we remember we looked it up and found none
4668                                self.parts[workId]['no_parent'] = True
4669                                self.top_works[(track, album)
4670                                               ]['workId'] = workId
4671                                if workId not in self.top[album]:
4672                                    self.top[album].append(workId)
4673
4674    def check_cache(self, tm, album, track, workId_tuple, not_in_cache):
4675        """
4676        Recursive loop to get cached works
4677        :param tm:
4678        :param album:
4679        :param track:
4680        :param workId_tuple:
4681        :param not_in_cache:
4682        :return:
4683        """
4684        parentId_tuple = tuple(self.works_cache[workId_tuple])
4685        if parentId_tuple not in self.work_listing[album]:
4686            self.work_listing[album].append(parentId_tuple)
4687
4688        if parentId_tuple in self.works_cache:
4689            self.check_cache(tm, album, track, parentId_tuple, not_in_cache)
4690        else:
4691            not_in_cache.append(parentId_tuple)
4692        return not_in_cache
4693
4694    def work_not_in_cache(self, release_id, album, track, workId_tuple):
4695        """
4696        Determine actions if work not in cache (is it the top or do we need to look up?)
4697        :param release_id: name for log file - usually =musicbrainz_albumid
4698        unless called outside metadata processor
4699        :param album:
4700        :param track:
4701        :param workId_tuple:
4702        :return:
4703        """
4704
4705        write_log(
4706                release_id,
4707                'debug',
4708                'Processing work_not_in_cache for workId %s',
4709                workId_tuple)
4710        ## NB the first condition below is to prevent the side effect of assigning a dictionary entry in self.parts for workId with no details
4711        if workId_tuple in self.parts and 'no_parent' in self.parts[workId_tuple] and (
4712                self.USE_CACHE or self.options[track]["cwp_use_sk"]) and self.parts[workId_tuple]['no_parent']:
4713            write_log(release_id, 'info', '%s is top work', workId_tuple)
4714            self.top_works[(track, album)]['workId'] = workId_tuple
4715            if album in self.top:
4716                if workId_tuple not in self.top[album]:
4717                    self.top[album].append(workId_tuple)
4718            else:
4719                self.top[album] = [workId_tuple]
4720        else:
4721            write_log(
4722                    release_id,
4723                    'info',
4724                    'Calling work_add_track to look up parents for %s',
4725                    workId_tuple)
4726            for workId in workId_tuple:
4727                self.work_add_track(album, track, workId, 0)
4728
4729        write_log(
4730                release_id,
4731                'debug',
4732                'End of work_not_in_cache for workId %s',
4733                workId_tuple)
4734
4735    def work_add_track(self, album, track, workId, tries, user_data=True):
4736        """
4737        Add the work to the lookup queue
4738        :param user_data:
4739        :param album:
4740        :param track:
4741        :param workId:
4742        :param tries: number of lookup attempts
4743        :return:
4744        """
4745        release_id = track.metadata['musicbrainz_albumid']
4746        write_log(
4747                release_id,
4748                'debug',
4749                "ADDING WORK TO LOOKUP QUEUE for work %s",
4750                workId)
4751        self.album_add_request(release_id, album)
4752        # to change the _requests variable to indicate that there are pending
4753        # requests for this item and delay Picard from finalizing the album
4754        write_log(
4755                release_id,
4756                'debug',
4757                "Added lookup request for id %s. Requests = %s",
4758                workId,
4759                album._requests)
4760        if self.works_queue.append(
4761                workId,
4762                (track,
4763                 album)):  # All work combos are queued, but only new workIds are passed to XML lookup
4764            host = config.setting["server_host"]
4765            port = config.setting["server_port"]
4766            path = "/ws/2/%s/%s" % ('work', workId)
4767            if config.setting['cwp_aliases'] and config.setting['cwp_aliases_tag_text']:
4768                if config.setting['cwp_aliases_tags_user'] and user_data:
4769                    login = True
4770                    tag_type = '+tags +user-tags'
4771                else:
4772                    login = False
4773                    tag_type = '+tags'
4774            else:
4775                login = False
4776                tag_type = ''
4777            queryargs = {
4778                "inc": "work-rels+artist-rels+label-rels+place-rels+aliases" +
4779                tag_type}
4780            write_log(
4781                    release_id,
4782                    'debug',
4783                    "Initiating XML lookup for %s......",
4784                    workId)
4785            if release_id in release_status and 'lookups' in release_status[release_id]:
4786                release_status[release_id]['lookups'] += 1
4787            return album.tagger.webservice.get(
4788                host,
4789                port,
4790                path,
4791                partial(
4792                    self.work_process,
4793                    workId,
4794                    tries),
4795                # parse_response_type="xml",
4796                priority=True,
4797                important=False,
4798                mblogin=login,
4799                queryargs=queryargs)
4800        else:
4801            write_log(
4802                    release_id,
4803                    'debug',
4804                    "Work is already in queue: %s",
4805                    workId)
4806
4807    ##########################################################################
4808    # SECTION 2 - Works processing                                                                     #
4809    # NB These functions may operate asynchronously over multiple albums (as well as multiple tracks)  #
4810    ##########################################################################
4811
4812    def work_process(self, workId, tries, response, reply, error):
4813        """
4814        Top routine to process the XML/JSON node response from the lookup
4815        NB This function may operate over multiple albums (as well as multiple tracks)
4816        :param workId:
4817        :param tries:
4818        :param response:
4819        :param reply:
4820        :param error:
4821        :return:
4822        """
4823
4824        if error:
4825            tuples = self.works_queue.remove(workId)
4826            for track, album in tuples:
4827                release_id = track.metadata['musicbrainz_albumid']
4828                write_log(
4829                        release_id,
4830                        'warning',
4831                        "%r: Network error retrieving work record. Error code %r",
4832                        workId,
4833                        error)
4834                write_log(
4835                        release_id,
4836                        'debug',
4837                        "Removed request after network error. Requests = %s",
4838                        album._requests)
4839                if tries < self.MAX_RETRIES:
4840                    user_data = True
4841                    write_log(release_id, 'debug', "REQUEUEING...")
4842                    if str(error) == '204':  # Authentication error
4843                        write_log(
4844                                release_id, 'debug', "... without user authentication")
4845                        user_data = False
4846                        self.append_tag(
4847                            release_id,
4848                            track.metadata,
4849                            '~cwp_error',
4850                            '3. Authentication failure - data retrieval omits user-specific requests')
4851                    self.work_add_track(
4852                        album, track, workId, tries + 1, user_data)
4853                else:
4854                    write_log(
4855                        release_id,
4856                        'error',
4857                        "EXHAUSTED MAX RE-TRIES for XML lookup for track %s",
4858                        track)
4859                    if self.ERROR:
4860                        self.append_tag(
4861                            release_id,
4862                            track.metadata,
4863                            '~cwp_error',
4864                            "4. ERROR: MISSING METADATA due to network errors. Re-try or fix manually.")
4865                self.album_remove_request(release_id, album)
4866            return
4867
4868        tuples = self.works_queue.remove(workId)
4869        if tuples:
4870            new_queue = []
4871            prev_album = None
4872            album = tuples[0][1] # just added to prevent technical "reference before assignment" error
4873            release_id = 'No_release_id'
4874            for (track, album) in tuples:
4875                release_id = track.metadata['musicbrainz_albumid']
4876                # Note that this need to be set here as the work may cover
4877                # multiple albums
4878                if album != prev_album:
4879                    write_log(release_id, 'debug',
4880                                  "Work_process. FOUND WORK: %s for album %s",
4881                                  workId, album)
4882                    write_log(
4883                        release_id,
4884                        'debug',
4885                        "Requests for album %s = %s",
4886                        album,
4887                        album._requests)
4888                prev_album = album
4889                write_log(release_id, 'info', "RESPONSE = %s", response)
4890                # find the id_tuple(s) key with workId in it
4891                wid_list = []
4892                for w in self.work_listing[album]:
4893                    if workId in w and w not in wid_list:
4894                        wid_list.append(w)
4895                write_log(
4896                        release_id,
4897                        'info',
4898                        'wid_list for %s is %s',
4899                        workId,
4900                        wid_list)
4901                for wid in wid_list:  # wid is a tuple
4902                    write_log(
4903                            release_id,
4904                            'info',
4905                            'processing workId tuple: %r',
4906                            wid)
4907                    metaList = self.work_process_metadata(
4908                        release_id, workId, wid, track, response)
4909                    parentList = metaList[0]
4910                    # returns [[parent id], [parent name], attribute_list] or None if no parent
4911                    # found
4912                    arrangers = metaList[1]
4913                    # not just arrangers - also composers, lyricists etc.
4914                    if wid in self.parts:
4915
4916                        if arrangers:
4917                            if 'arrangers' in self.parts[wid]:
4918                                self.parts[wid]['arrangers'] += arrangers
4919                            else:
4920                                self.parts[wid]['arrangers'] = arrangers
4921
4922                        if parentList:
4923                            # first fix the sort order of multi-works at the prev level
4924                            # so that recordings of multiple movements of the same parent work will have the
4925                            # movements listed in the correct order (i.e.
4926                            # ordering-key, if available)
4927                            if len(wid) > 1:
4928                                for idx in wid:
4929                                    if idx == workId:
4930                                        match_tree = [
4931                                            'relations',
4932                                            'target-type:work',
4933                                            'direction:backward',
4934                                            'ordering-key']
4935                                        parse_result = parse_data(
4936                                            release_id, response, [], *match_tree)
4937                                        write_log(
4938                                                release_id,
4939                                                'info',
4940                                                'multi-works - ordering key for id %s is %s',
4941                                                idx,
4942                                                parse_result)
4943                                        if parse_result:
4944                                            if isinstance(
4945                                                    parse_result[0], str) and parse_result[0].isdigit():
4946                                                key = int(parse_result[0])
4947                                            elif isinstance(parse_result[0], int):
4948                                                key = parse_result[0]
4949                                            else:
4950                                                key = 9999
4951                                            self.parts[wid]['order'][idx] = key
4952
4953                            parentIds = parentList[0]
4954                            parents = parentList[1]
4955                            parent_attributes = parentList[2]
4956                            write_log(
4957                                    release_id,
4958                                    'info',
4959                                    'Parents - ids: %s, names: %s',
4960                                    parentIds,
4961                                    parents)
4962                            # remove any parents that are descendants of wid as
4963                            # they will result in circular references
4964                            del_list = []
4965                            for i, parentId in enumerate(parentIds):
4966                                for work_item in wid:
4967                                    if work_item in self.child_listing and parentId in self.child_listing[
4968                                            work_item]:
4969                                        del_list.append(i)
4970                            for i in list(set(del_list)):
4971                                removed_id = parentIds.pop(i)
4972                                removed_name = parents.pop(i)
4973                                write_log(
4974                                        release_id, 'error', "Found parent which is descendant of child - "
4975                                        "not using, to prevent circular references. id = %s,"
4976                                        " name = %s", removed_id, removed_name)
4977                                tm = track.metadata
4978                                self.append_tag(
4979                                    release_id,
4980                                    tm,
4981                                    '~cwp_error',
4982                                    '5. Found parent which which is descendant of child - not using '
4983                                    'to prevent circular references. id = ' +
4984                                    removed_id +
4985                                    ', name = ' +
4986                                    removed_name)
4987                            is_collection = False
4988                            for attribute in parent_attributes:
4989                                if attribute['collection']:
4990                                    is_collection = True
4991                                    break
4992                            # de-dup parent ids before we start
4993                            parentIds = list(
4994                                collections.OrderedDict.fromkeys(parentIds))
4995
4996                            # add descendants to checklist to prevent recursion
4997                            for p in parentIds:
4998                                for w in wid:
4999                                    self.child_listing[p].append(w)
5000                                    if w in self.child_listing:
5001                                        self.child_listing[p] += self.child_listing[w]
5002
5003                            if parentIds:
5004                                if wid in self.works_cache:
5005                                    # Make sure we haven't done this
5006                                    # relationship before, perhaps for another
5007                                    # album
5008
5009                                    if not (set(
5010                                            self.works_cache[wid]) >= set(parentIds)):
5011                                        prev_ids = tuple(self.works_cache[wid])
5012                                        prev_name = self.parts[prev_ids]['name']
5013                                        self.works_cache[wid] = add_list_uniquely(
5014                                            self.works_cache[wid], parentIds)
5015                                        self.parts[wid]['parent'] = add_list_uniquely(
5016                                            self.parts[wid]['parent'], parentIds)
5017                                        index = self.work_listing[album].index(
5018                                            prev_ids)
5019                                        new_id_list = add_list_uniquely(
5020                                            list(prev_ids), parentIds)
5021                                        new_ids = tuple(new_id_list)
5022                                        self.work_listing[album][index] = new_ids
5023                                        self.parts[new_ids] = self.parts[prev_ids]
5024                                        #del self.parts[prev_ids]  # Removed from here to deal with multi-parent parts. De-dup now takes place in process_albums.
5025                                        self.parts[new_ids]['name'] = add_list_uniquely(
5026                                            prev_name, parents)
5027                                        parentIds = new_id_list
5028                                        write_log(
5029                                            release_id,
5030                                            'debug',
5031                                            "In work_process. Changed wid in self.part: prev_ids = %s, new_ids = %s, prev_name = %s, new name = %s",
5032                                            prev_ids,
5033                                            new_ids,
5034                                            prev_name,
5035                                            self.parts[new_ids]['name'])
5036
5037
5038                                else:
5039                                    self.works_cache[wid] = parentIds
5040                                    self.parts[wid]['parent'] = parentIds
5041                                    self.parts[tuple(parentIds)
5042                                               ]['name'] = parents
5043                                    self.work_listing[album].append(
5044                                        tuple(parentIds))
5045                                # de-duplicate the parent names
5046                                # self.parts[tuple(parentIds)]['name'] = list(
5047                                #     collections.OrderedDict.fromkeys(self.parts[tuple(parentIds)]['name']))
5048                                # list(set()) won't work as need to retain order
5049                                self.parts[tuple(parentIds)]['is_collection'] = is_collection
5050                                write_log(
5051                                    release_id,
5052                                    'debug',
5053                                    "In work_process. self.parts[%s]['is_collection']: %s",
5054                                    tuple(parentIds),
5055                                    self.parts[tuple(parentIds)]['is_collection'])
5056                                # de-duplicate the parent ids also, otherwise they will be treated as a separate parent
5057                                # in the trackback structure
5058                                self.parts[wid]['parent'] = list(
5059                                    collections.OrderedDict.fromkeys(
5060                                        self.parts[wid]['parent']))
5061                                self.works_cache[wid] = list(
5062                                    collections.OrderedDict.fromkeys(
5063                                        self.works_cache[wid]))
5064                                write_log(
5065                                        release_id,
5066                                        'info',
5067                                        'Added parent ids to work_listing: %s, [Requests = %s]',
5068                                        parentIds,
5069                                        album._requests)
5070                                write_log(
5071                                        release_id,
5072                                        'info',
5073                                        'work_listing after adding parents: %s',
5074                                        self.work_listing[album])
5075                                # the higher-level work might already be in
5076                                # cache from another album
5077                                if tuple(
5078                                        parentIds) in self.works_cache and self.USE_CACHE:
5079                                    not_in_cache = self.check_cache(
5080                                        track.metadata, album, track, tuple(parentIds), [])
5081                                    for workId_tuple in not_in_cache:
5082                                        new_queue.append(
5083                                            (release_id, album, track, workId_tuple))
5084
5085                                else:
5086                                    if not self.USE_CACHE:
5087                                        if tuple(
5088                                                parentIds) in self.works_cache:
5089                                            del self.works_cache[tuple(
5090                                                parentIds)]
5091                                    for parentId in parentIds:
5092                                        new_queue.append(
5093                                            (release_id, album, track, (parentId,)))
5094
5095                            else:
5096                                # so we remember we looked it up and found none
5097                                self.parts[wid]['no_parent'] = True
5098                                self.top_works[(track, album)]['workId'] = wid
5099                                if wid not in self.top[album]:
5100                                    self.top[album].append(wid)
5101                                write_log(
5102                                        release_id, 'info', "TOP[album]: %s", self.top[album])
5103                        else:
5104                            # so we remember we looked it up and found none
5105                            self.parts[wid]['no_parent'] = True
5106                            self.top_works[(track, album)]['workId'] = wid
5107                            self.top[album].append(wid)
5108
5109                write_log(
5110                        release_id,
5111                        'debug',
5112                        "End of tuple processing for workid %s in album %s, track %s,"
5113                        " requests remaining  = %s, new queue is %r",
5114                        workId,
5115                        album,
5116                        track,
5117                        album._requests,
5118                        new_queue)
5119                self.album_remove_request(release_id, album)
5120                for queued_item in new_queue:
5121                    write_log(
5122                            release_id,
5123                            'info',
5124                            'Have a new queue: queued_item = %r',
5125                            queued_item)
5126            write_log(
5127                    release_id,
5128                    'debug',
5129                    'Penultimate end of work_process for %s (subject to parent lookups in "new_queue")',
5130                    workId)
5131            for queued_item in new_queue:
5132                self.work_not_in_cache(
5133                    queued_item[0],
5134                    queued_item[1],
5135                    queued_item[2],
5136                    queued_item[3])
5137            write_log(release_id, 'debug',
5138                          'Ultimate end of work_process for %s', workId)
5139
5140            if album._requests == 0:
5141                self.process_album(release_id, album)
5142                album._finalize_loading(None)
5143                release_status[release_id]['works-done'] = datetime.now()
5144                close_log(release_id, 'works')
5145
5146    def work_process_metadata(self, release_id, workId, wid, track, response):
5147        """
5148        Process XML node
5149        :param release_id: name for log file - usually =musicbrainz_albumid
5150        unless called outside metadata processor
5151        NB release_id may be from a different album than the original, if works lookups are identical
5152        :param workId:
5153        :param wid: The work id tuple of which workId is a member
5154        :param track:
5155        :param response:
5156        :return:
5157        """
5158        write_log(release_id, 'debug', "In work_process_metadata")
5159        all_tags = parse_data(release_id, response, [], 'tags', 'name')
5160        self.parts[wid]['folks_genres'] = all_tags
5161        self.parts[wid]['worktype_genres'] = parse_data(
5162            release_id, response, [], 'type')
5163        key = parse_data(
5164            release_id,
5165            response,
5166            [],
5167            'attributes',
5168            'type:Key',
5169            'value')
5170        self.parts[wid]['key'] = key
5171        composed_begin_dates = year(
5172            parse_data(
5173                release_id,
5174                response,
5175                [],
5176                'relations',
5177                'target-type:artist',
5178                'type:composer',
5179                'begin'))
5180        composed_end_dates = year(
5181            parse_data(
5182                release_id,
5183                response,
5184                [],
5185                'relations',
5186                'target-type:artist',
5187                'type:composer',
5188                'end'))
5189        if composed_begin_dates == composed_end_dates:
5190            composed_dates = composed_begin_dates
5191        else:
5192            composed_dates = list(
5193                zip(composed_begin_dates, composed_end_dates))
5194            composed_dates = [y + DATE_SEP + z if y != z else y for y, z in composed_dates]
5195        self.parts[wid]['composed_dates'] = composed_dates
5196        published_begin_dates = year(
5197            parse_data(
5198                release_id,
5199                response,
5200                [],
5201                'relations',
5202                'target-type:label',
5203                'type:publishing',
5204                'begin'))
5205        published_end_dates = year(
5206            parse_data(
5207                release_id,
5208                response,
5209                [],
5210                'relations',
5211                'target-type:label',
5212                'type:publishing',
5213                'end'))
5214        if published_begin_dates == published_end_dates:
5215            published_dates = published_begin_dates
5216        else:
5217            published_dates = list(
5218                zip(published_begin_dates, published_end_dates))
5219            published_dates = [x + DATE_SEP + y for x, y in published_dates]
5220        self.parts[wid]['published_dates'] = published_dates
5221
5222        premiered_begin_dates = year(
5223            parse_data(
5224                release_id,
5225                response,
5226                [],
5227                'relations',
5228                'target-type:place',
5229                'type:premiere',
5230                'begin'))
5231        premiered_end_dates = year(
5232            parse_data(
5233                release_id,
5234                response,
5235                [],
5236                'relations',
5237                'target-type:place',
5238                'type:premiere',
5239                'end'))
5240        if premiered_begin_dates == premiered_end_dates:
5241            premiered_dates = premiered_begin_dates
5242        else:
5243            premiered_dates = list(
5244                zip(premiered_begin_dates, premiered_end_dates))
5245            premiered_dates = [x + DATE_SEP + y for x, y in premiered_dates]
5246        self.parts[wid]['premiered_dates'] = premiered_dates
5247
5248        if 'artist_locale' in config.setting:
5249            locale = config.setting["artist_locale"]
5250            # NB this is the Picard code in /util
5251            lang = locale.split("_")[0]
5252            alias = parse_data(release_id, response, [], 'aliases',
5253                               'locale:' + lang, 'primary:True', 'name')
5254            user_tags = parse_data(
5255                release_id, response, [], 'user-tags', 'name')
5256            if config.setting['cwp_aliases_tags_user']:
5257                tags = user_tags
5258            else:
5259                tags = all_tags
5260            if alias:
5261                self.parts[wid]['alias'] = self.parts[wid]['name'][:]
5262                self.parts[wid]['tags'] = tags
5263                for ind, w in enumerate(wid):
5264                    if w == workId:
5265                        # alias should be a one item list but just in case it isn't...
5266                        if len(self.parts[wid]['alias']) > ind:
5267                            # The condition here is just to trap errors caused by database inconsistencies
5268                            # (e.g. a part is shown as a recording of two works, one of which is an arrangement
5269                            # of the other - this can create a two-item wid with a one-item self.parts[wid]['name']
5270                            self.parts[wid]['alias'][ind] = '; '.join(
5271                                alias)
5272        relation_list = parse_data(release_id, response, [], 'relations')
5273        return self.work_process_relations(
5274            release_id, track, workId, wid, relation_list)
5275
5276    def work_process_relations(
5277            self,
5278            release_id,
5279            track,
5280            workId,
5281            wid,
5282            relations):
5283        """
5284        Find the parents etc.
5285        NB track is just the last album/track for this work - used as being
5286        representative for options identification. If this is inconsistent (e.g. different collections
5287        option for albums with the same works) then the latest added track will over-ride others' settings).
5288        :param release_id: name for log file - usually =musicbrainz_albumid
5289        unless called outside metadata processor
5290        :param track:
5291        :param workId:
5292        :param wid:
5293        :param relations:
5294        :return:
5295        """
5296        write_log(
5297                release_id,
5298                'debug',
5299                "In work_process_relations. Relations--> %s",
5300                relations)
5301        if track:
5302            options = self.options[track]
5303        else:
5304            options = config.setting
5305        new_workIds = []
5306        new_works = []
5307        attributes_list = []
5308        relation_attributes = parse_data(
5309            release_id,
5310            relations,
5311            [],
5312            'target-type:work',
5313            'type:parts',
5314            'direction:backward',
5315            'attributes')
5316        new_work_list = []
5317        write_log(
5318            release_id,
5319            'debug',
5320            "relation_attributes--> %s",
5321            relation_attributes)
5322        for relation_attribute in relation_attributes:
5323            if (
5324                    'part of collection' not in relation_attribute) or options['cwp_collections']:
5325                new_work_list += parse_data(release_id,
5326                                            relations,
5327                                            [],
5328                                            'target-type:work',
5329                                            'type:parts',
5330                                            'direction:backward',
5331                                            'work')
5332            attributes_dict = {'collection' : ('part of collection' in relation_attribute),
5333                               'movements' : ('movement' in relation_attribute),
5334                               'acts' : ('act' in relation_attribute),
5335                               'numbers' : ('number' in relation_attribute)}
5336            attributes_list += [attributes_dict]
5337            if (
5338                    'part of collection' in relation_attribute) and not options['cwp_collections']:
5339                write_log(
5340                    release_id,
5341                    'info',
5342                    'Not getting parent work because relationship is "part of collection" and option not selected')
5343        if new_work_list:
5344            write_log(
5345                    release_id,
5346                    'info',
5347                    'new_work_list: %s',
5348                    new_work_list)
5349            new_workIds = parse_data(release_id, new_work_list, [], 'id')
5350            new_works = parse_data(release_id, new_work_list, [], 'title')
5351        else:
5352            arrangement_of = parse_data(
5353                release_id,
5354                relations,
5355                [],
5356                'target-type:work',
5357                'type:arrangement',
5358                'direction:backward',
5359                'work')
5360            if arrangement_of and options['cwp_arrangements']:
5361                new_workIds = parse_data(release_id, arrangement_of, [], 'id')
5362                new_works = parse_data(release_id, arrangement_of, [], 'title')
5363                self.parts[wid]['arrangement'] = True
5364            else:
5365                medley_of = parse_data(
5366                    release_id,
5367                    relations,
5368                    [],
5369                    'target-type:work',
5370                    'type:medley',
5371                    'work')
5372                direction = parse_data(
5373                    release_id,
5374                    relations,
5375                    [],
5376                    'target-type:work',
5377                    'type:medley',
5378                    'direction')
5379                if 'backward' not in direction:
5380                    write_log(
5381                            release_id, 'info', 'Medley_of: %s', medley_of)
5382                    if medley_of and options['cwp_medley']:
5383                        medley_list = []
5384                        medley_id_list = []
5385                        for medley_item in medley_of:
5386                            medley_list = medley_list + \
5387                                parse_data(release_id, medley_item, [], 'title')
5388                            medley_id_list = medley_id_list + \
5389                                parse_data(release_id, medley_item, [], 'id')
5390                            # (parse_data is a list...)
5391                            new_workIds = medley_id_list
5392                            new_works = medley_list
5393                            write_log(
5394                                    release_id, 'info', 'Medley_list: %s', medley_list)
5395                        self.parts[wid]['medley_list'] = medley_list
5396
5397        write_log(
5398                release_id,
5399                'info',
5400                'New works: ids: %s, names: %s, attributes: %s',
5401                new_workIds,
5402                new_works,
5403                attributes_list)
5404
5405        artists = get_artists(
5406            options,
5407            release_id,
5408            {},
5409            relations,
5410            'work')['artists']
5411        # artist_types = ['arranger', 'instrument arranger', 'orchestrator', 'composer', 'writer', 'lyricist',
5412        #                 'librettist', 'revised by', 'translator', 'reconstructed by', 'vocal arranger']
5413
5414        write_log(release_id, 'info', "ARTISTS %s", artists)
5415
5416        workItems = (new_workIds, new_works, attributes_list)
5417        itemsFound = [workItems, artists]
5418        return itemsFound
5419
5420    @staticmethod
5421    def album_add_request(release_id, album):
5422        """
5423        To keep track as to whether all lookups have been processed
5424        :param release_id: name for log file - usually =musicbrainz_albumid
5425        unless called outside metadata processor
5426        :param album:
5427        :return:
5428        """
5429        album._requests += 1
5430        write_log(
5431                release_id,
5432                'debug',
5433                "Added album request - requests: %s",
5434                album._requests)
5435
5436    @staticmethod
5437    def album_remove_request(release_id, album):
5438        """
5439        To keep track as to whether all lookups have been processed
5440        :param release_id: name for log file - usually =musicbrainz_albumid
5441        unless called outside metadata processor
5442        :param album:
5443        :return:
5444        """
5445        album._requests -= 1
5446        write_log(
5447                release_id,
5448                'debug',
5449                "Removed album request - requests: %s",
5450                album._requests)
5451
5452    ##################################################
5453    # SECTION 3 - Organise tracks and works in album #
5454    ##################################################
5455
    def process_album(self, release_id, album):
        """
        Top routine to run end-of-album processes

        In sequence, this:
        1. records the time at which lookups finished and de-duplicates the names held in self.parts;
        2. for each work listed for the album, orders its names, substitutes aliases (if the alias options
           require it) and populates the inverse hierarchy (self.partof) and the list of top works (self.top);
        3. for each top work, builds the trackback tree, calculates its levels, sets the work metadata via
           process_trackback and then numbers the movements track by track;
        4. publishes the metadata for every track in self.tracks[album], clears the album's trackback and
           finally publishes the orphan tracks (deriving work/part from the title if that option is set).
        :param release_id: name for log file - usually =musicbrainz_albumid
        unless called outside metadata processor
        :param album: the album being processed; used as the key into self.work_listing, self.partof,
        self.top, self.trackback, self.tracks and self.orphan_tracks
        :return: None
        """
        write_log(release_id, 'debug', "PROCESS ALBUM %s", album)
        release_status[release_id]['done-lookups'] = datetime.now()
        # De-duplicate names in self.parts, maintaining order (in case part names have been arrived at via multiple paths)
        for part_item in self.parts:
            if 'name' in self.parts[part_item]:
                self.parts[part_item]['name'] = list(collections.OrderedDict.fromkeys(str_to_list(self.parts[part_item]['name'])))
        # populate the inverse hierarchy
        write_log(release_id, 'info', "Cache: %s", self.works_cache)
        write_log(release_id, 'info', "Work listing %s", self.work_listing)
        # comma-separated setting -> list of tags, with surrounding whitespace removed from each entry
        alias_tag_list = config.setting['cwp_aliases_tag_text'].split(',')
        for i, tag_item in enumerate(alias_tag_list):
            alias_tag_list[i] = tag_item.strip()
        for workId in self.work_listing[album]:
            if workId in self.parts:
                write_log(
                    release_id,
                    'info',
                    'Processing workid: %s',
                    workId)
                write_log(
                    release_id,
                    'info',
                    'self.work_listing[album]: %s',
                    self.work_listing[album])
                # workId is iterated below as a collection of ids; ordering only matters if there is more than one
                if len(workId) > 1:
                    # fix the order of names using ordering keys gathered in
                    # work_process
                    if 'order' in self.parts[workId]:
                        seq = []
                        for idx in workId:
                            if idx in self.parts[workId]['order']:
                                seq.append(self.parts[workId]['order'][idx])
                            else:
                                # for the possibility of workids not part of
                                # the same parent and not all ordered
                                seq.append(999)
                        # NB zip stops at the shorter of the name list and the ordering-key list
                        zipped_names = zip(self.parts[workId]['name'], seq)
                        sorted_tups = sorted(zipped_names, key=lambda x: x[1])
                        self.parts[workId]['name'] = [x[0]
                                                      for x in sorted_tups]
                # use aliases where appropriate
                # name is a list - need a string to test for Latin chars
                name_string = '; '.join(self.parts[workId]['name'])
                if config.setting['cwp_aliases']:
                    if config.setting['cwp_aliases_all'] or (
                        config.setting['cwp_aliases_greek'] and not only_roman_chars(name_string)) or (
                        'tags' in self.parts[workId] and any(
                            x in self.parts[workId]['tags'] for x in alias_tag_list)):
                        if 'alias' in self.parts[workId] and self.parts[workId]['alias']:
                            # copy the alias list so that later changes to the name do not alter the alias
                            self.parts[workId]['name'] = self.parts[workId]['alias'][:]
                # topId stays None unless this work (or its parent) is found to be a top-level work
                topId = None
                write_log(
                        release_id,
                        'info',
                        'Works_cache: %s',
                        self.works_cache)
                if workId in self.works_cache:
                    # the work has a cached parent: register the work as a child of that parent
                    parentIds = tuple(self.works_cache[workId])
                    # for parentId in parentIds:
                    write_log(
                            release_id,
                            'debug',
                            "Create inverses: %s, %s",
                            workId,
                            parentIds)
                    if parentIds in self.partof[album]:
                        if workId not in self.partof[album][parentIds]:
                            self.partof[album][parentIds].append(workId)
                    else:
                        self.partof[album][parentIds] = [workId]
                    write_log(release_id, 'info', "Partof: %s",
                                  self.partof[album][parentIds])
                    if 'no_parent' in self.parts[parentIds]:
                        # to handle case if album includes works already in
                        # cache from a different album
                        if self.parts[parentIds]['no_parent']:
                            topId = parentIds
                else:
                    # no cached parent, so this work is itself a top-level work
                    topId = workId
                if topId:
                    if album in self.top:
                        if topId not in self.top[album]:
                            self.top[album].append(topId)
                    else:
                        self.top[album] = [topId]
        # work out the full hierarchy and part levels
        height = 0
        write_log(
                release_id,
                'info',
                "TOP: %s, \nALBUM: %s, \nTOP[ALBUM]: %s",
                self.top,
                album,
                self.top[album])
        # single_work_album is a 1/0 flag: 1 unless the album has more than one top work
        if len(self.top[album]) > 1:
            single_work_album = 0
        else:
            single_work_album = 1
        for topId in self.top[album]:
            self.create_trackback(release_id, album, topId)
            write_log(
                    release_id,
                    'info',
                    "Top id = %s, Name = %s",
                    topId,
                    self.parts[topId]['name'])
            write_log(
                    release_id,
                    'info',
                    "Trackback before levels: %s",
                    self.trackback[album][topId])
            # level_calc annotates the tree nodes with 'height' and 'depth' and returns the depth of the top node
            work_part_levels = self.level_calc(
                release_id, self.trackback[album][topId], height)
            write_log(
                    release_id,
                    'info',
                    "Trackback after levels: %s",
                    self.trackback[album][topId])
            # determine the level which will be the principal 'work' level
            # (with 3 or more levels, a single-work album uses the level below the top one)
            if work_part_levels >= 3:
                ref_level = work_part_levels - single_work_album
            else:
                ref_level = work_part_levels
            # extended metadata scheme won't display more than 3 work levels
            # ref_level = min(3, ref_level)
            ref_height = work_part_levels - ref_level
            top_info = {
                'levels': work_part_levels,
                'id': topId,
                'name': self.parts[topId]['name'],
                'single': single_work_album}
            # set the metadata in sequence defined by the work structure
            answer = self.process_trackback(
                release_id,
                album,
                self.trackback[album][topId],
                ref_height,
                top_info)
            ##
            #     trackback is a tree in the form {album: {id: , children:{id: , children{},
            #                                                             id: etc},
            #                                             id: etc} }
            #     process_trackback uses the trackback tree to derive title and level_0 based hierarchies
            #     from the structure. It also returns a tuple (id, tracks), where tracks has the structure
            #     {'track': [(track, height), (track, height), ...tuples...]
            #     'work': [[worknames], [worknames], ...lists...]
            #     'tracknumber': [num, num, ...floats of form n.nnn = disc.track...]
            #     'title':  [title, title, ...strings...]}
            #     each list is the same length - i.e. the number of tracks for the top work
            #     there can be more than one workname for a track
            #     height is the number of part levels for the related track
            ##
            if answer:
                # NB zip stops at the shorter list, so 'track' and 'tracknumber' must be the same length
                tracks = sorted(zip(answer[1]['track'], answer[1]['tracknumber']), key=lambda x: x[1])
                # need them in tracknumber sequence for the movement numbers to be correct
                write_log(release_id, 'info', "TRACKS: %s", tracks)
                # work_part_levels = self.trackback[album][topId]['depth']
                movement_count = 0
                prev_movementgroup = None
                for track, _ in tracks:
                    # track is a (track object, height) tuple
                    movement_count += 1
                    track_meta = track[0]
                    tm = track_meta.metadata
                    if '~cwp_workid_0' in tm:
                        workIds = tuple(str_to_list(tm['~cwp_workid_0']))
                        if workIds:
                            count = 0
                            self.process_work_artists(
                                release_id, album, track_meta, workIds, tm, count)
                    title_work_levels = 0
                    if '~cwp_title_work_levels' in tm:
                        title_work_levels = int(tm['~cwp_title_work_levels'])
                    movementgroup = self.extend_metadata(
                        release_id,
                        top_info,
                        track_meta,
                        ref_height,
                        title_work_levels)  # revise for new data
                    if track_meta not in self.tracks[album]:
                        self.tracks[album][track_meta] = {}
                    if movementgroup:
                        # a change of movement group restarts the numbering at 1
                        if movementgroup != prev_movementgroup:
                            movement_count = 1
                        write_log(
                            release_id,
                            'debug',
                            "processing movements for track: %s - movement-group is %s",
                            track, movementgroup)
                        self.tracks[album][track_meta]['movement-group'] = movementgroup
                        self.tracks[album][track_meta]['movement-number'] = movement_count
                        # overwritten for each track, so the value left is the count at the last track seen for the group
                        self.parts[tuple(movementgroup)]['movement-total'] = movement_count
                    prev_movementgroup = movementgroup

                write_log(
                        release_id,
                        'debug',
                        "FINISHED TRACK PROCESSING FOR Top work id: %s",
                        topId)
        # Need to redo the loop so that all album-wide tm is updated before
        # publishing
        for track, movement_info in self.tracks[album].items():
            self.publish_metadata(release_id, album, track, movement_info)
        # #
        # The messages below are normally commented out as they get VERY long if there are a lot of albums loaded
        # For extreme debugging, remove the comments and just run one or a few albums
        # Do not forget to comment out again.
        # #
        # write_log(release_id, 'info', 'Self.parts: %s', self.parts)
        # write_log(release_id, 'info', 'Self.trackback: %s', self.trackback)

        # tidy up
        self.trackback[album].clear()
        # Finally process the orphan tracks
        if album in self.orphan_tracks:
            for track in self.orphan_tracks[album]:
                tm = track.metadata
                options = self.options[track]
                if options['cwp_derive_works_from_title']:
                    # derive_from_title returns (work, movement, inter_work); each value is written to all related tags
                    work, movt, inter_work = self.derive_from_title(release_id, track, tm['title'])
                    tm['~cwp_extended_work'] = tm['~cwp_extended_groupheading'] = tm['~cwp_title_work'] = \
                        tm['~cwp_title_groupheading'] = tm['~cwp_work'] = tm['~cwp_groupheading']= work
                    tm['~cwp_part'] = tm['~cwp_extended_part'] = tm['~cwp_title_part_0'] = movt
                    tm['~cwp_inter_work'] = tm['~cwp_extended_inter_work'] = tm['~cwp_inter_title_work'] = inter_work
                # orphan tracks are published without movement information
                self.publish_metadata(release_id, album, track)
        write_log(release_id, 'debug', "PROCESS ALBUM function complete")
5689
5690    def create_trackback(self, release_id, album, parentId):
5691        """
5692        Create an inverse listing of the work-parent relationships
5693        :param release_id:
5694        :param album:
5695        :param parentId:
5696        :return: trackback for a given parentId
5697        """
5698        write_log(release_id, 'debug', "Create trackback for %s", parentId)
5699        if parentId in self.partof[album]:  # NB parentId is a tuple
5700            for child in self.partof[album][parentId]:  # NB child is a tuple
5701                if child in self.partof[album]:
5702                    child_trackback = self.create_trackback(
5703                        release_id, album, child)
5704                    self.append_trackback(
5705                        release_id, album, parentId, child_trackback)
5706                else:
5707                    self.append_trackback(
5708                        release_id, album, parentId, self.trackback[album][child])
5709            return self.trackback[album][parentId]
5710        else:
5711            return self.trackback[album][parentId]
5712
5713    def append_trackback(self, release_id, album, parentId, child):
5714        """
5715        Recursive process to populate trackback
5716        :param release_id: name for log file - usually =musicbrainz_albumid
5717        unless called outside metadata processor
5718        :param album:
5719        :param parentId:
5720        :param child:
5721        :return:
5722        """
5723        write_log(release_id, 'debug', "In append_trackback...")
5724        if parentId in self.trackback[album]:  # NB parentId is a tuple
5725            if 'children' in self.trackback[album][parentId]:
5726                if child not in self.trackback[album][parentId]['children']:
5727                    write_log(release_id, 'info', "TRYING TO APPEND...")
5728                    self.trackback[album][parentId]['children'].append(child)
5729                    write_log(
5730                            release_id,
5731                            'info',
5732                            "...PARENT %s - ADDED %s as child",
5733                            self.parts[parentId]['name'],
5734                            child)
5735                else:
5736                    write_log(
5737                            release_id,
5738                            'info',
5739                            "Parent %s already has %s as child",
5740                            parentId,
5741                            child)
5742            else:
5743                self.trackback[album][parentId]['children'] = [child]
5744                write_log(
5745                        release_id,
5746                        'info',
5747                        "Existing PARENT %s - ADDED %s as child",
5748                        self.parts[parentId]['name'],
5749                        child)
5750        else:
5751            self.trackback[album][parentId]['id'] = parentId
5752            self.trackback[album][parentId]['children'] = [child]
5753            write_log(
5754                release_id,
5755                'info',
5756                "New PARENT %s - ADDED %s as child",
5757                self.parts[parentId]['name'],
5758                child)
5759            write_log(
5760                release_id,
5761                'info',
5762                "APPENDED TRACKBACK: %s",
5763                self.trackback[album][parentId])
5764        return self.trackback[album][parentId]
5765
5766    def level_calc(self, release_id, trackback, height):
5767        """
5768        Recursive process to determine the max level for a work
5769        :param release_id: name for log file - usually =musicbrainz_albumid
5770        unless called outside metadata processor
5771        :param trackback:
5772        :param height: number of levels above this one
5773        :return:
5774        """
5775        write_log(release_id, 'debug', 'In level_calc process')
5776        if 'children' not in trackback:
5777            write_log(release_id, 'info', "Got to bottom")
5778            trackback['height'] = height
5779            trackback['depth'] = 0
5780            return 0
5781        else:
5782            trackback['height'] = height
5783            height += 1
5784            max_depth = 0
5785            for child in trackback['children']:
5786                write_log(release_id, 'info', "CHILD: %s", child)
5787                depth = self.level_calc(release_id, child, height) + 1
5788                write_log(release_id, 'info', "DEPTH: %s", depth)
5789                max_depth = max(depth, max_depth)
5790            trackback['depth'] = max_depth
5791            return max_depth
5792
5793        ###########################################
5794        # SECTION 4 - Process tracks within album #
5795        ###########################################
5796
5797    def process_trackback(
5798            self,
5799            release_id,
5800            album_req,
5801            trackback,
5802            ref_height,
5803            top_info):
5804        """
5805        Set work structure metadata & govern other metadata-setting processes
5806        :param release_id: name for log file - usually =musicbrainz_albumid
5807        unless called outside metadata processor
5808        :param album_req:
5809        :param trackback:
5810        :param ref_height:
5811        :param top_info:
5812        :return:
5813        """
5814        write_log(
5815                release_id,
5816                'debug',
5817                "IN PROCESS_TRACKBACK. Trackback = %s",
5818                trackback)
5819        tracks = collections.defaultdict(dict)
5820        process_now = False
5821        if 'meta' in trackback:
5822            for track, album in trackback['meta']:
5823                if album_req == album:
5824                    process_now = True
5825        if process_now or 'children' not in trackback:
5826            if 'meta' in trackback and 'id' in trackback and 'depth' in trackback and 'height' in trackback:
5827                write_log(release_id, 'info', "Processing level 0")
5828                depth = trackback['depth']
5829                height = trackback['height']
5830                workId = tuple(trackback['id'])
5831                if depth != 0:
5832                    if 'children' in trackback:
5833                        child_response = self.process_trackback_children(
5834                            release_id, album_req, trackback, ref_height, top_info, tracks)
5835                        tracks = child_response[1]
5836                    write_log(
5837                            release_id,
5838                            'info',
5839                            'Bottom level for this trackback is higher level elsewhere - adjusting levels')
5840                    depth = 0
5841                write_log(release_id, 'info', "WorkId: %s, Work name: %s", workId, self.parts[workId]['name'])
5842                for track, album in trackback['meta']:
5843                    if album == album_req:
5844                        write_log(release_id, 'info', "Track: %s", track)
5845                        tm = track.metadata
5846                        write_log(
5847                                release_id, 'info', "Track metadata = %s", tm)
5848                        tm['~cwp_workid_' + str(depth)] = workId
5849                        self.write_tags(release_id, track, tm, workId)
5850                        self.make_annotations(release_id, track, workId)
5851                        # strip leading and trailing spaces from work names
5852                        if isinstance(self.parts[workId]['name'], str):
5853                            worktemp = self.parts[workId]['name'].strip()
5854                        else:
5855                            for index, it in enumerate(
5856                                    self.parts[workId]['name']):
5857                                self.parts[workId]['name'][index] = it.strip()
5858                            worktemp = self.parts[workId]['name']
5859                        if isinstance(top_info['name'], str):
5860                            toptemp = top_info['name'].strip()
5861                        else:
5862                            for index, it in enumerate(top_info['name']):
5863                                top_info['name'][index] = it.strip()
5864                            toptemp = top_info['name']
5865                        tm['~cwp_work_' + str(depth)] = worktemp
5866                        tm['~cwp_part_levels'] = str(height)
5867                        tm['~cwp_work_part_levels'] = str(top_info['levels'])
5868                        tm['~cwp_workid_top'] = top_info['id']
5869                        tm['~cwp_work_top'] = toptemp
5870                        tm['~cwp_single_work_album'] = top_info['single']
5871                        write_log(
5872                                release_id, 'info', "Track metadata = %s", tm)
5873                        if 'track' in tracks:
5874                            tracks['track'].append((track, height))
5875                        else:
5876                            tracks['track'] = [(track, height)]
5877                        tracks['tracknumber'] = [int(tm['discnumber']) + (int(tm['tracknumber']) / 1000)]
5878                        # Hopefully no more than 999 tracks per disc!
5879                        write_log(release_id, 'info', "Tracks: %s", tracks)
5880
5881                response = (workId, tracks)
5882                write_log(release_id, 'debug', "LEAVING PROCESS_TRACKBACK")
5883                write_log(
5884                        release_id,
5885                        'info',
5886                        "depth %s Response = %s",
5887                        depth,
5888                        response)
5889                return response
5890            else:
5891                return None
5892        else:
5893            response = self.process_trackback_children(
5894                release_id, album_req, trackback, ref_height, top_info, tracks)
5895            return response
5896
5897    def process_trackback_children(
5898            self,
5899            release_id,
5900            album_req,
5901            trackback,
5902            ref_height,
5903            top_info,
5904            tracks):
5905        """
5906        TODO add some better documentation!
5907        :param release_id: name for log file - usually =musicbrainz_albumid
5908        unless called outside metadata processor
5909        :param album_req:
5910        :param trackback:
5911        :param ref_height:
5912        :param top_info:
5913        :param tracks:
5914        :return:
5915        """
5916        if 'id' in trackback and 'depth' in trackback and 'height' in trackback:
5917            write_log(
5918                    release_id,
5919                    'debug',
5920                    'In process_children_trackback for trackback %s',
5921                    trackback)
5922            depth = trackback['depth']
5923            height = trackback['height']
5924            parentId = tuple(trackback['id'])
5925            parent = self.parts[parentId]['name']
5926            width = 0
5927            for child in trackback['children']:
5928                width += 1
5929                write_log(
5930                        release_id,
5931                        'info',
5932                        "child trackback = %s",
5933                        child)
5934                answer = self.process_trackback(
5935                    release_id, album_req, child, ref_height, top_info)
5936                if answer:
5937                    workId = answer[0]
5938                    child_tracks = answer[1]['track']
5939                    for track in child_tracks:
5940                        track_meta = track[0]
5941                        track_height = track[1]
5942                        part_level = track_height - height
5943                        write_log(
5944                                release_id,
5945                                'debug',
5946                                "Calling set metadata %s",
5947                                (part_level,
5948                                 workId,
5949                                 parentId,
5950                                 parent,
5951                                 track_meta))
5952                        self.set_metadata(
5953                            release_id, part_level, workId, parentId, parent, track_meta)
5954                        if 'track' in tracks:
5955                            tracks['track'].append(
5956                                (track_meta, track_height))
5957                        else:
5958                            tracks['track'] = [(track_meta, track_height)]
5959                        tm = track_meta.metadata
5960                        # ~cwp_title if composer had to be removed
5961                        title = tm['~cwp_title'] or tm['title']
5962                        if 'title' in tracks:
5963                            tracks['title'].append(title)
5964                        else:
5965                            tracks['title'] = [title]
5966                        # to make sure we get it as a list
5967                        work = tm.getall('~cwp_work_0')
5968                        if 'work' in tracks:
5969                            tracks['work'].append(work)
5970                        else:
5971                            tracks['work'] = [work]
5972                        if 'tracknumber' not in tm:
5973                            tm['tracknumber'] = 0
5974                        if 'discnumber' not in tm:
5975                            tm['discnumber'] = 0
5976                        if 'tracknumber' in tracks:
5977                            tracks['tracknumber'].append(
5978                                int(tm['discnumber']) + (int(tm['tracknumber']) / 1000))
5979                        else:
5980                            tracks['tracknumber'] = [
5981                                int(tm['discnumber']) + (int(tm['tracknumber']) / 1000)]
5982            if tracks and 'track' in tracks:
5983                track = tracks['track'][0][0]
5984                # NB this will only be the first track of tracks, but its
5985                # options will be used for the structure
5986                self.derive_from_structure(
5987                    release_id, top_info, tracks, height, depth, width, 'title')
5988                if self.options[track]["cwp_level0_works"]:
5989                    # replace hierarchical works with those from work_0 (for
5990                    # consistency)
5991                    self.derive_from_structure(
5992                        release_id, top_info, tracks, height, depth, width, 'work')
5993
5994                write_log(
5995                        release_id,
5996                        'info',
5997                        "Trackback result for %s = %s",
5998                        parentId,
5999                        tracks)
6000                response = parentId, tracks
6001                write_log(
6002                        release_id,
6003                        'debug',
6004                        "LEAVING PROCESS_CHILD_TRACKBACK depth %s Response = %s",
6005                        depth,
6006                        response)
6007                return response
6008            else:
6009                return None
6010        else:
6011            return None
6012
6013    def derive_from_structure(
6014            self,
6015            release_id,
6016            top_info,
6017            tracks,
6018            height,
6019            depth,
6020            width,
6021            name_type):
6022        """
6023        Derive title (or work level-0) components from MB hierarchical work structure
6024        :param release_id: name for log file - usually =musicbrainz_albumid
6025        unless called outside metadata processor
6026        :param top_info:
6027         {'levels': work_part_levels,'id': topId,'name': self.parts[topId]['name'],'single': single_work_album}
6028        :param tracks:
6029         {'track':[(track1, height1), (track2, height2), ...], 'work': [work1, work2,...],
6030          'title': [title1, title2, ...], 'tracknumber': [tracknumber1, tracknumber2, ...]}
6031          where height is the number of levels in total in the branch for that track (i.e. height 1 => work_0 & work_1)
6032        :param height: number of levels above the current one
6033        :param depth: maximum number of levels
6034        :param width: number of siblings
6035        :param name_type: work or title
6036        :return:
6037        """
6038        if 'track' in tracks:
6039            track = tracks['track'][0][0]
6040            # NB this will only be the first track of tracks, but its
6041            # options will be used for the structure
6042            single_work_track = False  # default
6043            write_log(
6044                release_id,
6045                'debug',
6046                "Deriving info for %s from structure for tracks %s",
6047                name_type,
6048                tracks['track'])
6049            write_log(
6050                release_id,
6051                'info',
6052                '%ss are %r',
6053                name_type,
6054                tracks[name_type])
6055            if 'tracknumber' in tracks:
6056                sorted_tracknumbers = sorted(tracks['tracknumber'])
6057            else:
6058                sorted_tracknumbers = None
6059            write_log(
6060                    release_id,
6061                    'info',
6062                    "SORTED TRACKNUMBERS: %s",
6063                    sorted_tracknumbers)
6064            common_len = 0
6065            if name_type in tracks:
6066                meta_str = "_title" if name_type == 'title' else "_X0"
6067                # in case of works, could be a list of lists
6068                name_list = tracks[name_type]
6069                write_log(
6070                        release_id,
6071                        'info',
6072                        "%s list %s",
6073                        name_type,
6074                        name_list)
6075                if len(name_list) == 1:  # only one track in this work so try and extract using colons
6076                    single_work_track = True
6077                    track_height = tracks['track'][0][1]
6078                    if track_height - height > 0:  # track_height - height == part_level
6079                        if name_type == 'title':
6080                            write_log(
6081                                    release_id,
6082                                    'debug',
6083                                    "Single track work. Deriving directly from title text: %s",
6084                                    track)
6085                            ti = name_list[0]
6086                            common_subset = self.derive_from_title(
6087                                release_id, track, ti)[0]
6088                        else:
6089                            common_subset = ""
6090                    else:
6091                        common_subset = name_list[0]
6092                    write_log(
6093                            release_id,
6094                            'info',
6095                            "%s is single-track work. common_subset is set to %s",
6096                            tracks['track'][0][0],
6097                            common_subset)
6098                    if common_subset:
6099                        common_len = len(common_subset)
6100                    else:
6101                        common_len = 0
6102                else:  # NB if names are lists of lists, we'll assume they all start the same way
6103                    if isinstance(name_list[0], list):
6104                        compare = name_list[0][0].split()
6105                    else:
6106                        # a list of the words in the first name
6107                        compare = name_list[0].split()
6108                    for name_item in name_list:
6109                        if isinstance(name_item, list):
6110                            name = name_item[0]
6111                        else:
6112                            name = name_item
6113                        lcs = longest_common_sequence(compare, name.split())
6114                        compare = lcs['sequence']
6115                        if not compare:
6116                            common_len = 0
6117                            break
6118                        if lcs['length'] > 0:
6119                            common_subset = " ".join(compare)
6120                            write_log(
6121                                    release_id,
6122                                    'info',
6123                                    "Common subset from %ss at level %s, item name %s ..........",
6124                                    name_type,
6125                                    tracks['track'][0][1] -
6126                                    height,
6127                                    name)
6128                            write_log(
6129                                    release_id, 'info', "..........is %s", common_subset)
6130                            common_len = len(common_subset)
6131
6132                write_log(
6133                        release_id,
6134                        'info',
6135                        "checked for common sequence - length is %s",
6136                        common_len)
6137            for track_index, track_item in enumerate(tracks['track']):
6138                track_meta = track_item[0]
6139                tm = track_meta.metadata
6140                top_level = int(tm['~cwp_part_levels'])
6141                part_level = track_item[1] - height
6142                if common_len > 0:
6143                    self.create_work_levels(release_id, name_type, tracks, track, track_index,
6144                                            track_meta, tm, meta_str, part_level, depth, width, common_len)
6145
6146                else:  # (no common substring at this level)
6147                    if name_type == 'work':
6148                        write_log(release_id, 'info',
6149                                  'single track work - indicator = %s. track = %s, part_level = %s, top_level = %s',
6150                                  single_work_track, track_item, part_level, top_level)
6151                        if part_level >= top_level:  # so it won't be covered by top-down action
6152                            for level in range(
6153                                    0, part_level + 1):  # fill in the missing work names from the canonical list
6154                                if '~cwp' + meta_str + '_work_' + \
6155                                        str(level) not in tm:
6156                                    tm['~cwp' +
6157                                       meta_str +
6158                                       '_work_' +
6159                                       str(level)] = tm['~cwp_work_' +
6160                                                        str(level)]
6161                                    if level > 0:
6162                                        self.level0_warn(release_id, tm, level)
6163                                if '~cwp' + meta_str + '_part_' + \
6164                                        str(level) not in tm and '~cwp_part_' + str(level) in tm:
6165                                    tm['~cwp' +
6166                                       meta_str +
6167                                       '_part_' +
6168                                       str(level)] = tm['~cwp_part_' +
6169                                                        str(level)]
6170                                    if level > 0:
6171                                        self.level0_warn(release_id, tm, level)
6172
6173
6174    def create_work_levels(self, release_id, name_type, tracks, track, track_index,
6175                           track_meta, tm, meta_str, part_level, depth, width, common_len):
6176        """
6177        For a group of tracks with common metadata in the title/level0 work, create the work structure
6178        for that metadata, using the structure in the MB database
6179        :param release_id:
6180        :param name_type: title or work
6181        :param tracks: {'track':[(track1, height1), (track2, height2), ...], 'work': [work1, work2,...],
6182          'title': [title1, title2, ...], 'tracknumber': [tracknumber1, tracknumber2, ...]}
6183          where height is the number of levels in total in the branch for that track (i.e. height 1 => work_0 & work_1)
6184        :param track:
6185        :param track_index: index of track in tracks
6186        :param track_meta:
6187        :param tm: track meta (dup?)
6188        :param meta_str: string created from name_type
6189        :param part_level: The level of the current item in the works hierarchy
6190        :param depth: The number of levels below the current item
6191        :param width: The number of children of the current item
6192        :param common_len: length of the common text
6193        :return:
6194        """
6195        allow_repeats = True
6196        write_log(
6197            release_id,
6198            'info',
6199            "Use %s info for track: %s at level %s",
6200            name_type,
6201            track_meta,
6202            part_level)
6203        name = tracks[name_type][track_index]
6204        if isinstance(name, list):
6205            work = name[0][:common_len]
6206        else:
6207            work = name[:common_len]
6208        work = work.rstrip(":,.;- ")
6209        if self.options[track]["cwp_removewords_p"]:
6210            removewords = self.options[track]["cwp_removewords_p"].split(
6211                ',')
6212        else:
6213            removewords = []
6214        write_log(
6215            release_id,
6216            'info',
6217            "Prefixes (in %s) = %s",
6218            name_type,
6219            removewords)
6220        for prefix in removewords:
6221            prefix2 = str(prefix).lower().rstrip()
6222            if prefix2[0] != " ":
6223                prefix2 = " " + prefix2
6224            write_log(
6225                release_id, 'info', "checking prefix %s", prefix2)
6226            if work.lower().endswith(prefix2):
6227                if len(prefix2) > 0:
6228                    work = work[:-len(prefix2)]
6229                    common_len = len(work)
6230                    work = work.rstrip(":,.;- ")
6231            if work.lower() == prefix2.strip():
6232                work = ''
6233                common_len = 0
6234        write_log(
6235            release_id,
6236            'info',
6237            "work after prefix strip %s",
6238            work)
6239        write_log(release_id, 'info', "Prefixes checked")
6240
6241        tm['~cwp' + meta_str + '_work_' +
6242           str(part_level)] = work
6243
6244        if part_level > 0 and name_type == "work":
6245            write_log(
6246                release_id,
6247                'info',
6248                'checking if %s is repeated name at part_level = %s',
6249                work,
6250                part_level)
6251            write_log(release_id, 'info', 'lower work name is %s',
6252                      tm['~cwp' + meta_str + '_work_' + str(part_level - 1)])
6253        # fill in missing names caused by no common string at lower levels
6254        # count the missing levels and push the current name
6255        # down to the lowest missing level
6256        missing_levels = 0
6257        fill_level = part_level - 1
6258        while '~cwp' + meta_str + '_work_' + \
6259                str(fill_level) not in tm:
6260            missing_levels += 1
6261            fill_level -= 1
6262            if fill_level < 0:
6263                break
6264        write_log(
6265            release_id,
6266            'info',
6267            'there is/are %s missing level(s)',
6268            missing_levels)
6269        if missing_levels > 0:
6270            allow_repeats = True
6271        for lev in range(
6272                part_level - missing_levels, part_level):
6273
6274            if lev > 0:  # not filled_lowest and lev > 0:
6275                tm['~cwp' + meta_str +
6276                   '_work_' + str(lev)] = work
6277                tm['~cwp' +
6278                   meta_str +
6279                   '_part_' +
6280                   str(lev - 1)] = self.strip_parent_from_work(track,
6281                                                               release_id,
6282                                                               interpret(tm['~cwp' + meta_str + '_work_'
6283                                                                            + str(lev - 1)]),
6284                                                               tm['~cwp' + meta_str + '_work_' + str(lev)],
6285                                                               lev - 1, False)[0]
6286            else:
6287                tm['~cwp' + meta_str + '_work_' + str(lev)] = tm['~cwp_work_' + str(lev)]
6288
6289        if missing_levels > 0:
6290            write_log(release_id, 'info', 'lower work name is now %r', tm.getall(
6291                '~cwp' + meta_str + '_work_' + str(part_level - 1)))
6292        # now fix the repeated work name at this level
6293        if work == tm['~cwp' + meta_str + '_work_' +
6294                      str(part_level - 1)] and not allow_repeats:
6295            tm['~cwp' +
6296               meta_str +
6297               '_work_' +
6298               str(part_level)] = tm['~cwp_work_' +
6299                                     str(part_level)]
6300            self.level0_warn(release_id, tm, part_level)
6301        tm['~cwp' +
6302           meta_str +
6303           '_part_' +
6304           str(part_level -
6305               1)] = self.strip_parent_from_work(track,
6306                                                 release_id,
6307                                                 tm.getall('~cwp' + meta_str + '_work_' + str(part_level - 1)),
6308                                                 tm['~cwp' + meta_str + '_work_' + str(part_level)],
6309                                                 part_level - 1, False)[0]
6310        if part_level == 1:
6311            if isinstance(name, list):
6312                movt = [x[common_len:].strip().lstrip(":,.;- ")
6313                        for x in name]
6314            else:
6315                movt = name[common_len:].strip().lstrip(":,.;- ")
6316            write_log(
6317                release_id, 'info', "%s - movt = %s", name_type, movt)
6318            tm['~cwp' + meta_str + '_part_0'] = movt
6319        write_log(
6320            release_id,
6321            'info',
6322            "%s Work part_level = %s",
6323            name_type,
6324            part_level)
6325        if name_type == 'title':
6326            if '~cwp_title_work_' + str(part_level - 1) in tm and tm['~cwp_title_work_' + str(
6327                    part_level)] == tm['~cwp_title_work_' + str(part_level - 1)] and width == 1:
6328                pass  # don't count higher part-levels which are not distinct from lower ones
6329                #  when the parent work has only one child
6330            else:
6331                tm['~cwp_title_work_levels'] = depth
6332                tm['~cwp_title_part_levels'] = part_level
6333        write_log(
6334            release_id,
6335            'info',
6336            "Set new metadata for %s OK",
6337            name_type)
6338
6339    def level0_warn(self, release_id, tm, level):
6340        """
6341        Issue warnings if inadequate level 0 data
6342        :param release_id: name for log file - usually =musicbrainz_albumid
6343        unless called outside metadata processor
6344        :param tm:
6345        :param level:
6346        :return:
6347        """
6348        write_log(
6349            release_id,
6350            'warning',
6351            'Unable to use level 0 as work name source in level %s - using hierarchy instead',
6352            level)
6353        if self.WARNING:
6354            self.append_tag(
6355                release_id,
6356                tm,
6357                '~cwp_warning',
6358                '5. Unable to use level 0 as work name source in level ' +
6359                str(level) +
6360                ' - using hierarchy instead')
6361
6362    def set_metadata(
6363            self,
6364            release_id,
6365            part_level,
6366            workId,
6367            parentId,
6368            parent,
6369            track):
6370        """
6371        Set the names of works and parts
6372        :param release_id: name for log file - usually =musicbrainz_albumid
6373        unless called outside metadata processor
6374        :param part_level:
6375        :param workId:
6376        :param parentId:
6377        :param parent:
6378        :param track:
6379        :return:
6380        """
6381        write_log(
6382                release_id,
6383                'debug',
6384                "SETTING METADATA FOR TRACK = %r, parent = %s, part_level = %s",
6385                track,
6386                parent,
6387                part_level)
6388        tm = track.metadata
6389        if parentId:
6390            self.write_tags(release_id, track, tm, parentId)
6391            self.make_annotations(release_id, track, parentId)
6392            if 'annotations' in self.parts[workId]:
6393                work_annotations = self.parts[workId]['annotations']
6394                self.parts[workId]['stripped_annotations'] = work_annotations
6395            else:
6396                work_annotations = []
6397            if 'annotations' in self.parts[parentId]:
6398                parent_annotations = self.parts[parentId]['annotations']
6399            else:
6400                parent_annotations = []
6401            if parent_annotations:
6402                work_annotations = [
6403                    z for z in work_annotations if z not in parent_annotations]
6404                self.parts[workId]['stripped_annotations'] = work_annotations
6405
6406            tm['~cwp_workid_' + str(part_level)] = parentId
6407            tm['~cwp_work_' + str(part_level)] = parent
6408            # maybe more than one work name
6409            work = self.parts[workId]['name']
6410            write_log(release_id, 'info', "Set work name to: %s", work)
6411            works = []
6412            # in case there is only one and it isn't in a list
6413            if isinstance(work, str):
6414                works.append(work)
6415            else:
6416                works = work[:]
6417            stripped_works = []
6418            for work in works:
6419                extend = True
6420                strip = self.strip_parent_from_work(
6421                    track, release_id, work, parent, part_level, extend, parentId, workId)
6422
6423                stripped_works.append(strip[0])
6424                write_log(
6425                        release_id,
6426                        'info',
6427                        "Parent: %s, Stripped works = %s",
6428                        parent,
6429                        stripped_works)
6430                # now == parent, after removing full_parent logic
6431                full_parent = strip[1]
6432                if full_parent != parent:
6433                    tm['~cwp_work_' +
6434                       str(part_level)] = full_parent.strip()
6435                    self.parts[parentId]['name'] = full_parent
6436                    if 'no_parent' in self.parts[parentId]:
6437                        if self.parts[parentId]['no_parent']:
6438                            tm['~cwp_work_top'] = full_parent.strip()
6439            tm['~cwp_part_' + str(part_level - 1)] = stripped_works
6440            self.parts[workId]['stripped_name'] = stripped_works
6441        write_log(release_id, 'debug', "GOT TO END OF SET_METADATA")
6442
6443    def write_tags(self, release_id, track, tm, workId):
6444        """
6445        write genre-related tags from internal variables
6446        :param track:
6447        :param release_id: name for log file - usually =musicbrainz_albumid
6448        unless called outside metadata processor
6449        :param tm: track metadata
6450        :param workId: MBID of current work
6451        :return: None - just writes tags
6452        """
6453        options = self.options[track]
6454        candidate_genres = []
6455        if options['cwp_genres_use_folks'] and 'folks_genres' in self.parts[workId]:
6456            candidate_genres += self.parts[workId]['folks_genres']
6457        if options['cwp_genres_use_worktype'] and 'worktype_genres' in self.parts[workId]:
6458            candidate_genres += self.parts[workId]['worktype_genres']
6459            self.append_tag(
6460                release_id,
6461                tm,
6462                '~cwp_worktype_genres',
6463                self.parts[workId]['worktype_genres'])
6464        self.append_tag(
6465            release_id,
6466            tm,
6467            '~cwp_candidate_genres',
6468            candidate_genres)
6469        self.append_tag(release_id, tm, '~cwp_keys', self.parts[workId]['key'])
6470        self.append_tag(release_id, tm, '~cwp_composed_dates',
6471                        self.parts[workId]['composed_dates'])
6472        self.append_tag(release_id, tm, '~cwp_published_dates',
6473                        self.parts[workId]['published_dates'])
6474        self.append_tag(release_id, tm, '~cwp_premiered_dates',
6475                        self.parts[workId]['premiered_dates'])
6476
6477    def make_annotations(self, release_id, track, wid):
6478        """
6479        create an 'annotations' entry in the 'parts' dict, as dictated by options, from dates and keys
6480        :param release_id: name for log file - usually =musicbrainz_albumid
6481        unless called outside metadata processor
6482        :param track: the current track
6483        :param wid: the current work MBID
6484        :return:
6485        """
6486        write_log(
6487                release_id,
6488                'debug',
6489                "Starting module %s",
6490                'make_annotations')
6491        options = self.options[track]
6492        if options['cwp_workdate_include']:
6493            if options['cwp_workdate_source_composed'] and 'composed_dates' in self.parts[wid] and self.parts[wid]['composed_dates']:
6494                workdates = self.parts[wid]['composed_dates']
6495            elif options['cwp_workdate_source_published'] and 'published_dates' in self.parts[wid] and self.parts[wid]['published_dates']:
6496                workdates = self.parts[wid]['published_dates']
6497            elif options['cwp_workdate_source_premiered'] and 'premiered_dates' in self.parts[wid] and self.parts[wid]['premiered_dates']:
6498                workdates = self.parts[wid]['premiered_dates']
6499            else:
6500                workdates = []
6501        else:
6502            workdates = []
6503        keys = []
6504        if options['cwp_key_include'] and 'key' in self.parts[wid] and self.parts[wid]['key']:
6505            keys = self.parts[wid]['key']
6506        elif options['cwp_key_contingent_include'] and 'key' in self.parts[wid] and self.parts[wid]['key']\
6507                and 'name' in self.parts[wid]:
6508            write_log(
6509                    release_id,
6510                    'info',
6511                    'checking for key. keys = %s, names = %s',
6512                    self.parts[wid]['key'],
6513                    self.parts[wid]['name'])
6514            # add all the parent names to the string for checking -
6515            work_name = list_to_str(self.parts[wid]['name'])
6516            work_chk = wid
6517            while work_chk in self.works_cache:
6518                parent_chk = tuple(self.works_cache[work_chk])
6519                if parent_chk in self.parts and self.parts[parent_chk] and 'name' in self.parts[parent_chk] and self.parts[parent_chk]['name']:
6520                    parent_name = list_to_str(self.parts[parent_chk]['name'])
6521                    p_name_orig = self.parts[parent_chk]['name']
6522                    p_chk = self.parts[parent_chk]
6523                    work_name = parent_name + ': ' + work_name
6524                work_chk = parent_chk
6525            # now see if the key has been mentioned in the work or its parents
6526            for key in self.parts[wid]['key']:
6527                # if not any([key.lower() in x.lower() for x in
6528                # str_to_list(work_name)]): #  TODO remove
6529                if not key.lower() in work_name.lower():
6530                    keys.append(key)
6531        annotations = keys + workdates
6532        if annotations:
6533            self.parts[wid]['annotations'] = annotations
6534        else:
6535            if 'annotations' in self.parts[wid]:
6536                del self.parts[wid]['annotations']
6537        write_log(
6538                release_id,
6539                'info',
6540                'make annotations has set id %s on track %s with annotation %s',
6541                wid,
6542                track,
6543                annotations)
6544        write_log(
6545                release_id,
6546                'debug',
6547                "Ending module %s",
6548                'make_annotations')
6549
6550    @staticmethod
6551    def derive_from_title(release_id, track, title):
6552        """
6553        Attempt to parse title to get components
6554        :param release_id: name for log file - usually =musicbrainz_albumid
6555        unless called outside metadata processor
6556        :param track:
6557        :param title:
6558        :return:
6559        """
6560        write_log(
6561                release_id,
6562                'info',
6563                "DERIVING METADATA FROM TITLE for track: %s",
6564                track)
6565        tm = track.metadata
6566        movt = title
6567        work = ""
6568        colons = title.count(": ")
6569        inter_work = None
6570        if '~cwp_part_levels' in tm:
6571            part_levels = int(tm['~cwp_part_levels'])
6572            if int(tm['~cwp_work_part_levels']
6573                   ) > 0:  # we have a work with movements
6574                if colons > 0:
6575                    title_split = title.split(': ', 1)
6576                    title_rsplit = title.rsplit(': ', 1)
6577                    if part_levels >= colons:
6578                        work = title_rsplit[0]
6579                        movt = title_rsplit[1]
6580                    else:
6581                        work = title_split[0]
6582                        movt = title_split[1]
6583        else:
6584            # No works found so try and just get parts from title
6585            if colons > 0:
6586                title_split = title.rsplit(': ', 1)
6587                work = title_split[0]
6588                if colons > 1:
6589                    colon_ind = work.rfind(':')
6590                    inter_work = work[colon_ind + 1:].strip()
6591                    work = work[:colon_ind]
6592                movt = title_split[1]
6593        write_log(release_id, 'info', "Work %s, Movt %s", work, movt)
6594        return work, movt, inter_work
6595
6596    def process_work_artists(
6597            self,
6598            release_id,
6599            album,
6600            track,
6601            workIds,
6602            tm,
6603            count):
6604        """
6605        Carry out the artist processing that needs to be done in the PartLevels class
6606        as it requires XML lookups of the works
6607        :param release_id: name for log file - usually =musicbrainz_albumid
6608        unless called outside metadata processor
6609        :param album:
6610        :param track:
6611        :param workIds:
6612        :param tm:
6613        :param count:
6614        :return:
6615        """
6616        if not self.options[track]['classical_extra_artists']:
6617            write_log(
6618                    release_id,
6619                    'debug',
6620                    'Not processing work_artists as ExtraArtists not selected to be run')
6621            return None
6622        write_log(
6623                release_id,
6624                'debug',
6625                'In process_work_artists for track: %s, workIds: %s',
6626                track,
6627                workIds)
6628        write_log(
6629                release_id,
6630                'debug',
6631                'In process_work_artists for track: %s, self.parts: %s',
6632                track,
6633                self.parts)
6634        if workIds in self.parts and 'arrangers' in self.parts[workIds]:
6635            write_log(
6636                    release_id,
6637                    'info',
6638                    'Arrangers = %s',
6639                    self.parts[workIds]['arrangers'])
6640            set_work_artists(
6641                self,
6642                release_id,
6643                album,
6644                track,
6645                self.parts[workIds]['arrangers'],
6646                tm,
6647                count)
6648        if workIds in self.works_cache:
6649            count += 1
6650            self.process_work_artists(release_id, album, track, tuple(
6651                self.works_cache[workIds]), tm, count)
6652
6653    #################################################
6654    # SECTION 5 - Extend work metadata using titles #
6655    #################################################
6656
6657    def extend_metadata(self, release_id, top_info, track, ref_height, depth):
6658        """
6659        Combine MB work and title data according to user options
6660        :param release_id: name for log file - usually =musicbrainz_albumid
6661        unless called outside metadata processor
6662        :param top_info:
6663        :param track:
6664        :param ref_height:
6665        :param depth:
6666        :return:
6667        """
6668        write_log(release_id, 'debug', 'IN EXTEND_METADATA')
6669        tm = track.metadata
6670        options = self.options[track]
6671        movementgroup = ()
6672        if '~cwp_part_levels' not in tm:
6673            write_log(
6674                    release_id,
6675                    'debug',
6676                    'NO PART LEVELS. Metadata = %s',
6677                    tm)
6678            return None
6679        part_levels = int(tm['~cwp_part_levels'])
6680        write_log(
6681                release_id,
6682                'debug',
6683                "Extending metadata for track: %s, ref_height: %s, depth: %s, part_levels: %s",
6684                track,
6685                ref_height,
6686                depth,
6687                part_levels)
6688        write_log(release_id, 'info', "Metadata = %s", tm)
6689
6690        # previously: ref_height = work_part_levels - ref_level,
6691        # where this ref-level is the level for the top-named work
6692        # so ref_height is effectively the "single work album" indicator (1 or 0) -
6693        #   i.e. where all tracks are part of one work which is implicitly the album
6694        #   without there being a groupheading for it
6695        ref_level = part_levels - ref_height
6696        # work_ref_level = work_part_levels - ref_height # not currently used
6697
6698        # replace works and parts by those derived from the level 0 work, where
6699        # required, available and appropriate, but only use work names based on
6700        # level 0 text if it doesn't cause ambiguity
6701
6702        # before embellishing with partial / arrangement etc
6703        vanilla_part = tm['~cwp_part_0']
6704
6705        # Fix text for arrangements, partials and medleys (Done here so that
6706        # cache can be used)
6707        if options['cwp_arrangements'] and options["cwp_arrangements_text"]:
6708            for lev in range(
6709                    0,
6710                    ref_level):  # top level will not be an arrangement else there would be a higher level
6711                # needs to be a tuple to match
6712                if '~cwp_workid_' + str(lev) in tm:
6713                    tup_id = tuple(str_to_list(tm['~cwp_workid_' + str(lev)]))
6714                    if 'arrangement' in self.parts[tup_id] and self.parts[tup_id]['arrangement']:
6715                        update_list = ['~cwp_work_', '~cwp_part_']
6716                        if options["cwp_level0_works"] and '~cwp_X0_work_' + \
6717                                str(lev) in tm:
6718                            update_list += ['~cwp_X0_work_', '~cwp_X0_part_']
6719                        for item in update_list:
6720                            tm[item + str(lev)] = options["cwp_arrangements_text"] + \
6721                                ' ' + tm[item + str(lev)]
6722
6723        if options['cwp_partial'] and options["cwp_partial_text"]:
6724            if '~cwp_workid_0' in tm:
6725                work0_id = tuple(str_to_list(tm['~cwp_workid_0']))
6726                if 'partial' in self.parts[work0_id] and self.parts[work0_id]['partial']:
6727                    update_list = ['~cwp_work_0', '~cwp_part_0']
6728                    if options["cwp_level0_works"] and '~cwp_X0_work_0' in tm:
6729                        update_list += ['~cwp_X0_work_0', '~cwp_X0_part_0']
6730                    for item in update_list:
6731                        meta_item = tm.getall(item)
6732                        if isinstance(
6733                                meta_item, list):  # it should be a list as I think getall always returns a list
6734                            if meta_item == []:
6735                                meta_item.append(options["cwp_partial_text"])
6736                            else:
6737                                for ind, w in enumerate(meta_item):
6738                                    meta_item[ind] = options["cwp_partial_text"] + ' ' + w
6739                            write_log(
6740                                release_id, 'info', 'now meta item is %s', meta_item)
6741                            tm[item] = meta_item
6742                        else:
6743                            tm[item] = options["cwp_partial_text"] + \
6744                                ' ' + tm[item]
6745                            write_log(
6746                                release_id, 'info', 'meta item is not a list')
6747
6748        # fix "type 1" medley text
6749        if options['cwp_medley']:
6750            for lev in range(0, ref_level + 1):
6751                if '~cwp_workid_' + str(lev) in tm:
6752                    tup_id = tuple(str_to_list(tm['~cwp_workid_' + str(lev)]))
6753                    if 'medley_list' in self.parts[tup_id] and self.parts[tup_id]['medley_list']:
6754                        medley_list = self.parts[tup_id]['medley_list']
6755                        tm['~cwp_work_' + str(lev)] += " (" + options["cwp_medley_text"] + \
6756                            ': ' + ', '.join(medley_list) + ")"
6757                        if '~cwp_part_' + str(lev) in tm:
6758                            tm['~cwp_part_' + str(
6759                                lev)] = "(" + options["cwp_medley_text"] + ") " + tm['~cwp_part_' + str(lev)]
6760
6761        # add any annotations for dates and keys
6762        if options['cwp_workdate_include'] or options['cwp_key_include'] or options['cwp_key_contingent_include']:
6763            if options["cwp_titles"] and part_levels == 0:
6764                # ~cwp_title_work_0 will not have been set, but need it to hold any annotations
6765                tm['~cwp_title_work_0'] = tm['~cwp_title'] or tm['title']
6766            for lev in range(0, part_levels + 1):
6767                if '~cwp_workid_' + str(lev) in tm:
6768                    tup_id = tuple(str_to_list(tm['~cwp_workid_' + str(lev)]))
6769                    if 'annotations' in self.parts[tup_id]:
6770                        write_log(
6771                                release_id,
6772                                'info',
6773                                'in extend_metadata, annotations for id %s on track %s are %s',
6774                                tup_id,
6775                                track,
6776                                self.parts[tup_id]['annotations'])
6777                        tm['~cwp_work_' + str(lev)] += " (" + \
6778                            ', '.join(self.parts[tup_id]['annotations']) + ")"
6779                        if options["cwp_level0_works"] and '~cwp_X0_work_' + \
6780                                str(lev) in tm:
6781                            tm['~cwp_X0_work_' + str(lev)] += " (" + ', '.join(
6782                                self.parts[tup_id]['annotations']) + ")"
6783                        if options["cwp_titles"] and '~cwp_title_work_' + \
6784                                str(lev) in tm:
6785                            tm['~cwp_title_work_' + str(lev)] += " (" + ', '.join(
6786                                self.parts[tup_id]['annotations']) + ")"
6787                        if lev < part_levels:
6788                            if 'stripped_annotations' in self.parts[tup_id]:
6789                                if self.parts[tup_id]['stripped_annotations']:
6790                                    tm['~cwp_part_' + str(lev)] += " (" + ', '.join(
6791                                        self.parts[tup_id]['stripped_annotations']) + ")"
6792                                    if options["cwp_level0_works"] and '~cwp_X0_part_' + \
6793                                            str(lev) in tm:
6794                                        tm['~cwp_X0_part_' + str(lev)] += " (" + ', '.join(
6795                                            self.parts[tup_id]['stripped_annotations']) + ")"
6796                                    if options["cwp_titles"] and '~cwp_title_part_' + \
6797                                            str(lev) in tm:
6798                                        tm['~cwp_title_part' + str(lev)] += " (" + ', '.join(
6799                                            self.parts[tup_id]['stripped_annotations']) + ")"
6800
6801        part = []
6802        work = []
6803        for level in range(0, part_levels):
6804            part.append(tm['~cwp_part_' + str(level)])
6805            work.append(tm['~cwp_work_' + str(level)])
6806        work.append(tm['~cwp_work_' + str(part_levels)])
6807
6808        # Use level_0-derived names if applicable
6809        if options["cwp_level0_works"]:
6810            for level in range(0, part_levels + 1):
6811                if '~cwp_X0_work_' + str(level) in tm:
6812                    work[level] = tm['~cwp_X0_work_' + str(level)]
6813                else:
6814                    if level != 0:
6815                        work[level] = ''
6816                if part and len(part) > level:
6817                    if '~cwp_X0_part_' + str(level) in tm:
6818                        part[level] = tm['~cwp_X0_part_' + str(level)]
6819                    else:
6820                        if level != 0:
6821                            part[level] = ''
6822
6823        # set up group heading and part
6824        if part_levels > 0:
6825            groupheading = work[1]
6826            work_main = work[ref_level]
6827            inter_work = None
6828            work_titles = tm['~cwp_title_work_' + str(ref_level)]
6829            if ref_level > 1:
6830                for r in range(1, ref_level):
6831                    if inter_work:
6832                        inter_work = ': ' + inter_work
6833                    inter_work = part[r] + (inter_work or '')
6834                groupheading = work[ref_level] + ':: ' + (inter_work or '')
6835        else:
6836            groupheading = work[0]
6837            work_main = groupheading
6838            inter_work = None
6839            work_titles = None
6840
6841        # determine movement grouping (highest level that is not a collection)
6842        if '~cwp_workid_top' in tm:
6843            movementgroup = tuple(str_to_list(tm['~cwp_workid_top']))
6844            n = part_levels
6845            write_log(
6846                    release_id,
6847                    'debug',
6848                    "In extend. self.parts[%s]['is_collection']: %s",
6849                    movementgroup,
6850                    self.parts[movementgroup]['is_collection'])
6851            while self.parts[movementgroup]['is_collection']:
6852                n -= 1
6853                if n < 0:
6854                    # shouldn't happen in theory as bottom level can't be a collection, but just in case...
6855                    break
6856                if '~cwp_workid_'  + str(n) in tm:
6857                    movementgroup = tuple(str_to_list(tm['~cwp_workid_'  + str(n)]))
6858                else:
6859                    break
6860
6861        # set part text (initially)
6862        if part:
6863            part_main = part[0]
6864        else:
6865            part_main = work[0]
6866        tm['~cwp_part'] = part_main
6867
6868        # fix medley text for "type 2" medleys
6869        type2_medley = False
6870        if self.parts[tuple(str_to_list(tm['~cwp_workid_0']))
6871                      ]['medley'] and options['cwp_medley']:
6872            if options["cwp_medley_text"]:
6873                if part_levels > 0:
6874                    medleyheading = groupheading + ':: ' + part[0]
6875                else:
6876                    medleyheading = groupheading
6877                groupheading = medleyheading + \
6878                    ' (' + options["cwp_medley_text"] + ')'
6879            type2_medley = True
6880
6881        tm['~cwp_groupheading'] = groupheading
6882        tm['~cwp_work'] = work_main
6883        tm['~cwp_inter_work'] = inter_work
6884        tm['~cwp_title_work'] = work_titles
6885        write_log(
6886                release_id,
6887                'debug',
6888                "Groupheading set to: %s",
6889                groupheading)
6890        # extend group heading from title metadata
6891        if groupheading:
6892            ext_groupheading = groupheading
6893            title_groupheading = None
6894            ext_work = work_main
6895            ext_inter_work = inter_work
6896            inter_title_work = ""
6897
6898            if '~cwp_title_work_levels' in tm:
6899
6900                title_depth = int(tm['~cwp_title_work_levels'])
6901                write_log(
6902                        release_id,
6903                        'info',
6904                        "Title_depth: %s",
6905                        title_depth)
6906                diff_work = [""] * ref_level
6907                diff_part = [""] * ref_level
6908                title_tag = [""]
6909                # level 0 work for title # was 'x'  # to avoid errors, reset
6910                # before used
6911                tw_str_lower = 'title'
6912                max_d = min(ref_level, title_depth) + 1
6913                for d in range(1, max_d):
6914                    tw_str = '~cwp_title_work_' + str(d)
6915                    write_log(release_id, 'info', "TW_STR = %s", tw_str)
6916                    if tw_str in tm:
6917                        title_tag.append(tm[tw_str])
6918                        title_work = title_tag[d]
6919                        work_main = ''
6920                        for w in range(d, ref_level + 1):
6921                            work_main += (work[w] + ' ')
6922                        diff_work[d - 1] = self.diff_pair(
6923                            release_id, track, tm, work_main, title_work)
6924                        if diff_work[d - 1]:
6925                            diff_work[d - 1] = diff_work[d - 1].strip('.;:-,')
6926                            if diff_work[d - 1] == '…':
6927                                diff_work[d - 1] = ''
6928                        if d > 1 and tw_str_lower in tm:
6929                            title_part = self.strip_parent_from_work(
6930                                track, release_id, tm[tw_str_lower], tm[tw_str], 0, False)[0]
6931                            if title_part:
6932                                title_part = title_part.strip(' .;:-,')
6933                            tm['~cwp_title_part_' +
6934                                str(d - 1)] = title_part
6935                            part_n = part[d - 1]
6936                            diff_part[d - 1] = self.diff_pair(
6937                                release_id, track, tm, part_n, title_part) or ""
6938                            if diff_part[d - 1] == '…':
6939                                diff_part[d - 1] = ''
6940                    else:
6941                        title_tag.append('')
6942                    tw_str_lower = tw_str
6943                # remove duplicate items at lower levels in diff_work:
6944                for w in range(ref_level - 2, -1, -1):
6945                    for higher in range(1, ref_level - w):
6946                        if diff_work[w] and diff_work[w + higher]:
6947                            diff_work[w] = diff_work[w].replace(
6948                                diff_work[w + higher], '').strip(' .;:-,\u2026')
6949                            # if diff_work[w] == '…':
6950                            #     diff_work[w] = ''
6951                write_log(
6952                        release_id,
6953                        'info',
6954                        "diff list for works: %s",
6955                        diff_work)
6956                write_log(
6957                        release_id,
6958                        'info',
6959                        "diff list for parts: %s",
6960                        diff_part)
6961                if not diff_work or len(diff_work) == 0:
6962                    if part_levels > 0:
6963                        ext_groupheading = groupheading
6964                else:
6965                    write_log(
6966                            release_id,
6967                            'debug',
6968                            "Now calc extended groupheading...")
6969                    write_log(
6970                            release_id,
6971                            'info',
6972                            "depth = %s, ref_level = %s, title_depth = %s",
6973                            depth,
6974                            ref_level,
6975                            title_depth)
6976                    write_log(
6977                            release_id,
6978                            'info',
6979                            "diff_work = %s, diff_part = %s",
6980                            diff_work,
6981                            diff_part)
6982                    # remove duplications:
6983                    for lev in range(1, ref_level):
6984                        for diff_list in [diff_work, diff_part]:
6985                            if diff_list[lev] and diff_list[lev - 1]:
6986                                diff_list[lev - 1] = self.diff_pair(
6987                                    release_id, track, tm, diff_list[lev], diff_list[lev - 1])
6988                                if diff_list[lev - 1] == '…':
6989                                    diff_list[lev - 1] = ''
6990                    write_log(
6991                            release_id,
6992                            'info',
6993                            "Removed duplication. Revised diff_work = %s, diff_part = %s",
6994                            diff_work,
6995                            diff_part)
6996                    if part_levels > 0 and depth >= 1:
6997                        addn_work = []
6998                        addn_part = []
6999                        for stripped_work in diff_work:
7000                            if stripped_work:
7001                                write_log(
7002                                        release_id, 'info', "Stripped work = %s", stripped_work)
7003                                addn_work.append(" {" + stripped_work + "}")
7004                            else:
7005                                addn_work.append("")
7006                        for stripped_part in diff_part:
7007                            if stripped_part and stripped_part != "":
7008                                write_log(release_id, 'info', "Stripped part = %s", stripped_part)
7009                                addn_part.append(" {" + stripped_part + "}")
7010                            else:
7011                                addn_part.append("")
7012                        write_log(
7013                                release_id,
7014                                'info',
7015                                "addn_work = %s, addn_part = %s",
7016                                addn_work,
7017                                addn_part)
7018                        ext_groupheading = work[1] + addn_work[0]
7019                        ext_work = work[ref_level] + addn_work[ref_level - 1]
7020                        ext_inter_work = ""
7021                        inter_title_work = ""
7022                        title_groupheading = tm['~cwp_title_work_1']
7023                        if ref_level > 1:
7024                            for r in range(1, ref_level):
7025                                if ext_inter_work:
7026                                    ext_inter_work = ': ' + ext_inter_work
7027                                ext_inter_work = part[r] + \
7028                                    addn_work[r - 1] + ext_inter_work
7029                            ext_groupheading = work[ref_level] + \
7030                                addn_work[ref_level - 1] + ':: ' + ext_inter_work
7031                        if title_depth > 1 and ref_level > 1:
7032                            for r in range(1, min(title_depth, ref_level)):
7033                                if inter_title_work:
7034                                    inter_title_work = ': ' + inter_title_work
7035                                inter_title_work = tm['~cwp_title_part_' +
7036                                                      str(r)] + inter_title_work
7037                            title_groupheading = tm['~cwp_title_work_' + str(
7038                                min(title_depth, ref_level))] + ':: ' + inter_title_work
7039
7040                    else:
7041                        ext_groupheading = groupheading  # title will be in part
7042                        ext_work = work_main
7043                        ext_inter_work = inter_work
7044                        inter_title_work = ""
7045
7046                    write_log(release_id, 'debug', ".... ext_groupheading done")
7047
7048            if ext_groupheading:
7049                write_log(
7050                        release_id,
7051                        'info',
7052                        "EXTENDED GROUPHEADING: %s",
7053                        ext_groupheading)
7054                tm['~cwp_extended_groupheading'] = ext_groupheading
7055                tm['~cwp_extended_work'] = ext_work
7056                if ext_inter_work:
7057                    tm['~cwp_extended_inter_work'] = ext_inter_work
7058                if inter_title_work:
7059                    tm['~cwp_inter_title_work'] = inter_title_work
7060                if title_groupheading:
7061                    tm['~cwp_title_groupheading'] = title_groupheading
7062                    write_log(
7063                            release_id,
7064                            'info',
7065                            "title_groupheading = %s",
7066                            title_groupheading)
7067        # extend part from title metadata
7068        write_log(
7069                release_id,
7070                'debug',
7071                "NOW EXTEND PART...(part = %s)",
7072                part_main)
7073        if part_main:
7074            if '~cwp_title_part_0' in tm:
7075                movement = tm['~cwp_title_part_0']
7076            else:
7077                movement = tm['~cwp_title_part_0'] or tm['~cwp_title'] or tm['title']
7078            if '~cwp_extended_groupheading' in tm:
7079                work_compare = tm['~cwp_extended_groupheading'] + \
7080                    ': ' + part_main
7081            elif '~cwp_work_1' in tm:
7082                work_compare = work[1] + ': ' + part_main
7083            else:
7084                work_compare = work[0]
7085            diff = self.diff_pair(
7086                release_id, track, tm, work_compare, movement)
7087            # compare with the fullest possible work name, not the stripped one
7088            #  - to maximise the duplication elimination
7089            reverse_diff = self.diff_pair(
7090                release_id, track, tm, movement, vanilla_part)
7091            # for the reverse comparison use the part name without any work details or annotation
7092            if diff and reverse_diff and self.parts[tuple(str_to_list(tm['~cwp_workid_0']))]['partial']:
7093                diff = movement
            # for partial tracks, do not eliminate the title text as it is
            # frequently deliberately a component of the overall work text
            # (unless it is identical)
7097            fill_part = options['cwp_fill_part']
7098            # To fill part with title text if it
7099            # would otherwise have no text other than arrangement or partial
7100            # annotations
7101            if not diff and not vanilla_part and part_levels > 0 and fill_part:
7102                # In other words the movement will have no text other than
7103                # arrangement or partial annotations
7104                diff = movement
7105            write_log(release_id, 'info', "DIFF PART - MOVT. ti =%s", diff)
7106            write_log(release_id,
7107                          'info',
7108                          'medley indicator for %s is %s',
7109                          tm['~cwp_workid_0'],
7110                          self.parts[tuple(str_to_list(tm['~cwp_workid_0']))]['medley'])
7111
7112            if type2_medley:
7113                tm['~cwp_extended_part'] = "{" + movement + "}"
7114            else:
7115                if diff:
7116                    tm['~cwp_extended_part'] = part_main + \
7117                        " {" + diff.strip() + "}"
7118                else:
7119                    tm['~cwp_extended_part'] = part_main
7120                if part_levels == 0:
7121                    if tm['~cwp_extended_groupheading']:
7122                        del tm['~cwp_extended_groupheading']
7123
7124        # remove unwanted groupheadings (needed them up to now for adding
7125        # extensions)
7126        if '~cwp_groupheading' in tm and tm['~cwp_groupheading'] == tm['~cwp_part']:
7127            del tm['~cwp_groupheading']
7128        if '~cwp_title_groupheading' in tm and tm['~cwp_title_groupheading'] == tm['~cwp_title_part']:
7129            del tm['~cwp_title_groupheading']
7130        # clean up groupheadings (may be stray separators if level 0  or title
7131        # options used)
7132        if '~cwp_groupheading' in tm:
7133            tm['~cwp_groupheading'] = tm['~cwp_groupheading'].strip(
7134                ':').strip(
7135                options['cwp_single_work_sep']).strip(
7136                options['cwp_multi_work_sep'])
7137        if '~cwp_extended_groupheading' in tm:
7138            tm['~cwp_extended_groupheading'] = tm['~cwp_extended_groupheading'].strip(
7139                ':').strip(
7140                options['cwp_single_work_sep']).strip(
7141                options['cwp_multi_work_sep'])
7142        if '~cwp_title_groupheading' in tm:
7143            tm['~cwp_title_groupheading'] = tm['~cwp_title_groupheading'].strip(
7144                ':').strip(
7145                options['cwp_single_work_sep']).strip(
7146                options['cwp_multi_work_sep'])
7147        write_log(release_id, 'debug', "....done")
7148        return movementgroup
7149
7150    ##########################################################
7151    # SECTION 6- Write metadata to tags according to options #
7152    ##########################################################
7153
7154    def publish_metadata(self, release_id, album, track, movement_info={}):
7155        """
7156        Write out the metadata according to user options
7157        :param release_id: name for log file - usually =musicbrainz_albumid
7158        unless called outside metadata processor
7159        :param album:
7160        :param track:
7161        :param movement_info: format is {'movement-group': movementgroup, 'movement-number': movementnumber}
7162        :return:
7163        """
7164        write_log(release_id, 'debug', "IN PUBLISH METADATA for %s", track)
7165        options = self.options[track]
7166        tm = track.metadata
7167        tm['~cwp_version'] = PLUGIN_VERSION
7168
7169        # set movement grouping tags (hidden vars)
7170        if movement_info:
7171            movementtotal = self.parts[tuple(movement_info['movement-group'])]['movement-total']
7172            if movementtotal > 1:
7173                tm['~cwp_movt_num'] = movement_info['movement-number']
7174                tm['~cwp_movt_tot'] = movementtotal
7175
7176        # album composers needed by map_tags (set in set_work_artists)
7177        if 'composer_lastnames' in self.album_artists[album]:
7178            last_names = seq_last_names(self, album)
7179            self.append_tag(
7180                release_id,
7181                tm,
7182                '~cea_album_composer_lastnames',
7183                last_names)
7184
7185        write_log(release_id, 'info', "Check options")
7186        if options["cwp_titles"]:
7187            write_log(release_id, 'info', "titles")
7188            part = tm['~cwp_title_part_0'] or tm['~cwp_title_work_0']or tm['~cwp_title'] or tm['title']
7189            # for multi-level work display
7190            groupheading = tm['~cwp_title_groupheading'] or ""
7191            # for single-level work display
7192            work = tm['~cwp_title_work'] or ""
7193            inter_work = tm['~cwp_inter_title_work'] or ""
7194        elif options["cwp_works"]:
7195            write_log(release_id, 'info', "works")
7196            part = tm['~cwp_part']
7197            groupheading = tm['~cwp_groupheading'] or ""
7198            work = tm['~cwp_work'] or ""
7199            inter_work = tm['~cwp_inter_work'] or ""
7200        else:
7201            # options["cwp_extended"]
7202            write_log(release_id, 'info', "extended")
7203            part = tm['~cwp_extended_part']
7204            groupheading = tm['~cwp_extended_groupheading'] or ""
7205            work = tm['~cwp_extended_work'] or ""
7206            inter_work = tm['~cwp_extended_inter_work'] or ""
7207        write_log(release_id, 'info', "Done options")
7208        p1 = RE_ROMANS_AT_START
7209        # Matches positive integers with punctuation
7210        p2 = re.compile(r'^\W*\d+[.):-]')
7211        movt = part
7212        for _ in range(
7213                0, 5):  # in case of multiple levels
7214            movt = p2.sub('', p1.sub('', movt)).strip()
7215        write_log(release_id, 'info', "Done movt")
7216        movt_inc_tags = options["cwp_movt_tag_inc"].split(",")
7217        movt_inc_tags = [x.strip(' ') for x in movt_inc_tags]
7218        movt_exc_tags = options["cwp_movt_tag_exc"].split(",")
7219        movt_exc_tags = [x.strip(' ') for x in movt_exc_tags]
7220        movt_inc_1_tags = options["cwp_movt_tag_inc1"].split(",")
7221        movt_inc_1_tags = [x.strip(' ') for x in movt_inc_1_tags]
7222        movt_exc_1_tags = options["cwp_movt_tag_exc1"].split(",")
7223        movt_exc_1_tags = [x.strip(' ') for x in movt_exc_1_tags]
7224        movt_no_tags = options["cwp_movt_no_tag"].split(",")
7225        movt_no_tags = [x.strip(' ') for x in movt_no_tags]
7226        movt_no_sep = options["cwp_movt_no_sep"]
7227        movt_tot_tags = options["cwp_movt_tot_tag"].split(",")
7228        movt_tot_tags = [x.strip(' ') for x in movt_tot_tags]
7229        gh_tags = options["cwp_work_tag_multi"].split(",")
7230        gh_tags = [x.strip(' ') for x in gh_tags]
7231        gh_sep = options["cwp_multi_work_sep"]
7232        work_tags = options["cwp_work_tag_single"].split(",")
7233        work_tags = [x.strip(' ') for x in work_tags]
7234        work_sep = options["cwp_single_work_sep"]
7235        top_tags = options["cwp_top_tag"].split(",")
7236        top_tags = [x.strip(' ') for x in top_tags]
7237
7238        write_log(
7239                release_id,
7240                'info',
7241                "Done splits. gh_tags: %s, work_tags: %s, movt_inc_tags: %s, movt_exc_tags: %s, movt_no_tags: %s",
7242                gh_tags,
7243                work_tags,
7244                movt_inc_tags,
7245                movt_exc_tags,
7246                movt_no_tags)
7247
7248        for tag in gh_tags + work_tags + movt_inc_tags + movt_exc_tags + movt_no_tags:
7249            tm[tag] = ""
7250        for tag in gh_tags:
7251            if tag in movt_inc_tags + movt_exc_tags + movt_no_tags:
7252                self.append_tag(release_id, tm, tag, groupheading, gh_sep)
7253            else:
7254                self.append_tag(release_id, tm, tag, groupheading)
7255        for tag in work_tags:
7256            if tag in movt_inc_1_tags + movt_exc_1_tags + movt_no_tags:
7257                self.append_tag(release_id, tm, tag, work, work_sep)
7258            else:
7259                self.append_tag(release_id, tm, tag, work)
7260            if '~cwp_part_levels' in tm and int(tm['~cwp_part_levels']) > 0:
7261                self.append_tag(
7262                    release_id,
7263                    tm,
7264                    'show work movement',
7265                    '1')  # original tag for iTunes, kept for backwards compatibility
7266                self.append_tag(
7267                    release_id,
7268                    tm,
7269                    'showmovement',
7270                    '1')  # new tag for iTunes & MusicBee, consistent with Picard tag docs
7271        for tag in top_tags:
7272            if '~cwp_work_top' in tm:
7273                self.append_tag(release_id, tm, tag, tm['~cwp_work_top'])
7274
7275        if '~cwp_movt_num' in tm and len(tm['~cwp_movt_num']) > 0:
7276            movt_num_punc = tm['~cwp_movt_num'] + movt_no_sep + ' '
7277        else:
7278            movt_num_punc = ''
7279
7280        for tag in movt_no_tags:
7281            if tag not in movt_inc_tags + movt_exc_tags + movt_inc_1_tags + movt_exc_1_tags:
7282                self.append_tag(release_id, tm, tag, tm['~cwp_movt_num'])
7283
7284        for tag in movt_tot_tags:
7285            self.append_tag(release_id, tm, tag, tm['~cwp_movt_tot'])
7286
7287        for tag in movt_exc_tags:
7288            if tag in movt_no_tags:
7289                movt = movt_num_punc + movt
7290            self.append_tag(release_id, tm, tag, movt)
7291
7292        for tag in movt_inc_tags:
7293            if tag in movt_no_tags:
7294                part = movt_num_punc + part
7295            self.append_tag(release_id, tm, tag, part)
7296
7297
7298        for tag in movt_inc_1_tags + movt_exc_1_tags:
7299            if tag in movt_inc_1_tags:
7300                pt = part
7301            else:
7302                pt = movt
7303            if tag in movt_no_tags:
7304                pt = movt_num_punc + pt
7305            if inter_work and inter_work != "":
7306                if tag in movt_exc_tags + movt_inc_tags and tag != "":
7307                    write_log(
7308                        release_id,
7309                        'warning',
7310                        "Tag %s will have multiple contents",
7311                        tag)
7312                    if self.WARNING:
7313                        self.append_tag(release_id, tm, '~cwp_warning', '6. Tag ' +
7314                                    tag +
7315                                    ' has multiple contents')
7316                self.append_tag(
7317                    release_id,
7318                    tm,
7319                    tag,
7320                    inter_work +
7321                    work_sep +
7322                    " " +
7323                    pt)
7324            else:
7325                self.append_tag(release_id, tm, tag, pt)
7326
7327        for tag in movt_exc_tags + movt_inc_tags + movt_exc_1_tags + movt_inc_1_tags:
7328            if tag in movt_no_tags:
                # i.e., treat as one item, not multiple
7330                tm[tag] = "".join(re.split('|'.join(self.SEPARATORS), tm[tag]))
7331
7332        # write "SongKong" tags
7333        if options['cwp_write_sk']:
7334            write_log(release_id, 'debug', "Writing SongKong work tags")
7335            if '~cwp_part_levels' in tm:
7336                part_levels = int(tm['~cwp_part_levels'])
7337                for n in range(0, part_levels + 1):
7338                    if '~cwp_work_' + \
7339                            str(n) in tm and '~cwp_workid_' + str(n) in tm:
7340                        source = tm['~cwp_work_' + str(n)]
7341                        source_id = list(
7342                            tuple(str_to_list(tm['~cwp_workid_' + str(n)])))
7343                        if n == 0:
7344                            self.append_tag(
7345                                release_id, tm, 'musicbrainz_work_composition', source)
7346                            for source_id_item in source_id:
7347                                self.append_tag(
7348                                    release_id, tm, 'musicbrainz_work_composition_id', source_id_item)
7349                        if n == part_levels:
7350                            self.append_tag(
7351                                release_id, tm, 'musicbrainz_work', source)
7352                            if 'musicbrainz_workid' in tm:
7353                                del tm['musicbrainz_workid']
7354                            # Delete the Picard version of this tag before
7355                            # replacing it with the SongKong version
7356                            for source_id_item in source_id:
7357                                self.append_tag(
7358                                    release_id, tm, 'musicbrainz_workid', source_id_item)
7359                        if n != 0 and n != part_levels:
7360                            self.append_tag(
7361                                release_id, tm, 'musicbrainz_work_part_level' + str(n), source)
7362                            for source_id_item in source_id:
7363                                self.append_tag(
7364                                    release_id,
7365                                    tm,
7366                                    'musicbrainz_work_part_level' +
7367                                    str(n) +
7368                                    '_id',
7369                                    source_id_item)
7370
7371        # carry out tag mapping
7372        tm['~cea_works_complete'] = "Y"
7373        map_tags(options, release_id, album, tm)
7374
7375        write_log(release_id, 'debug', "Published metadata for %s", track)
7376        if options['cwp_options_tag'] != "":
7377            self.cwp_options = collections.defaultdict(
7378                lambda: collections.defaultdict(dict))
7379
7380            for opt in plugin_options('workparts') + plugin_options('genres'):
7381                if 'name' in opt:
7382                    if 'value' in opt:
7383                        if options[opt['option']]:
7384                            self.cwp_options['Classical Extras']['Works options'][opt['name']] = opt['value']
7385                    else:
7386                        self.cwp_options['Classical Extras']['Works options'][opt['name']
7387                                                                              ] = options[opt['option']]
7388
7389            write_log(release_id, 'info', "Options %s", self.cwp_options)
7390            if options['ce_version_tag'] and options['ce_version_tag'] != "":
7391                self.append_tag(release_id, tm, options['ce_version_tag'], str(
7392                    'Version ' + tm['~cwp_version'] + ' of Classical Extras'))
7393            if options['cwp_options_tag'] and options['cwp_options_tag'] != "":
7394                self.append_tag(release_id, tm, options['cwp_options_tag'] +
7395                                ':workparts_options', json.loads(
7396                    json.dumps(
7397                        self.cwp_options)))
7398        if self.ERROR and "~cwp_error" in tm:
7399            for error in str_to_list(tm['~cwp_error']):
7400                code = error[0]
7401                self.append_tag(release_id, tm, '001_errors:' + code, error)
7402        if self.WARNING and "~cwp_warning" in tm:
7403            for warning in str_to_list(tm['~cwp_warning']):
7404                wcode = warning[0]
7405                self.append_tag(release_id, tm, '002_warnings:' + wcode, warning)
7406
7407
7408    def append_tag(self, release_id, tm, tag, source, sep=None):
7409        """
7410        pass to main append routine
7411        :param release_id: name for log file - usually =musicbrainz_albumid
7412        unless called outside metadata processor
7413        :param tm:
7414        :param tag:
7415        :param source:
7416        :param sep: separators may be used to split string into list on appending
7417        :return:
7418        """
7419        write_log(
7420                release_id,
7421                'info',
7422                "In append_tag (Work parts). tag = %s, source = %s, sep =%s",
7423                tag,
7424                source,
7425                sep)
7426        append_tag(release_id, tm, tag, source, self.SEPARATORS)
7427        write_log(
7428                release_id,
7429                'info',
7430                "Appended. Resulting contents of tag: %s are: %s",
7431                tag,
7432                tm[tag])
7433
7434    ################################################
7435    # SECTION 7 - Common string handling functions #
7436    ################################################
7437
7438    def strip_parent_from_work(
7439            self,
7440            track,
7441            release_id,
7442            work,
7443            parent,
7444            part_level,
7445            extend,
7446            parentId=None,
7447            workId=None):
7448        """
7449        Remove common text
7450        :param track:
7451        :param release_id: name for log file - usually =musicbrainz_albumid
7452        unless called outside metadata processor
7453        :param work: could be a list of works, all of which require stripping
7454        :param parent:
7455        :param part_level:
7456        :param extend:
7457        :param parentId:
7458        :param workId:
7459        :return:
7460        """
7461        # extend=True is used [ NO LONGER to find "full_parent" names] + (with parentId)
7462        #  to trigger recursion if unable to strip parent name from work and also to look for common subsequences
7463        # extend=False is used when this routine is called for other purposes
7464        # than strict work: parent relationships
7465        options = self.options[track]
7466        write_log(
7467            release_id,
7468            'debug',
7469            "STRIPPING HIGHER LEVEL WORK TEXT FROM PART NAMES")
7470        write_log(
7471            release_id,
7472            'info',
7473            'PARAMS: WORK = %r, PARENT = %s, PART_LEVEL = %s, EXTEND= %s',
7474            work,
7475            parent,
7476            part_level,
7477            extend)
7478        if isinstance(work, list):
7479            result = []
7480            for w, work_item in enumerate(work):
7481                if workId and isinstance(workId, list):
7482                    sub_workId = workId[w]
7483                else:
7484                    sub_workId = workId
7485                result.append(
7486                    self.strip_parent_from_work(
7487                        track,
7488                        release_id,
7489                        work_item,
7490                        parent,
7491                        part_level,
7492                        extend,
7493                        parentId,
7494                        sub_workId)[0])
7495            return result, parent
7496        if not isinstance(parent, str):
7497            # in case it is a list - make sure it is a string
7498            parent = '; '.join(parent)
7499        if not isinstance(work, str):
7500            work = '; '.join(work)
7501
7502        # replace any punctuation or numbers, with a space (to remove any
7503        # inconsistent punctuation and numbering) - (?u) specifies the
7504        # re.UNICODE flag in sub
7505        clean_parent = re.sub("(?u)[\W]", ' ', parent)
7506        # now allow the spaces to be filled with up to 2 non-letters
7507        pattern_parent = clean_parent.replace(" ", "\W{0,2}")
7508        pattern_parent = "(^|.*?\s)(\W*" + pattern_parent + "\W?)(.*)"
7509        # (removed previous alternative pattern for extend=true, owing to catastrophic backtracking)
7510        write_log(
7511                release_id,
7512                'info',
7513                "Pattern parent: %s, Work: %s",
7514                pattern_parent,
7515                work)
7516        p = re.compile(pattern_parent, re.IGNORECASE | re.UNICODE)
7517        m = p.search(work)
7518        if m:
7519            write_log(release_id, 'info', "Matched...")
7520            if m.group(1):
7521                stripped_work = m.group(1) + u"\u2026" + m.group(3)
7522            else:
7523                stripped_work = m.group(3)
7524            # may not have a full work name in the parent (missing op. no.
7525            # etc.)
7526            stripped_work = stripped_work.lstrip(":;,.- ")
7527        else:
7528            write_log(release_id, 'info', "No match...")
7529            stripped_work = work
7530
7531            if extend and options['cwp_common_chars'] > 0:
7532                # try stripping out a common substring (multiple times until
7533                # nothing more stripped)
7534                prev_stripped_work = ''
7535                counter = 1
7536                while prev_stripped_work != stripped_work:
7537                    if counter > 20:
7538                        break  # in case something went awry
7539                    prev_stripped_work = stripped_work
7540                    parent_tuples = self.listify(release_id, track, parent)
7541                    parent_words = parent_tuples['s_tuple']
7542                    clean_parent_words = list(parent_tuples['s_test_tuple'])
7543                    for w, word in enumerate(clean_parent_words):
7544                        clean_parent_words[w] = self.boil(release_id, word)
7545                    work_tuples = self.listify(
7546                        release_id, track, stripped_work)
7547                    work_words = work_tuples['s_tuple']
7548                    clean_work_words = list(work_tuples['s_test_tuple'])
7549                    for w, word in enumerate(clean_work_words):
7550                        clean_work_words[w] = self.boil(release_id, word)
7551                    common_dets = longest_common_substring(
7552                        clean_work_words, clean_parent_words)
7553                    # this is actually a list, not a string, since list
7554                    # arguments were supplied
7555                    common_seq = common_dets['string']
7556                    seq_length = common_dets['length']
7557                    seq_start = common_dets['start']
7558                    # the original items (before 'cleaning')
7559                    full_common_seq = [
7560                        x.group() for x in work_words[seq_start:seq_start + seq_length]]
7561                    # number of words in common_seq
7562                    full_seq_length = sum([len(x.split())
7563                                           for x in full_common_seq])
7564                    write_log(
7565                        release_id,
7566                        'info',
7567                        'Checking common sequence between parent and work, iteration %s ... parent_words = %s',
7568                        counter,
7569                        parent_words)
7570                    write_log(
7571                        release_id,
7572                        'info',
7573                        '... longest common sequence = %s',
7574                        common_seq)
7575                    if full_seq_length > 0:
7576                        potential_stripped_work = stripped_work
7577                        if seq_start > 0:
7578                            ellipsis = ' ' + u"\u2026" + ' '
7579                        else:
7580                            ellipsis = ''
7581                        if counter > 1:
7582                            potential_stripped_work = stripped_work.rstrip(
7583                                ' :,-\u2026')
7584                            potential_stripped_work = potential_stripped_work.replace(
7585                                '(\u2026)', '').rstrip()
7586                        potential_stripped_work = potential_stripped_work[:work_words[seq_start].start(
7587                        )] + ellipsis + potential_stripped_work[work_words[seq_start + seq_length - 1].end():]
7588                        potential_stripped_work = potential_stripped_work.lstrip(
7589                            ' :,-')
7590                        potential_stripped_work = re.sub(
7591                            r'(\W*…\W*)(\W*…\W*)', ' … ', potential_stripped_work)
7592                        potential_stripped_work = strip_excess_punctuation(
7593                            potential_stripped_work)
7594
7595                        if full_seq_length >= options['cwp_common_chars'] \
7596                                or potential_stripped_work == '' and options['cwp_allow_empty_parts']:
7597                            # Make sure it is more than the required min (it will be > 0 anyway)
7598                            # unless a full strip will result anyway (and blank
7599                            # part names are allowed)
7600                            stripped_work = potential_stripped_work
7601                            if not stripped_work or stripped_work == '':
7602                                if workId and \
7603                                        ('arrangement' in self.parts[workId] and self.parts[workId]['arrangement']
7604                                         and options['cwp_arrangements'] and options['cwp_arrangements_text']) \
7605                                        or ('partial' in self.parts[workId] and self.parts[workId]['partial']
7606                                            and options['cwp_partial'] and options['cwp_partial_text']) \
7607                                        and options['cwp_allow_empty_parts']:
7608                                    pass
7609                                else:
7610                                    stripped_work = prev_stripped_work  # do not allow empty parts
7611                    counter += 1
7612            stripped_work = strip_excess_punctuation(stripped_work)
7613            write_log(
7614                    release_id,
7615                    'info',
7616                    'stripped_work = %s',
7617                    stripped_work)
7618            if extend and parentId and parentId in self.works_cache:
7619                write_log(
7620                        release_id,
7621                        'info',
7622                        "Looking for match at next level up")
7623                grandparentIds = tuple(self.works_cache[parentId])
7624                grandparent = self.parts[grandparentIds]['name']
7625                stripped_work = self.strip_parent_from_work(
7626                    track,
7627                    release_id,
7628                    stripped_work,
7629                    grandparent,
7630                    part_level,
7631                    True,
7632                    grandparentIds,
7633                    workId)[0]
7634
7635        write_log(
7636                release_id,
7637                'info',
7638                "Finished strip_parent_from_work, Work: %s",
7639                work)
7640        write_log(release_id, 'info', "Stripped work: %s", stripped_work)
7641        # Changed full_parent to parent after removal of 'extend' logic above
7642        stripped_work = strip_excess_punctuation(stripped_work)
7643        write_log(release_id, 'info', "Stripped work after punctuation removal: %s", stripped_work)
7644        return stripped_work, parent
7645
7646    def diff_pair(
7647            self,
7648            release_id,
7649            track,
7650            tm,
7651            mb_item,
7652            title_item,
7653            remove_numbers=True):
7654        """
7655        Removes common text (or synonyms) from title item
7656        :param release_id: name for log file - usually =musicbrainz_albumid
7657        unless called outside metadata processor
7658        :param track:
7659        :param tm:
7660        :param mb_item:
7661        :param title_item:
7662        :param remove_numbers: remove movement numbers when comparing (not currently called with False by anything)
7663        :return: Reduced title item
7664        """
7665        write_log(release_id, 'debug', "Inside DIFF_PAIR")
7666        mb = mb_item.strip()
7667        write_log(release_id, 'info', "mb = %s", mb)
7668        write_log(release_id, 'info', "title_item = %s", title_item)
7669        if not mb:
7670            write_log(
7671                    release_id,
7672                    'info',
7673                    'End of DIFF_PAIR. Returning %s',
7674                    None)
7675            return None
7676        ti = title_item.strip(" :;-.,")
7677        if ti.count('"') == 1:
7678            ti = ti.strip('"')
7679        if ti.count("'") == 1:
7680            ti = ti.strip("'")
7681        write_log(release_id, 'info', "ti (amended) = %s", ti)
7682        if not ti:
7683            write_log(
7684                    release_id,
7685                    'info',
7686                    'End of DIFF_PAIR. Returning %s',
7687                    None)
7688            return None
7689
7690        if self.options[track]["cwp_removewords_p"]:
7691            removewords = self.options[track]["cwp_removewords_p"].split(',')
7692        else:
7693            removewords = []
7694        write_log(release_id, 'info', "Prefixes = %s", removewords)
7695        # remove numbers, roman numerals, part etc and punctuation from the
7696        # start
7697        write_log(release_id, 'info', "checking prefixes")
7698        found_prefix = True
7699        i = 0
7700        while found_prefix:
7701            if i > 20:
7702                break  # safety valve
7703            found_prefix = False
7704            for prefix in removewords:
7705                if prefix[0] != " ":
7706                    prefix2 = str(prefix).lower().lstrip()
7707                    write_log(
7708                            release_id, 'info', "checking prefix %s", prefix2)
7709                    if mb.lower().startswith(prefix2):
7710                        found_prefix = True
7711                        mb = mb[len(prefix2):]
7712                    if ti.lower().startswith(prefix2):
7713                        found_prefix = True
7714                        ti = ti[len(prefix2):]
7715            mb = mb.strip()
7716            ti = ti.strip()
7717            i += 1
7718            write_log(
7719                    release_id,
7720                    'info',
7721                    "pairs after prefix strip iteration %s. mb = %s, ti = %s",
7722                    i,
7723                    mb,
7724                    ti)
7725        write_log(release_id, 'info', "Prefixes checked")
7726
7727        #  replacements
7728        replacements = self.replacements[track]
7729        write_log(release_id, 'info', "Replacement: %s", replacements)
7730        for tup in replacements:
7731            for ind in range(0, len(tup) - 1):
7732                ti = re.sub(tup[ind], tup[-1], ti, flags=re.IGNORECASE)
7733        write_log(
7734                release_id,
7735                'debug',
7736                'Looking for any new words in the title')
7737
7738        write_log(
7739                release_id,
7740                'info',
7741                "Check before splitting: mb = %s, ti = %s",
7742                mb,
7743                ti)
7744
7745        ti_tuples = self.listify(release_id, track, ti)
7746        ti_tuple = ti_tuples['s_tuple']
7747        ti_test_tuple = ti_tuples['s_test_tuple']
7748
7749        mb_tuples = self.listify(release_id, track, mb)
7750        mb_test_tuple = mb_tuples['s_test_tuple']
7751
7752        write_log(
7753                release_id,
7754                'info',
7755                "Check after splitting: mb_test = %s, ti = %s, ti_test = %s",
7756                mb_test_tuple,
7757                ti_tuple,
7758                ti_test_tuple)
7759
7760        ti_stencil = self.stencil(release_id, ti_tuple, ti)
7761        ti_list = ti_stencil['match list']
7762        ti_list_punc = ti_stencil['gap list']
7763        ti_test_list = list(ti_test_tuple)
7764        if ti_stencil['dummy']:
7765            # to deal with case where stencil has added a dummy item at the
7766            # start
7767            ti_test_list.insert(0, '')
7768        write_log(release_id, 'info', 'ti_test_list = %r', ti_test_list)
7769        # zip is an iterable, not a list in Python 3, so make it re-usable
7770        ti_zip_list = list(zip(ti_list, ti_list_punc))
7771
7772        # len(ti_list) should be = len(ti_test_list) as only difference should
7773        # be synonyms which are each one 'word'
7774        # However, because of the grouping of some words via regex, it is possible that inconsistencies might arise
7775        # Therefore, there is a test here to check for equality and produce an
7776        # error message (but continue processing)
7777        if len(ti_list) != len(ti_test_list):
7778            write_log(
7779                    release_id,
7780                    'error',
7781                    'Mismatch in title list after canonization/synonymization')
7782            write_log(
7783                release_id,
7784                'error',
7785                'Orig. title list = %r. Test list = %r',
7786                ti_list,
7787                ti_test_list)
7788        # mb_test_tuple = self.listify(release_id, track, mb_test)
7789        mb_list2 = list(mb_test_tuple)
7790        for index, mb_bit2 in enumerate(mb_list2):
7791            mb_list2[index] = self.boil(release_id, mb_bit2)
7792            write_log(
7793                    release_id,
7794                    'info',
7795                    "mb_list2[%s] = %s",
7796                    index,
7797                    mb_list2[index])
7798        ti_new = []
7799        ti_rich_list = []
7800        for i, ti_bit_test in enumerate(ti_test_list):
7801            if i <= len(ti_list) - 1:
7802                ti_bit = ti_zip_list[i]
7803                # NB ti_bit is a tuple where the word (1st item) is grouped
7804                # with its following punctuation (2nd item)
7805            else:
7806                ti_bit = ('', '')
7807            write_log(
7808                    release_id,
7809                    'info',
7810                    "i = %s, ti_bit_test = %s, ti_bit = %s",
7811                    i,
7812                    ti_bit_test,
7813                    ti_bit)
7814            ti_rich_list.append((ti_bit, True))
7815            # Boolean to indicate whether ti_bit is a new word
7816
7817            if ti_bit_test == '':
7818                ti_rich_list[i] = (ti_bit, False)
7819            else:
7820                if self.boil(release_id, ti_bit_test) in mb_list2:
7821                    ti_rich_list[i] = (ti_bit, False)
7822
7823        if remove_numbers:  # Only remove numbers at the start if they are not new items
7824            p0 = re.compile(r'\b\w+\b')
7825            p1 = RE_ROMANS
7826            p2 = re.compile(r'^\d+')  # Matches positive integers
7827            starts_with_numeral = True
7828            while starts_with_numeral:
7829                starts_with_numeral = False
7830                if ti_rich_list and p0.match(ti_rich_list[0][0][0]):
7831                    start_word = p0.match(ti_rich_list[0][0][0]).group()
7832                    if p1.match(start_word) or p2.match(start_word):
7833                        if not ti_rich_list[0][1]:
7834                            starts_with_numeral = True
7835                            ti_rich_list.pop(0)
7836                            ti_test_list.pop(0)
7837
7838        write_log(
7839                release_id,
7840                'info',
7841                "ti_rich_list before removing singletons = %s. length = %s",
7842                ti_rich_list,
7843                len(ti_rich_list))
7844
7845        s = 0
7846        index = 0
7847        change = ()
7848        for i, (t, n) in enumerate(ti_rich_list):
7849            if n:
7850                s += 1
7851                index = i
7852                change = t  # NB this is a tuple
7853
7854        p = self.options[track]["cwp_proximity"]
7855        ep = self.options[track]["cwp_end_proximity"]
7856        # NB these may be modified later
7857
7858        if s == 1:
7859            if 0 < index < len(ti_rich_list) - 1:
7860                # ignore singleton new words in middle of title unless they are
7861                # within "cwp_end_proximity" from the start or end
7862                write_log(
7863                    release_id, 'info', 'item length is %s', len(
7864                        change[0].split()))
7865                # also make sure that the item is just one word before
7866                # eliminating
7867                if ep < index < len(ti_rich_list) - ep - \
7868                        1 and len(change[0].split()) == 1:
7869                    ti_rich_list[index] = (change, False)
7870                    s = 0
7871
7872        # remove prepositions
7873        write_log(
7874                release_id,
7875                'info',
7876                "ti_rich_list before removing prepositions = %s. length = %s",
7877                ti_rich_list,
7878                len(ti_rich_list))
7879        if self.options[track]["cwp_prepositions"]:
7880            prepositions_fat = self.options[track]["cwp_prepositions"].split(
7881                ',')
7882            prepositions = [w.strip() for w in prepositions_fat]
7883            for i, ti_bit_test in enumerate(
7884                    reversed(ti_test_list)):  # Need to reverse it to check later prepositions first
7885                if ti_bit_test.lower().strip() in prepositions:
7886                    # NB i is counting up while traversing the list backwards
7887                    j = len(ti_rich_list) - i - 1
7888                    if i == 0 or not ti_rich_list[j + 1][1]:
7889                        # Don't make it false if it is preceded by a
7890                        # non-preposition new word
7891                        if not (j > 0 and ti_rich_list[j -
7892                                                       1][1] and ti_test_list[j -
7893                                                                              1].lower() not in prepositions):
7894                            ti_rich_list[j] = (ti_rich_list[j][0], False)
7895
7896        # create comparison for later usage
7897        compare_string = ''
7898        for item in ti_rich_list:
7899            if item[1]:
7900                compare_string += item[0][0]
7901        ti_compare = self.boil(release_id, compare_string)
7902        compare_length = len(ti_compare)
7903
7904        write_log(
7905                release_id,
7906                'info',
7907                "ti_rich_list before gapping (True indicates a word in title not in MB work) = %s. length = %s",
7908                ti_rich_list,
7909                len(ti_rich_list))
7910        if s > 0:
7911            d = p - ep
7912            start = True  # To keep track of new words at the start of the title
7913            for i, (ti_bit, new) in enumerate(ti_rich_list):
7914                if not new:
7915                    write_log(
7916                            release_id,
7917                            'info',
7918                            "item(i = %s) val = %s - not new. proximity param = %s, end_proximity param = %s",
7919                            i,
7920                            ti_bit,
7921                            p,
7922                            ep)
7923                    if start:
7924                        prox_test = ep
7925                    else:
7926                        prox_test = p
7927                    if prox_test > 0:
7928                        for j in range(0, prox_test + 1):
7929                            write_log(release_id, 'info', "item(i) = %s, look-ahead(j) = %s", i, j)
7930                            if i + j < len(ti_rich_list):
7931                                if ti_rich_list[i + j][1]:
7932                                    write_log(
7933                                            release_id, 'info', "Set to true..")
7934                                    ti_rich_list[i] = (ti_bit, True)
7935                                    write_log(
7936                                            release_id, 'info', "...set OK")
7937                            else:
7938                                if j <= p - d:
7939                                    ti_rich_list[i] = (ti_bit, True)
7940                else:
7941                    p = self.options[track]["cwp_proximity"]
7942                    start = False
7943                if not ti_rich_list[i][1]:
7944                    p -= 1
7945                    ep -= 1
7946        write_log(
7947                release_id,
7948                'info',
7949                "ti_rich_list after gapping (True indicates new words plus infills) = %s",
7950                ti_rich_list)
7951        nothing_new = True
7952        for (ti_bit, new) in ti_rich_list:
7953            if new:
7954                nothing_new = False
7955                new_prev = True
7956                break
7957        if nothing_new:
7958            write_log(
7959                    release_id,
7960                    'info',
7961                    'End of DIFF_PAIR. Returning %s',
7962                    None)
7963            return None
7964        else:
7965            new_prev = False
7966            for i, (ti_bit, new) in enumerate(ti_rich_list):
7967                write_log(release_id, 'info', "Create new for %s?", ti_bit)
7968                if new:
7969                    write_log(release_id, 'info', "Yes for %s", ti_bit)
7970                    if not new_prev:
7971                        if i > 0:
7972                            # check to see if the last char of the prev
7973                            # punctuation group needs to be added first
7974                            if len(ti_rich_list[i - 1][0][1]) > 1:
7975                                # i.e. ti_bit[1][-1] of previous loop
7976                                ti_new.append(ti_rich_list[i - 1][0][1][-1])
7977                    ti_new.append(ti_bit[0])
7978                    if len(ti_bit[1]) > 1:
7979                        if i < len(ti_rich_list) - 1:
7980                            if ti_rich_list[i + 1][1]:
7981                                ti_new.append(ti_bit[1])
7982                            else:
7983                                ti_new.append(ti_bit[1][:-1])
7984                        else:
7985                            ti_new.append(ti_bit[1])
7986                    else:
7987                        ti_new.append(ti_bit[1])
7988                    write_log(
7989                            release_id,
7990                            'info',
7991                            "appended %s. ti_new is now %s",
7992                            ti_bit,
7993                            ti_new)
7994                else:
7995                    write_log(release_id, 'info', "Not for %s", ti_bit)
7996                    if new != new_prev:
7997                        ti_new.append(u"\u2026" + ' ')
7998
7999                new_prev = new
8000        if ti_new:
8001            write_log(release_id, 'info', "ti_new %s", ti_new)
8002            ti = ''.join(ti_new)
8003            write_log(release_id, 'info', "New text from title = %s", ti)
8004        else:
8005            write_log(release_id, 'info', "New text empty")
8006            write_log(
8007                    release_id,
8008                    'info',
8009                    'End of DIFF_PAIR. Returning %s',
8010                    None)
8011            return None
8012        # see if there is any significant difference between the strings
8013        if ti:
8014            nopunc_ti = ti_compare  # was  = self.boil(release_id, ti)
8015            # not necessary as already set?
8016            nopunc_mb = self.boil(release_id, mb)
8017            # ti_len = len(nopunc_ti) use compare_length instead (= len before
8018            # removals and additions)
8019            substring_proportion = float(
8020                self.options[track]["cwp_substring_match"]) / 100
8021            sub_len = compare_length * substring_proportion
8022            if substring_proportion < 1:
8023                write_log(release_id, 'info', "test sub....")
8024                lcs = longest_common_substring(nopunc_mb, nopunc_ti)['string']
8025                write_log(
8026                        release_id,
8027                        'info',
8028                        "Longest common substring is: %s. Threshold length is %s",
8029                        lcs,
8030                        sub_len)
8031                if len(lcs) >= sub_len:
8032                    write_log(
8033                            release_id,
8034                            'info',
8035                            'End of DIFF_PAIR. Returning %s',
8036                            None)
8037                    return None
8038            write_log(release_id, 'info', "...done, ti =%s", ti)
8039        # remove duplicate successive words (and remove first word of title
8040        # item if it duplicates last word of mb item)
8041        if ti:
8042            ti_list_new = re.split(' ', ti)
8043            ti_list_ref = ti_list_new
8044            ti_bit_prev = None
8045            for i, ti_bit in enumerate(ti_list_ref):
8046                if ti_bit != "...":
8047
8048                    if i > 1:
8049                        if self.boil(
8050                                release_id, ti_bit) == self.boil(
8051                                release_id, ti_bit_prev):
8052                            dup = ti_list_new.pop(i)
8053                            write_log(release_id, 'info', "...removed dup %s", dup)
8054
8055                ti_bit_prev = ti_bit
8056            if ti_list_new and mb_list2:
8057                write_log(release_id,
8058                          'info',
8059                          "1st word of ti = %s. Last word of mb = %s",
8060                          ti_list_new[0],
8061                          mb_list2[-1])
8062                if self.boil(release_id, ti_list_new[0]) == mb_list2[-1]:
8063                    write_log(release_id, 'info', "Removing 1st word from ti...")
8064                    first = ti_list_new.pop(0)
8065                    write_log(release_id, 'info', "...removed %s", first)
8066            else:
8067                write_log(
8068                        release_id,
8069                        'info',
8070                        'End of DIFF_PAIR. Returning %s',
8071                        None)
8072                return None
8073            if ti_list_new:
8074                ti = ' '.join(ti_list_new)
8075            else:
8076                write_log(
8077                        release_id,
8078                        'info',
8079                        'End of DIFF_PAIR. Returning %s',
8080                        None)
8081                return None
8082        # remove excess brackets and punctuation
8083        if ti:
8084            ti = strip_excess_punctuation(ti)
8085            write_log(release_id, 'info', "stripped punc ok. ti = %s", ti)
8086        write_log(
8087                release_id,
8088                'debug',
8089                "DIFF_PAIR is returning ti = %s",
8090                ti)
8091        if ti and len(ti) > 0:
8092            write_log(
8093                    release_id,
8094                    'info',
8095                    'End of DIFF_PAIR. Returning %s',
8096                    ti)
8097            return ti
8098        else:
8099            write_log(
8100                    release_id,
8101                    'info',
8102                    'End of DIFF_PAIR. Returning %s',
8103                    None)
8104            return None
8105
8106
8107    @staticmethod
8108    def canonize_opus(release_id, track, s):
8109        """
8110        make opus numbers etc. into one-word items
8111        :param release_id:
8112        :param track:
8113        :param s: A string
8114        :return:
8115        """
8116        write_log(release_id, 'debug', 'Canonizing: %s', s)
8117        # Canonize catalogue & opus numbers (e.g. turn K. 126 into K126 or K
8118        # 345a into K345a or op. 144 into op144):
8119        regex = re.compile(
8120            r'\b((?:op|no|k|kk|kv|L|B|Hob|S|D|M)|\w+WV)\W?\s?(\d+\-?\u2013?\u2014?\d*\w*)\b',
8121            re.IGNORECASE)
8122        regex_match = regex.search(s)
8123        s_canon = s
8124        if regex_match and len(regex_match.groups()) == 2:
8125            pt1 = regex_match.group(1) or ''
8126            pt2 = regex_match.group(2) or ''
8127            if regex_match.group(1) and regex_match.group(2):
8128                pt1 = re.sub(
8129                    r'^\W*no\b',
8130                    '',
8131                    regex_match.group(1),
8132                    flags=re.IGNORECASE)
8133            s_canon = pt1 + pt2
8134        write_log(release_id, 'info', 'canonized item = %s', s_canon)
8135        return s_canon
8136
8137    @staticmethod
8138    def canonize_key(release_id, track, s):
8139        """
8140        make keys into standardized one-word items
8141        :param release_id:
8142        :param track:
8143        :param s: A string
8144        :return:
8145        """
8146        write_log(release_id, 'debug', 'Canonizing: %s', s)
8147        match = RE_KEYS.search(s)
8148        s_canon = s
8149        if match:
8150            if match.group(2):
8151                k2 = re.sub(
8152                    r'\-sharp|\u266F',
8153                    'sharp',
8154                    match.group(2),
8155                    flags=re.IGNORECASE)
8156                k2 = re.sub(r'\-flat|\u266D', 'flat', k2, flags=re.IGNORECASE)
8157                k2 = k2.replace('-', '')
8158            else:
8159                k2 = ''
8160            if not match.group(3) or match.group(
8161                    3).strip() == '':  # if the scale is not given, assume it is the major key
8162                if match.group(1).isupper(
8163                ) or k2 != '':  # but only if it is upper case or has an accent
8164                    k3 = 'major'
8165                else:
8166                    k3 = ''
8167            else:
8168                k3 = match.group(3).strip()
8169            s_canon = match.group(1).strip() + k2.strip() + k3
8170        write_log(release_id, 'info', 'canonized item = %s', s_canon)
8171        return s_canon
8172
8173    @staticmethod
8174    def canonize_synonyms(release_id, tuples, s):
8175        """
8176        make synonyms equal
8177        :param release_id:
8178        :param tuples
8179        :param s: A string
8180        :return:
8181        """
8182        write_log(release_id, 'debug', 'Canonizing: %s', s)
8183        s_canon = s
8184        syn_patterns = []
8185        syn_subs = []
8186        for syn_tup in tuples:
8187            syn_pattern = r'((?:^|\W)' + \
8188                r'(?:$|\W)|(?:^|\W)'.join(syn_tup) + r'(?:$|\W))'
8189            syn_patterns.append(syn_pattern)
8190            # to get the last synonym in the tuple - the canonical form
8191            syn_sub = syn_tup[-1:][0]
8192            syn_subs.append(syn_sub)
8193        for syn_ind, pattern in enumerate(syn_patterns):
8194            regex = re.compile(pattern, re.IGNORECASE)
8195            regex_match = regex.search(s)
8196            if regex_match:
8197                test_reg = regex_match.group().strip()
8198                s_canon = s_canon.replace(test_reg, syn_subs[syn_ind])
8199
8200        write_log(release_id, 'info', 'canonized item = %s', s_canon)
8201        return s_canon
8202
8203    def find_synonyms(self, release_id, track, reg_item):
8204        """
8205        extend regex item to include synonyms
8206        :param release_id:
8207        :param track:
8208        :param reg_item: A regex portion
8209        :return: reg_new: A replacement for reg_item that includes all its synonyms
8210         (if reg_item matches the last in a synonym tuple)
8211        """
8212        write_log(release_id, 'debug', 'Finding synonyms of: %s', reg_item)
8213        syn_others = []
8214        syn_all = []
8215        for syn_tup in self.synonyms[track]:
8216            # to get the last synonym in the tuple - the canonical form
8217            syn_last = syn_tup[-1:][0]
8218            if re.match(r'^\s*' + reg_item + r'\s*$', syn_last, re.IGNORECASE):
8219                syn_others += syn_tup[:-1]
8220                syn_all += syn_tup
8221        if syn_others:
8222            reg_item = '(?:' + ')|(?:'.join(syn_others) + \
8223                ')|(?:' + reg_item + ')'
8224
8225        write_log(release_id, 'info', 'new regex item = %s', reg_item)
8226        return reg_item, syn_all
8227
    def listify(self, release_id, track, s):
        """
        Turn a string into a list of 'words', where words may also be phrases which
        are then 'canonized' - i.e. turned into equivalents for comparison purposes.
        Matching is done in two passes: opus/catalogue numbers and keys first, then
        (on a copy of the string with those matches blanked out) synonyms and the
        remaining words. The results of both passes are merged in order of position.
        :param release_id: name for log file
        :param track: key into self.synonyms and self.options
        :param s: string
        :return: a dict with two entries of equal length:
                 's_tuple': a tuple of all the **match objects** (re words and defined phrases)
                 's_test_tuple': a tuple of the matched and canonized words and phrases (i.e. a tuple of strings, not objects)
        """
        tuples = self.synonyms[track]
        # just list anything that is a synonym (with word boundary markers)
        syn_pattern = '|'.join(
            [r'(?:^|\W|\b)' + x + r'(?:$|\W)' for y in self.synonyms[track] for x in y])
        # Each find_synonyms call returns (regex fragment extended with any synonyms,
        # list of all the synonyms used); both parts are needed below
        op = self.find_synonyms(
            release_id,
            track,
            r'(?:op|no|k|kk|kv|L|B|Hob|S|D|M|\w+WV)')
        op_groups = op[0]
        op_all = op[1]
        notes = self.find_synonyms(release_id, track, r'[ABCDEFG]')
        notes_groups = notes[0]
        notes_all = notes[1]
        sharp = self.find_synonyms(release_id, track, r'sharp')
        sharp_groups = sharp[0]
        sharp_all = sharp[1]
        flat = self.find_synonyms(release_id, track, r'flat')
        flat_groups = flat[0]
        flat_all = flat[1]
        major = self.find_synonyms(release_id, track, r'major')
        major_groups = major[0]
        major_all = major[1]
        minor = self.find_synonyms(release_id, track, r'minor')
        minor_groups = minor[0]
        minor_all = minor[1]
        # NB the capturing groups in the patterns below are referred to by number
        # later on (see the numbered list inside the first loop), so any change to
        # the bracketing here must be reflected in matches_list
        opus_pattern = r"(?:\b((?:(" + op_groups + \
            r"))\W?\s?\d+\-?\u2013?\u2014?\d*\w*)\b)"
        note_pattern = r"(\b" + notes_groups + r")"
        accent_pattern = r"(?:\-(" + sharp_groups + r")(?:\s+|\b)|\-(" + flat_groups + r")(?:\s+|\b)|\s(" + sharp_groups + \
                         r")(?:\s+|\b)|\s(" + flat_groups + r")(?:\s+|\b)|\u266F(?:\s+|\b)|\u266D(?:\s+|\b)|(?:[:,.]?\s+|$|\-))"
        scale_pattern = r"(?:((" + major_groups + \
            r")|(" + minor_groups + r"))?\b)"
        key_pattern = note_pattern + accent_pattern + scale_pattern
        hyphen_split_pattern = r"(?:\b|\"|\')(\w+['’]?\w*)|(?:\b\w+\b)|(\B\&\B)"
        # treat em-dash and en-dash as hyphens
        hyphen_embed_pattern = r"(?:\b|\"|\')(\w+['’\-\u2013\u2014]?\w*)|(?:\b\w+\b)|(\B\&\B)"

        # The regex is split into two iterations as putting it all together can have unpredictable consequences
        # - may match synonyms before op's even though that is later in the string

        # First match the op's and keys
        regex_1 = opus_pattern + r"|(" + key_pattern + r")"
        matches_1 = re.finditer(regex_1, s, re.UNICODE | re.IGNORECASE)
        s_list = []  # match objects, in the order found
        s_test_list = []  # the corresponding canonized strings
        # copy of s in which everything matched in the first pass is overwritten
        # with '#', so that the second pass cannot match it again
        s_scrubbed = s
        # all_synonyms_lists[n] holds the synonyms that may occur in group number
        # matches_list[n] of regex_1 (sharp and flat each appear twice: once for the
        # hyphenated and once for the non-hyphenated group)
        all_synonyms_lists = [
            op_all,
            notes_all,
            sharp_all,
            flat_all,
            sharp_all,
            flat_all,
            major_all,
            minor_all]
        matches_list = [2, 4, 5, 6, 7, 8, 10, 11]
        for match in matches_1:
            test_a = match.group()  # the text that will be canonized
            # match_a[j] is group j of the match (match_a[0] being the whole match)
            match_a = []
            match_a.append(match.group())
            for j in range(1, 12):
                match_a.append(match.group(j))
            # 0. overall match
            # 1. overall opus match
            # 2. 2-char op match
            # 3. overall key match
            # 4. note match
            # 5. hyphenated sharp match
            # 6. hyphenated flat match
            # 7. non-hyphenated sharp match
            # 8. non-hyphenated flat match
            # 9. overall scale match
            # 10. major match
            # 11. minor match
            # Replace any synonym found in one of the groups by its canonical form,
            # both in the group itself and in the overall text
            for i, all_synonyms_list in enumerate(all_synonyms_lists):
                if all_synonyms_list and match_a[matches_list[i]]:
                    # the synonyms (used as regexes) that match the start of this group
                    match_regex = [re.match(pattern, match_a[matches_list[i]], re.IGNORECASE).group()
                                   for pattern in all_synonyms_list
                                   if re.match(pattern, match_a[matches_list[i]], re.IGNORECASE)]
                    if match_regex:
                        match_a[matches_list[i]] = self.canonize_synonyms(
                            release_id, tuples, match_a[matches_list[i]])
                        test_a = re.sub(r"\b" + match_regex[0] + r"(?:\b|$|\s|\.)",
                                        match_a[matches_list[i]],
                                        test_a, flags=re.IGNORECASE)
            if match_a[1]:
                # opus / catalogue number: canonize the text, ignoring any
                # punctuation at either end
                clean_opus = test_a.strip(' ,.:;/-?"')
                test_a = re.sub(
                    re.escape(clean_opus),
                    self.canonize_opus(
                        release_id,
                        track,
                        clean_opus),
                    test_a,
                    flags=re.IGNORECASE)
            if match_a[3]:
                # key: canonize the text, ignoring any punctuation at either end
                clean_key = test_a.strip(' ,.:;/-?"')
                test_a = re.sub(
                    re.escape(clean_key),
                    self.canonize_key(
                        release_id,
                        track,
                        clean_key),
                    test_a,
                    flags=re.IGNORECASE)

            s_test_list.append(test_a)
            s_list.append(match)
            # blank out the matched span (s_scrubbed stays the same length as s, so
            # that match positions from both passes are comparable)
            s_scrubbed_list = list(s_scrubbed)
            for char in range(match.start(), match.end()):
                if len(s_scrubbed_list) >= match.end():  # belt and braces
                    s_scrubbed_list[char] = '#'
            s_scrubbed = ''.join(s_scrubbed_list)

        # Then match the synonyms and remaining words
        if self.options[track]["cwp_split_hyphenated"]:
            regex_2 = r"(" + syn_pattern + r")|" + hyphen_split_pattern
            # allow ampersands and non-latin characters as word characters. Treat apostrophes as part of words.
            # Treat opus and catalogue entries - e.g. K. 657 or OP.5 or op. 35a or CD 144 or BWV 243a - as one word
            # also treat ranges of opus numbers (connected by dash, en dash or
            # em dash) as one word
        else:
            regex_2 = r"(" + syn_pattern + r")|" + hyphen_embed_pattern
            # as previous but also treat embedded hyphens as part of words.
        matches_2 = re.finditer(
            regex_2, s_scrubbed, re.UNICODE | re.IGNORECASE)
        for match in matches_2:
            # group 1 is the synonym alternative of regex_2
            if match.group(1) and match.group(1) == match.group():
                s_test_list.append(
                    self.canonize_synonyms(
                        release_id,
                        tuples,
                        match.group(1)))  # synonym
            else:
                s_test_list.append(match.group())
            s_list.append(match)
        if s_list:
            # merge the results of the two passes into string order, keeping each
            # match object paired with its canonized text
            s_zip = list(zip(s_list, s_test_list))
            s_list, s_test_list = zip(
                *sorted(s_zip, key=lambda tup: tup[0].start()))
        s_tuple = tuple(s_list)
        s_test_tuple = tuple(s_test_list)
        return {'s_tuple': s_tuple, 's_test_tuple': s_test_tuple}
8381
8382    def get_text_tuples(self, release_id, track, text_type):
8383        """
8384        Return synonym or 'replacement' tuples
8385        :param release_id:
8386        :param track:
8387        :param text_type: 'replacements' or 'synonyms'
8388        Note that code in this method refers to synonyms (as that was written first), but applies equally to replacements and ui_tags
8389        :return:
8390        """
8391        tm = track.metadata
8392        strsyns = re.split(r'(?<!\\)/',
8393                           self.options[track]["cwp_" + text_type])
8394        synonyms = []
8395        for syn in strsyns:
8396            tup_match = re.search(r'\((.*)\)', syn)
8397            if tup_match:
8398                # to ignore escaped commas
8399                tup = re.split(r'(?<!\\),', tup_match.group(1))
8400            else:
8401                tup = ''
8402            if len(tup) >= 2:
8403                for i, ts in enumerate(tup):
8404                    tup[i] = ts.strip("' ").strip('"')
8405                    if len(
8406                            tup[i]) > 4 and tup[i][0] == "!" and tup[i][1] == "!" and tup[i][-1] == "!" and tup[i][-2] == "!":
8407                        # we have a reg ex inside - this deals with legacy
8408                        # replacement text where enclosure in double-shouts was
8409                        # required
8410                        tup[i] = tup[i][2:-2]
8411                    if (i < len(tup) - 1 or text_type ==
8412                            'synonyms') and not tup[i]:
8413                        write_log(
8414                            release_id,
8415                            'warning',
8416                            '%s: entries must not be blank - error in %s',
8417                            text_type,
8418                            syn)
8419                        if self.WARNING:
8420                            self.append_tag(
8421                            release_id,
8422                            tm,
8423                            '~cwp_warning',
8424                            '7. ' + text_type + ': entries must not be blank - error in ' + syn)
8425                        tup[i] = "**BAD**"
8426                    elif [tup for t in synonyms if tup[i] in t]:
8427                        write_log(
8428                            release_id,
8429                            'warning',
8430                            '%s: keys cannot duplicate any in existing %s - error in %s '
8431                            '- omitted from %s. To fix, place all %s in one tuple.',
8432                            text_type,
8433                            text_type,
8434                            syn,
8435                            text_type,
8436                            text_type)
8437                        if self.WARNING:
8438                            self.append_tag(release_id, tm, '~cwp_warning',
8439                                        '7. ' + text_type + ': keys cannot duplicate any in existing ' + text_type + ' - error in ' +
8440                                        syn + ' - omitted from ' + text_type + '. To fix, place all ' + text_type + ' in one tuple.')
8441                        tup[i] = "**BAD**"
8442                if "**BAD**" in tup:
8443                    continue
8444                else:
8445                    synonyms.append(tup)
8446            else:
8447                write_log(
8448                    release_id,
8449                    'warning',
8450                    'Error in %s format for %s',
8451                    text_type,
8452                    syn)
8453                if self.WARNING:
8454                    self.append_tag(
8455                    release_id,
8456                    tm,
8457                    '~cwp_warning',
8458                    '7. Error in ' +
8459                    text_type +
8460                    ' format for ' +
8461                    syn)
8462        write_log(release_id, 'info', "%s: %s", text_type, synonyms)
8463        return synonyms
8464
8465    @staticmethod
8466    def stencil(release_id, matches_tuple, test_string):
8467        """
8468        Produce lists of matching items, AND the items in between, in equal length lists
8469        :param release_id:
8470        :param matches_tuple: tuple of regex matches
8471        :param test_string: original string used in regex
8472        :return: 'match list' - list of matched strings, 'gap list' - list of strings in gaps between matches
8473        """
8474        match_items = []
8475        gap_items = []
8476        dummy = False
8477        pointer = 0
8478        write_log(
8479                release_id,
8480                'debug',
8481                'In fn stencil. test_string = %s. matches_tuple = %s',
8482                test_string,
8483                matches_tuple)
8484        for match_num, match in enumerate(matches_tuple):
8485            start = match.start()
8486            end = match.end()
8487            if start > pointer:
8488                if pointer == 0:
8489                    # add a null word item at start to keep the lists the same
8490                    # length
8491                    match_items.append('')
8492                    dummy = True
8493                gap_items.append(test_string[pointer:start])
8494            else:
8495                if pointer > 0:
8496                    # shouldn't happen, but just in case there are two word
8497                    # items with no gap
8498                    gap_items.append('')
8499            match_items.append(test_string[start:end])
8500            pointer = end
8501            if match_num + 1 == len(matches_tuple):
8502                # pick up any punc items at end
8503                gap_items.append(test_string[pointer:])
8504        return {
8505            'match list': match_items,
8506            'gap list': gap_items,
8507            'dummy': dummy}
8508
8509    def boil(self, release_id, s):
8510        """
8511        Remove punctuation, spaces, capitals and accents for string comparisons
8512        :param release_id: name for log file - usually =musicbrainz_albumid
8513        unless called outside metadata processor
8514        :param s:
8515        :return:
8516        """
8517        write_log(release_id, 'debug', "boiling %s", s)
8518        s = s.lower()
8519        s = replace_roman_numerals(s)
8520        s = s.replace('sch', 'sh')\
8521            .replace(u'\xdf', 'ss')\
8522            .replace('sz', 'ss')\
8523            .replace(u'\u0153', 'oe')\
8524            .replace('oe', 'o')\
8525            .replace(u'\u00fc', 'ue')\
8526            .replace('ue', 'u')\
8527            .replace(u'\u00e6', 'ae')\
8528            .replace('ae', 'a')\
8529            .replace(u'\u266F', 'sharp')\
8530            .replace(u'\u266D', 'flat')\
8531            .replace(u'\u2013', '-')\
8532            .replace(u'\u2014', '-')
8533        # first term above is to remove the markers used for synonyms, to
8534        # enable a true comparison
8535        punc = re.compile(r'\W*', re.ASCII)
8536        s = ''.join(
8537            c for c in unicodedata.normalize(
8538                'NFD',
8539                s) if unicodedata.category(c) != 'Mn')
8540        boiled = punc.sub('', s).strip().lower().rstrip("s'")
8541        write_log(release_id, 'debug', "boiled result = %s", boiled)
8542        return boiled
8543
8544
8545################
8546# OPTIONS PAGE #
8547################
8548
class ClassicalExtrasOptionsPage(OptionsPage):
    """
    The plugin's page in the Picard options dialog. All the options are defined
    by plugin_options(); this class registers them with Picard and copies their
    values between the Picard configuration and the widgets of the page.
    """
    NAME = "classical_extras"
    TITLE = "Classical Extras"
    PARENT = "plugins"
    opts = (plugin_options('artists') + plugin_options('tag')
            + plugin_options('tag_detail') + plugin_options('workparts')
            + plugin_options('genres') + plugin_options('other'))

    options = [
        IntOption("persist", 'ce_tab', 0)
    ]
    # custom logging for non-album-related messages is written to session.log
    for opt in opts:
        if 'type' not in opt:
            continue
        default = opt.get('default', "")
        if opt['type'] == 'Boolean':
            options.append(BoolOption("setting", opt['option'], default))
        elif opt['type'] in ('Text', 'Combo', 'PlainText'):
            options.append(TextOption("setting", opt['option'], default))
        elif opt['type'] == 'Integer':
            options.append(IntOption("setting", opt['option'], default))
        else:
            write_log(
                "session",
                'error',
                "Error in setting options for option = %s",
                opt['option'])

    def __init__(self, parent=None):
        super().__init__(parent)
        self.ui = Ui_ClassicalExtrasOptionsPage()
        self.ui.setupUi(self)

    @staticmethod
    def _ui_name(option_name):
        """
        Give the name of the widget for an option: the same as the option name,
        except for the two main switches
        :param option_name: name of the option in the Picard settings
        :return: name of the corresponding attribute of self.ui
        """
        special_names = {
            'classical_work_parts': 'use_cwp',
            'classical_extra_artists': 'use_cea',
        }
        return special_names.get(option_name, option_name)

    def load(self):
        """
        Set the widgets from the saved options - NB all options are set in
        plugin_options, so this just parses that
        :return:
        """
        opts = (plugin_options('artists') + plugin_options('tag')
                + plugin_options('tag_detail') + plugin_options('workparts')
                + plugin_options('genres') + plugin_options('other'))

        # To force a toggle so that signal given
        toggle_list = ['use_cwp',
                       'use_cea',
                       'cea_override',
                       'cwp_override',
                       'cea_ra_use',
                       'cea_split_lyrics',
                       'cwp_partial',
                       'cwp_arrangements',
                       'cwp_medley',
                       'cwp_use_muso_refdb',
                       'ce_show_ui_tags',]

        # widget method used to show the value of each type of option
        setter_names = {
            'Boolean': 'setChecked',
            'Text': 'setText',
            'PlainText': 'setPlainText',
            'Combo': 'setEditText',
            'Integer': 'setValue',
        }

        # open at last used tab
        if 'ce_tab' in config.persist:
            cfg_val = config.persist['ce_tab'] or 0
            if 0 <= cfg_val <= 5:
                self.ui.tabWidget.setCurrentIndex(cfg_val)
        else:
            self.ui.tabWidget.setCurrentIndex(0)

        for opt in opts:
            option_name = opt['option']
            ui_name = self._ui_name(option_name)
            if ui_name in toggle_list:
                # first set the opposite of the saved value, so that setting the
                # saved value (below) is a change
                inverted = not self.config.setting[option_name]
                self.ui.__dict__[ui_name].setChecked(inverted)

            setter_name = setter_names.get(opt['type'])
            if setter_name is None:
                write_log(
                    'session',
                    'error',
                    "Error in loading options for option = %s",
                    option_name)
            else:
                widget = self.ui.__dict__[ui_name]
                getattr(widget, setter_name)(self.config.setting[option_name])

    def save(self):
        """
        Store the state of the widgets (and the tab currently shown) in the Picard
        configuration
        :return:
        """
        opts = (plugin_options('artists') + plugin_options('tag')
                + plugin_options('tag_detail') + plugin_options('workparts')
                + plugin_options('genres') + plugin_options('other'))

        # save tab setting
        config.persist['ce_tab'] = self.ui.tabWidget.currentIndex()

        # for each type of option: the widget method which reads the value and
        # whether the value read is to be converted to str
        readers = {
            'Boolean': ('isChecked', False),
            'Text': ('text', True),
            'PlainText': ('toPlainText', True),
            'Combo': ('currentText', True),
            'Integer': ('value', False),
        }

        for opt in opts:
            option_name = opt['option']
            ui_name = self._ui_name(option_name)
            reader = readers.get(opt['type'])
            if reader is None:
                write_log(
                    'session',
                    'error',
                    "Error in saving options for option = %s",
                    option_name)
                continue
            getter_name, as_text = reader
            value = getattr(self.ui.__dict__[ui_name], getter_name)()
            if as_text:
                value = str(value)
            self.config.setting[option_name] = value
8681
8682
8683#################
8684# MAIN ROUTINE  #
8685#################
8686
# Everything below runs once, when the plugin module is loaded by Picard.
# The order of the statements matters: option defaults are forced and the
# reference data is read before the metadata processors are registered.

# custom logging for non-album-related messages is written to session.log
write_log('session', 'basic', 'Loading ' + PLUGIN_NAME)

# SET UI COLUMNS FOR PICARD RHS
# Only done if the user has both switched the feature on and defined some tags.
if config.setting['ce_show_ui_tags'] and config.setting['ce_ui_tags']:
    from picard.ui.itemviews import MainPanel
    UI_TAGS = get_ui_tags().items()
    for heading, tag_names in UI_TAGS:
        # one extra column per heading, showing the hidden tag '~<heading>_VAL'
        heading_tag = '~' + heading + '_VAL'
        MainPanel.columns.append((N_(heading), heading_tag))
    write_log('session', 'info', 'UI_TAGS')
    write_log('session', 'info', UI_TAGS)


# set defaults for certain options that MUST be manually changed by the
# user each time they are to be over-ridden
config.setting['use_cache'] = True
config.setting['ce_options_overwrite'] = False
config.setting['track_ars'] = True
config.setting['release_ars'] = True


# REFERENCE DATA
# Read using the path and file name given in the options. The result is used
# here as a dict with 'composers', 'periods' and 'genres' entries.
REF_DICT = get_references_from_file(
    'session',
    config.setting['cwp_muso_path'],
    config.setting['cwp_muso_refdb'])
write_log('session', 'info', 'External references (Muso):')
write_log('session', 'info', REF_DICT)
COMPOSER_DICT = REF_DICT['composers']
# An empty set of reference data is only reported as an error if the user has
# selected an option which uses it (likewise for periods and genres below)
if config.setting['cwp_muso_classical'] and not COMPOSER_DICT:
    write_log('session', 'error', 'No composer roster found')
# Add lower-case copies of the names and sort names to each composer entry
# NOTE(review): this loop assumes COMPOSER_DICT is iterable even when no roster
# was found (e.g. an empty list rather than None) - confirm against
# get_references_from_file
for cd in COMPOSER_DICT:
    cd['lc_name'] = [c.lower() for c in cd['name']]
    cd['lc_sort'] = [c.lower() for c in cd['sort']]
PERIOD_DICT = REF_DICT['periods']
if (config.setting['cwp_muso_dates']
        or config.setting['cwp_muso_periods']) and not PERIOD_DICT:
    write_log('session', 'error', 'No period map found')
GENRE_DICT = REF_DICT['genres']
if config.setting['cwp_muso_genres'] and not GENRE_DICT:
    write_log('session', 'error', 'No classical genre list found')

# API CALLS
# Register the two track metadata processors (works/parts and extra artists)
# and the options page with Picard
register_track_metadata_processor(PartLevels().add_work_info)
register_track_metadata_processor(ExtraArtists().add_artist_info)
register_options_page(ClassicalExtrasOptionsPage)

# END
# NOTE(review): "intialisation" is misspelt in the log message below; left as
# is because it is runtime text
write_log('session', 'basic', 'Finished intialisation')
8737