1# -*- coding: utf-8 -*-
2# ------------------------------------------------------------------------------
3# Name:         features.native.py
4# Purpose:      music21 feature extractors
5#
6# Authors:      Christopher Ariza
7#
8# Copyright:    Copyright © 2011 Michael Scott Cuthbert and the music21 Project
9# License:      BSD, see license.txt
10# ------------------------------------------------------------------------------
11'''
12Original music21 feature extractors.
13'''
14import unittest
15from typing import Optional
16
17from music21.features import base as featuresModule
18from music21 import text
19from music21 import environment
20_MOD = 'features.native'
21environLocal = environment.Environment(_MOD)
22
23# ------------------------------------------------------------------------------
24# ideas for other music21 features extractors
25
26# notation features: clef usage, enharmonic usage
27# chromatic alteration related to beat position
28
29# key signature histogram
30# array of circle of fifths
31
32# lyrics
33# Luca Gloria:
34# searching for numbers of hits
35# vowel metrical position
36# idea of language/text specific -- DONE
37
38# Essen locale and elevation
39
40# automatic key analysis
41# as a method of feature extraction
42
43# key detection on windowed segments
44# prevalence m/M over 4 bar windows
45
46# key ambiguity list
47# correlation coefficient
48# harmony realization also adds pitches not available in midi
49
50
51# ------------------------------------------------------------------------------
class NativeFeatureException(featuresModule.FeatureException):
    '''
    Error raised by the feature extractors defined in this module, for example
    when the input has no notes to measure or no mode can be determined.
    '''
    pass
54
55
class QualityFeature(featuresModule.FeatureExtractor):
    '''
    Extends the jSymbolic QualityFeature to automatically find mode.

    Set to 0 if the key signature indicates that
    a recording is major, set to 1 if it indicates
    that it is minor.  A Music21
    addition: if no key mode is found in the piece, or conflicting modes in the keys,
    analyze the piece to discover what mode it is most likely in.

    Example: Handel, Rinaldo Aria (musicxml) is explicitly encoded as being in Major:

    >>> s = corpus.parse('handel/rinaldo/lascia_chio_pianga')
    >>> fe = features.native.QualityFeature(s)
    >>> f = fe.extract()
    >>> f.vector
    [0]

    now we will try it with the last movement of Schoenberg's opus 19 which has
    no mode explicitly encoded in the musicxml but which our analysis routines
    believe (having very little to go on) fits the profile of e minor best.

    >>> schoenberg19mvmt6 = corpus.parse('schoenberg/opus19', 6)
    >>> fe2 = features.native.QualityFeature(schoenberg19mvmt6)
    >>> f2 = fe2.extract()
    >>> f2.vector
    [1]


    OMIT_FROM_DOCS

    # for monophonic melodies
    # incomplete measures / pickups for monophonic melodies

    '''
    id = 'P22'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Quality'
        self.description = '''
            Set to 0 if the Key or KeySignature indicates that
            a recording is major, set to 1 if it indicates
            that it is minor.
            Music21 addition: if no key mode is found in the piece, or conflicting
            modes in the keys, analyze the piece to
            discover what mode it is most likely in.
            '''
        self.isSequential = True
        self.dimensions = 1

    def process(self):
        '''
        Do processing necessary, storing result in feature.

        The encoded Key objects are trusted only when they all agree on a
        single mode and that mode is 'major' (stored as 0) or 'minor'
        (stored as 1).  In every other situation -- no keys, conflicting
        modes, or a shared mode that is neither major nor minor -- the
        analyzed key of the piece decides.

        Raises NativeFeatureException if the analyzed key does not
        report a major or minor mode either.
        '''
        modeToValue = {'major': 0, 'minor': 1}

        allKeys = self.data['flat.getElementsByClass(Key)']
        # Different parts may carry different keys (e.g. because of
        # transposition); only the set of distinct modes matters here.
        encodedModes = {k.mode for k in allKeys}
        if len(encodedModes) == 1:
            (onlyMode,) = encodedModes
            if onlyMode in modeToValue:
                self.feature.vector[0] = modeToValue[onlyMode]
                return
        # otherwise -- back to analysis.

        analyzedMode = self.data['flat.analyzedKey'].mode
        if analyzedMode not in modeToValue:
            raise NativeFeatureException(
                'should be able to get a mode from something here -- '
                + 'perhaps there are no notes?'
            )

        self.feature.vector[0] = modeToValue[analyzedMode]
153
154
155# ------------------------------------------------------------------------------
class TonalCertainty(featuresModule.FeatureExtractor):
    '''
    Reports the tonalCertainty value of the automatically analyzed key
    as a one-element vector.

    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.TonalCertainty(s)
    >>> f = fe.extract()
    >>> f.vector
    [1.26...]

    >>> pitches = [56, 55, 56, 57, 58, 57, 58, 59, 60, 59, 60, 61, 62, 61,
    ...            62, 63, 64, 63, 64, 65, 66, 65, 66, 67]
    >>> s = stream.Stream()
    >>> for pitch in pitches:
    ...   s.append(note.Note(pitch))
    >>> features.native.TonalCertainty(s).extract().vector
    [0.0]
    '''
    id = 'K1'  # TODO: need id

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Tonal Certainty'
        self.description = (
            'A floating point magnitude value that suggest '
            'tonal certainty based on automatic key analysis.'
        )
        self.dimensions = 1
        self.discrete = False

    def process(self):
        '''
        Copy the analyzed key's tonal certainty into the feature vector.
        '''
        certainty = self.data['flat.analyzedKey.tonalCertainty']
        self.feature.vector[0] = certainty
187
188
189# ------------------------------------------------------------------------------
190# features that use metrical distinctions
191
class FirstBeatAttackPrevalence(featuresModule.FeatureExtractor):
    '''
    NOT IMPLEMENTED!

    This class defines no process() of its own; only the descriptive
    attributes are set up.

    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.FirstBeatAttackPrevalence(s)
    >>> f = fe.extract()
    >>> f.vector
    [0]

    TODO: Implement!
    '''
    id = 'MP1'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'First Beat Attack Prevalence'
        self.description = (
            'Fraction of first beats of a measure that have '
            'notes that start on this beat.'
        )
        self.dimensions = 1
        self.discrete = False
214
215
216# ------------------------------------------------------------------------------
217# employing symbolic durations
218
219
class UniqueNoteQuarterLengths(featuresModule.FeatureExtractor):
    '''
    Counts how many distinct quarter lengths occur among the notes.

    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.UniqueNoteQuarterLengths(s)
    >>> fe.extract().vector
    [3]
    '''
    id = 'QL1'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Unique Note Quarter Lengths'
        self.description = 'The number of unique note quarter lengths.'
        self.dimensions = 1
        self.discrete = True

    def process(self):
        '''
        Store the number of histogram entries with a positive count.
        '''
        lengthCounts = self.data['flat.notes.quarterLengthHistogram']
        # entries are expected to be positive already; the filter is a safeguard
        self.feature.vector[0] = sum(
            1 for ql in lengthCounts if lengthCounts[ql] > 0
        )
247
248
class MostCommonNoteQuarterLength(featuresModule.FeatureExtractor):
    '''
    Reports the quarter length that occurs most often among the notes.

    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.MostCommonNoteQuarterLength(s)
    >>> fe.extract().vector
    [1.0]
    '''
    id = 'QL2'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Most Common Note Quarter Length'
        self.description = 'The value of the most common quarter length.'
        self.dimensions = 1
        self.discrete = False

    def process(self):
        '''
        Store the quarter length with the highest count.

        When several lengths share the highest count, the one met last
        while iterating the histogram is kept.  An empty histogram
        leaves the stored value at 0.
        '''
        lengthCounts = self.data['flat.notes.quarterLengthHistogram']
        bestLength = 0
        bestCount = 0
        for candidate in lengthCounts:
            tally = lengthCounts[candidate]
            if tally < bestCount:
                continue
            # ties replace the current best as well
            bestCount, bestLength = tally, candidate
        self.feature.vector[0] = bestLength
278
279
class MostCommonNoteQuarterLengthPrevalence(featuresModule.FeatureExtractor):
    '''
    Fraction of notes that have the most common quarter length.

    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.MostCommonNoteQuarterLengthPrevalence(s)
    >>> fe.extract().vector
    [0.60...]
    '''
    id = 'QL3'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Most Common Note Quarter Length Prevalence'
        self.description = 'Fraction of notes that have the most common quarter length.'
        self.dimensions = 1
        self.discrete = False

    def process(self):
        '''
        Do processing necessary, storing result in feature.

        Stores the largest histogram count divided by the sum of all
        positive counts.  If no entry has a positive count, 0 is stored
        (matching the simultaneity prevalence extractors in this module)
        rather than dividing by zero.

        Raises NativeFeatureException if the histogram is empty.
        '''
        histo = self.data['flat.notes.quarterLengthHistogram']
        if not histo:
            raise NativeFeatureException('input lacks notes')

        # all defined counts should be greater than zero, but just in case
        positiveCounts = [histo[key] for key in histo if histo[key] > 0]
        summation = sum(positiveCounts)  # count of all
        maxCount = max(positiveCounts, default=0)  # max found for any one key

        if summation != 0:
            self.feature.vector[0] = maxCount / summation
        else:
            self.feature.vector[0] = 0
312
313
class RangeOfNoteQuarterLengths(featuresModule.FeatureExtractor):
    '''
    Span between the shortest and the longest note quarter length.

    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.RangeOfNoteQuarterLengths(s)
    >>> fe.extract().vector
    [1.5]
    '''
    id = 'QL4'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Range of Note Quarter Lengths'
        self.description = 'Difference between the longest and shortest quarter lengths.'
        self.dimensions = 1
        self.discrete = False

    def process(self):
        '''
        Store the largest histogram key minus the smallest one.

        Raises NativeFeatureException if the histogram is empty.
        '''
        lengthCounts = self.data['flat.notes.quarterLengthHistogram']
        if not lengthCounts:
            raise NativeFeatureException('input lacks notes')
        lengths = tuple(lengthCounts.keys())
        shortest = min(lengths)
        longest = max(lengths)
        self.feature.vector[0] = longest - shortest
341
342
343# ------------------------------------------------------------------------------
344# various ways of looking at chordify representation
345
346# percentage of closed-position chords and
347# percentage of closed-position chords above bass  -- which looks at how many
348# 2 (or 3 in the second one) note chordify simultaneities are the same after
349# running .closedPosition() on them.  For the latter, we just delete the
350# lowest note of the chord before running that.
351
352
class UniquePitchClassSetSimultaneities(featuresModule.FeatureExtractor):
    '''
    Counts the distinct pitch class sets found among the chordified
    simultaneities.

    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.UniquePitchClassSetSimultaneities(s)
    >>> fe.extract().vector
    [27]
    '''
    id = 'CS1'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Unique Pitch Class Set Simultaneities'
        self.description = 'Number of unique pitch class simultaneities.'
        self.dimensions = 1
        self.discrete = False

    def process(self):
        '''
        Store the number of histogram entries with a positive count.
        '''
        pcSetCounts = self.data[
            'chordify.flat.getElementsByClass(Chord).pitchClassSetHistogram'
        ]
        # entries are expected to be positive already; the filter is a safeguard
        self.feature.vector[0] = sum(
            1 for pcSet in pcSetCounts if pcSetCounts[pcSet] > 0
        )
381
382
class UniqueSetClassSimultaneities(featuresModule.FeatureExtractor):
    '''
    Counts the distinct set classes found among the chordified
    simultaneities.

    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.UniqueSetClassSimultaneities(s)
    >>> fe.extract().vector
    [14]
    '''
    id = 'CS2'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Unique Set Class Simultaneities'
        self.description = 'Number of unique set class simultaneities.'
        self.dimensions = 1
        self.discrete = False

    def process(self):
        '''
        Store the number of histogram entries with a positive count.
        '''
        setClassCounts = self.data[
            'chordify.flat.getElementsByClass(Chord).setClassHistogram'
        ]
        # entries are expected to be positive already; the filter is a safeguard
        self.feature.vector[0] = sum(
            1 for setClass in setClassCounts if setClassCounts[setClass] > 0
        )
411
412
class MostCommonPitchClassSetSimultaneityPrevalence(
        featuresModule.FeatureExtractor):
    '''
    Share of all pitch class set simultaneities taken up by the single
    most frequent one.

    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.MostCommonPitchClassSetSimultaneityPrevalence(s)
    >>> fe.extract().vector
    [0.134...]
    '''
    id = 'CS3'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Most Common Pitch Class Set Simultaneity Prevalence'
        self.description = (
            'Fraction of all pitch class simultaneities '
            'that are the most common simultaneity.'
        )
        self.dimensions = 1
        self.discrete = False

    def process(self):
        '''
        Store the largest histogram count divided by the sum of all
        positive counts, or 0 when that sum is zero.

        Raises NativeFeatureException if the histogram is empty.
        '''
        pcSetCounts = self.data[
            'chordify.flat.getElementsByClass(Chord).pitchClassSetHistogram'
        ]
        if not pcSetCounts:
            raise NativeFeatureException('input lacks notes')

        # entries are expected to be positive already; the filter is a safeguard
        positives = [pcSetCounts[k] for k in pcSetCounts if pcSetCounts[k] > 0]
        overall = sum(positives)
        peak = max(positives, default=0)

        self.feature.vector[0] = peak / overall if overall != 0 else 0
451
452
class MostCommonSetClassSimultaneityPrevalence(featuresModule.FeatureExtractor):
    '''
    Share of all set class simultaneities taken up by the single most
    frequent one.

    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.MostCommonSetClassSimultaneityPrevalence(s)
    >>> fe.extract().vector
    [0.653...]
    >>> s2 = corpus.parse('schoenberg/opus19', 6)
    >>> fe2 = features.native.MostCommonSetClassSimultaneityPrevalence(s2)
    >>> fe2.extract().vector
    [0.228...]
    '''
    id = 'CS4'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Most Common Set Class Simultaneity Prevalence'
        self.description = (
            'Fraction of all set class simultaneities '
            'that are the most common simultaneity.'
        )
        self.dimensions = 1
        self.discrete = False

    def process(self):
        '''
        Store the largest histogram count divided by the sum of all
        positive counts, or 0 when that sum is zero.

        Raises NativeFeatureException if the histogram is empty.
        '''
        setClassCounts = self.data[
            'chordify.flat.getElementsByClass(Chord).setClassHistogram'
        ]
        if not setClassCounts:
            raise NativeFeatureException('input lacks notes')

        # entries are expected to be positive already; the filter is a safeguard
        positives = [setClassCounts[k] for k in setClassCounts if setClassCounts[k] > 0]
        overall = sum(positives)
        peak = max(positives, default=0)

        if overall == 0:
            self.feature.vector[0] = 0
        else:
            self.feature.vector[0] = peak / overall
497
498
class MajorTriadSimultaneityPrevalence(featuresModule.FeatureExtractor):
    '''
    Fraction of all chordified simultaneities that are major triads;
    incomplete major triads are counted as well.

    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.MajorTriadSimultaneityPrevalence(s)
    >>> fe.extract().vector
    [0.46...]
    '''
    id = 'CS5'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Major Triad Simultaneity Prevalence'
        self.description = 'Percentage of all simultaneities that are major triads.'
        self.dimensions = 1
        self.discrete = False

    def process(self):
        '''
        Store (major + incomplete major triads) / (number of chords),
        or 0 when there are no chords.
        '''
        chordCount = len(self.data['chordify.flat.getElementsByClass(Chord)'])
        typeCounts = self.data['chordify.flat.getElementsByClass(Chord).typesHistogram']
        if chordCount == 0:
            self.feature.vector[0] = 0
            return
        matching = typeCounts['isMajorTriad'] + typeCounts['isIncompleteMajorTriad']
        self.feature.vector[0] = matching / chordCount
530
531
class MinorTriadSimultaneityPrevalence(featuresModule.FeatureExtractor):
    '''
    Fraction of all chordified simultaneities that are minor triads;
    incomplete minor triads are counted as well.

    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.MinorTriadSimultaneityPrevalence(s)
    >>> fe.extract().vector
    [0.211...]
    '''
    id = 'CS6'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Minor Triad Simultaneity Prevalence'
        self.description = 'Percentage of all simultaneities that are minor triads.'
        self.dimensions = 1
        self.discrete = False

    def process(self):
        '''
        Store (minor + incomplete minor triads) / (number of chords),
        or 0 when there are no chords.
        '''
        chordCount = len(self.data['chordify.flat.getElementsByClass(Chord)'])
        typeCounts = self.data['chordify.flat.getElementsByClass(Chord).typesHistogram']
        if chordCount == 0:
            self.feature.vector[0] = 0
            return
        matching = typeCounts['isMinorTriad'] + typeCounts['isIncompleteMinorTriad']
        self.feature.vector[0] = matching / chordCount
562
563
class DominantSeventhSimultaneityPrevalence(featuresModule.FeatureExtractor):
    '''
    Fraction of all chordified simultaneities that are dominant sevenths.

    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.DominantSeventhSimultaneityPrevalence(s)
    >>> fe.extract().vector
    [0.076...]
    '''
    id = 'CS7'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Dominant Seventh Simultaneity Prevalence'
        self.description = 'Percentage of all simultaneities that are dominant seventh.'
        self.dimensions = 1
        self.discrete = False

    def process(self):
        '''
        Store (dominant sevenths) / (number of chords), or 0 when there
        are no chords.
        '''
        chordCount = len(self.data['chordify.flat.getElementsByClass(Chord)'])
        typeCounts = self.data['chordify.flat.getElementsByClass(Chord).typesHistogram']
        if chordCount == 0:
            self.feature.vector[0] = 0
            return
        self.feature.vector[0] = typeCounts['isDominantSeventh'] / chordCount
594
595
class DiminishedTriadSimultaneityPrevalence(featuresModule.FeatureExtractor):
    '''
    Fraction of all chordified simultaneities that are diminished triads.

    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.DiminishedTriadSimultaneityPrevalence(s)
    >>> fe.extract().vector
    [0.019...]
    '''
    id = 'CS8'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Diminished Triad Simultaneity Prevalence'
        self.description = 'Percentage of all simultaneities that are diminished triads.'
        self.dimensions = 1
        self.discrete = False

    def process(self):
        '''
        Store (diminished triads) / (number of chords), or 0 when there
        are no chords.
        '''
        chordCount = len(self.data['chordify.flat.getElementsByClass(Chord)'])
        typeCounts = self.data['chordify.flat.getElementsByClass(Chord).typesHistogram']
        if chordCount == 0:
            self.feature.vector[0] = 0
            return
        self.feature.vector[0] = typeCounts['isDiminishedTriad'] / chordCount
626
627
class TriadSimultaneityPrevalence(featuresModule.FeatureExtractor):
    '''
    Gives the proportion of all simultaneities which form triads (major,
    minor, diminished, or augmented)


    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.TriadSimultaneityPrevalence(s)
    >>> fe.extract().vector
    [0.692...]
    >>> s2 = corpus.parse('schoenberg/opus19', 2)
    >>> fe2 = features.native.TriadSimultaneityPrevalence(s2)
    >>> fe2.extract().vector
    [0.021739...]
    '''
    id = 'CS9'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Triad Simultaneity Prevalence'
        self.description = 'Proportion of all simultaneities that form triads.'
        self.dimensions = 1
        self.discrete = False

    def process(self):
        '''
        Store (triads) / (number of chords), or 0 when there are no chords.
        '''
        chordCount = len(self.data['chordify.flat.getElementsByClass(Chord)'])
        typeCounts = self.data['chordify.flat.getElementsByClass(Chord).typesHistogram']
        if chordCount == 0:
            self.feature.vector[0] = 0
            return
        self.feature.vector[0] = typeCounts['isTriad'] / chordCount
665
666
class DiminishedSeventhSimultaneityPrevalence(featuresModule.FeatureExtractor):
    '''
    Fraction of all chordified simultaneities that are diminished
    seventh chords.

    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.DiminishedSeventhSimultaneityPrevalence(s)
    >>> fe.extract().vector
    [0.0]
    '''
    id = 'CS10'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Diminished Seventh Simultaneity Prevalence'
        self.description = 'Percentage of all simultaneities that are diminished seventh chords.'
        self.dimensions = 1
        self.discrete = False

    def process(self):
        '''
        Store (diminished sevenths) / (number of chords), or 0 when there
        are no chords.
        '''
        chordCount = len(self.data['chordify.flat.getElementsByClass(Chord)'])
        typeCounts = self.data['chordify.flat.getElementsByClass(Chord).typesHistogram']
        if chordCount == 0:
            self.feature.vector[0] = 0
            return
        self.feature.vector[0] = typeCounts['isDiminishedSeventh'] / chordCount
697
698
class IncorrectlySpelledTriadPrevalence(featuresModule.FeatureExtractor):
    '''
    Fraction of all triads that are spelled incorrectly.

    The total number of triads is taken from the set class histogram
    (Forte classes 3-10, 3-11 and 3-12); the number spelled correctly is
    the 'isTriad' count of the chord types histogram.  The difference
    between the two, divided by the total, is the feature value.

    example:

    Mozart k155 movement 2 has a single instance of an incorrectly spelled
    triad (m. 17, where the C# of an A-major chord has a lower neighbor B#
    thus temporarily creating an incorrectly spelled A-minor chord).

    We would expect highly chromatic music such as Reger or Wagner to have
    a higher percentage, or automatically rendered MIDI
    transcriptions (which don't distinguish between D# and Eb).

    >>> s = corpus.parse('bwv66.6')
    >>> fe = features.native.IncorrectlySpelledTriadPrevalence(s)
    >>> fe.extract().vector
    [0.02...]
    '''
    id = 'CS11'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Incorrectly Spelled Triad Prevalence'
        self.description = 'Percentage of all triads that are spelled incorrectly.'
        self.dimensions = 1
        self.discrete = False

    def process(self):
        '''
        Store (Forte triads - correctly spelled triads) / (Forte triads).

        Raises NativeFeatureException if the chord types histogram is
        empty or if none of the Forte triad classes occurs.
        '''
        typeCounts = self.data['chordify.flat.getElementsByClass(Chord).typesHistogram']
        if not typeCounts:
            raise NativeFeatureException('input lacks notes')
        correctlySpelled = typeCounts['isTriad']

        forteCounts = self.data['chordify.flat.getElementsByClass(Chord).setClassHistogram']
        triadSetClasses = ('3-11', '3-12', '3-10')
        # a set class that never occurs is simply missing from the histogram
        forteTriads = sum(
            forteCounts[name] for name in triadSetClasses if name in forteCounts
        )

        if forteTriads == 0:
            raise NativeFeatureException('input lacks Forte triads')
        self.feature.vector[0] = (forteTriads - correctlySpelled) / forteTriads
752
753
class ChordBassMotionFeature(featuresModule.FeatureExtractor):
    '''
    A twelve element feature that reports how the bass moves between
    consecutive music21.harmony.Harmony objects (the root is used when a
    harmony reports no bass).

    Each change of bass pitch class is counted in bin
    i = (previous pitch class - current pitch class) % 12, and every bin
    holds its share of all counted changes.  i = 0 is always 0.0 since
    consecutive chords on the same pitch are ignored (unless there are 0 or 1
    harmonies, in which case it is 1)

    NOTE(review): earlier documentation described bin i as motion *up* by
    i half-steps; with the formula above an ascending half-step is counted
    in bin 11 and a descending one in bin 1.  Confirm which is intended.

    Sample test on Dylan's Blowing In The Wind (not included), showing all
    motion is 3rds, 6ths, or especially 4ths and 5ths.

    s = corpus.parse('demos/BlowinInTheWind')
    fe = features.native.ChordBassMotionFeature(s)
    fe.extract().vector

    [0.0, 0.0, 0.0, 0.0416..., 0.0416..., 0.166..., 0.0, 0.54166..., 0.0, 0.0, 0.2083... 0.0]

    For comparison, the Beatles Here Comes the Sun has more tone motion

    [0.0, 0.05..., 0.14..., 0.03..., 0.06..., 0.3..., 0.008..., 0.303...,
     0.0, 0.0, 0.07..., 0.008...]

    Post 1990s music has a lot more semitone motion.

    '''
    id = 'CS12'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Chord Bass Motion'
        self.description = (
            '12-element vector showing the fraction of chords that '
            'move by x semitones (where x=0 is always 0 unless there '
            'are 0 or 1 harmonies, in which case it is 1).'
        )
        self.dimensions = 12
        self.discrete = False

    def process(self):
        '''
        Tally bass pitch class changes between successive harmonies and
        store each bin's fraction of the total as the feature vector.
        '''
        def bassOrRoot(harm):
            # prefer the explicit bass; fall back on the root
            bass = harm.bass()
            return bass if bass is not None else harm.root()

        harms = self.data['flat.getElementsByClass(Harmony)']

        binCounts = [0] * 12
        motionCount = 0
        previous = None

        for current in harms:
            if previous is None:
                previous = current
                continue

            previousPc = bassOrRoot(previous).pitchClass
            currentPc = bassOrRoot(current).pitchClass
            if previousPc == currentPc:
                # same bass pitch class: not a motion, keep the reference
                continue

            binCounts[(previousPc - currentPc) % 12] += 1
            motionCount += 1
            previous = current

        if motionCount == 0:
            fractions = [1.0] + [0.0] * 11
        else:
            # bin 0 is never incremented, so it is fixed at 0.0
            fractions = [0.0] + [binCounts[i] / motionCount for i in range(1, 12)]

        self.feature.vector = fractions
832
833
834# ------------------------------------------------------------------------------
835# metadata
836
class ComposerPopularity(featuresModule.FeatureExtractor):
    '''
    REMOVED in v7 because Google's response no longer includes result counts.
    Empty class still here so that id won't be reused, but it's been removed
    from this module's list of features.
    '''
    id = 'MD1'
844
845
846# ------------------------------------------------------------------------------
847# melodic contour
848
849
class LandiniCadence(featuresModule.FeatureExtractor):
    '''
    Report whether at least one Part closes with a Landini-like cadential
    figure.

    The single-element vector is set to 1 when the figure is found;
    otherwise it is left as initialized.
    '''
    id = 'MC1'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Ends With Landini Melodic Contour'
        self.description = ('Boolean that indicates the presence of a Landini-like '
                            'cadential figure in one or more parts.')
        self.dimensions = 1
        self.discrete = False

    def process(self):
        '''
        Look for a Landini-like ending in each contour and flag the
        feature as soon as one is found.
        '''
        # plausible ending half step movements; kept as lists so that they
        # compare equal to list slices
        endings = ([-2, 3], [-1, -2, 3])

        # gather one contour per part, or the single overall contour
        if self.data.partsCount > 0:
            contours = [self.data['parts'][partIndex]['contourList']
                        for partIndex in range(self.data.partsCount)]
        else:
            contours = [self.data['contourList']]

        for contour in contours:
            # repeated notes (zero movement) are ignored
            moving = [step for step in contour if step != 0]
            # the figure must be the very last movements of the contour
            if any(len(moving) >= len(ending) and moving[-len(ending):] == ending
                   for ending in endings):
                self.feature.vector[0] = 1
                return
904
905
906# -----------------------------------------------------------------------------
907# text features
908
class LanguageFeature(featuresModule.FeatureExtractor):
    '''
    The language of the text, expressed as a number.

    The number is the index of text.LanguageDetector.languageCodes + 1,
    or 0 if there is no language.

    Detect that the language of a Handel aria is Italian.

    >>> s = corpus.parse('handel/rinaldo/lascia_chio_pianga')
    >>> fe = features.native.LanguageFeature(s)
    >>> fe.extract().vector
    [3]

    '''
    id = 'TX1'

    def __init__(self, dataOrStream=None, *arguments, **keywords):
        super().__init__(dataOrStream=dataOrStream, *arguments, **keywords)

        self.name = 'Language Feature'
        self.description = ('Language of the lyrics of the piece given as a numeric '
                            'value from text.LanguageDetector.mostLikelyLanguageNumeric().')
        self.dimensions = 1
        self.discrete = True
        # detector used by process() to classify the assembled lyrics
        self.languageDetector = text.LanguageDetector()

    def process(self):
        '''
        Run the language detector on the assembled lyrics and store the
        numeric result as the only element of the feature vector.
        '''
        lyrics = self.data['assembledLyrics']
        detector = self.languageDetector
        self.feature.vector[0] = detector.mostLikelyLanguageNumeric(lyrics)
941
942
943# ------------------------------------------------------------------------------
# Feature extractor classes made available by this module, in order.
# The trailing comment on each entry gives the extractor's id in lowercase.
featureExtractors = [
    QualityFeature,  # p22

    TonalCertainty,  # k1

    UniqueNoteQuarterLengths,  # ql1
    MostCommonNoteQuarterLength,  # ql2
    MostCommonNoteQuarterLengthPrevalence,  # ql3
    RangeOfNoteQuarterLengths,  # ql4

    UniquePitchClassSetSimultaneities,  # cs1
    UniqueSetClassSimultaneities,  # cs2
    MostCommonPitchClassSetSimultaneityPrevalence,  # cs3
    MostCommonSetClassSimultaneityPrevalence,  # cs4
    MajorTriadSimultaneityPrevalence,  # cs5
    MinorTriadSimultaneityPrevalence,  # cs6
    DominantSeventhSimultaneityPrevalence,  # cs7
    DiminishedTriadSimultaneityPrevalence,  # cs8
    TriadSimultaneityPrevalence,  # cs9
    DiminishedSeventhSimultaneityPrevalence,  # cs10
    IncorrectlySpelledTriadPrevalence,  # cs11
    ChordBassMotionFeature,  # cs12

    # ComposerPopularity,  # md1 -- removed in v7; the class remains only to reserve its id

    LandiniCadence,  # mc1

    LanguageFeature,  # tx1

]
974
975
976# ------------------------------------------------------------------------------
class Test(unittest.TestCase):
    '''
    Unit tests for extractors defined in this module.
    '''

    def testIncorrectlySpelledTriadPrevalence(self):
        '''
        Check the reported prevalence for a stream of four three-note chords.
        '''
        from music21 import chord
        from music21 import features
        from music21 import stream

        chordPitches = (
            ['c', 'e', 'g'],
            ['c', 'e', 'a'],
            ['c', 'd#', 'g'],
            ['c', 'd#', 'a--'],
        )
        chordStream = stream.Stream()
        for pitchNames in chordPitches:
            chordStream.append(chord.Chord(pitchNames))

        extractor = features.native.IncorrectlySpelledTriadPrevalence(chordStream)
        self.assertEqual(str(extractor.extract().vector[0]), '0.5')

    def testLandiniCadence(self):
        '''
        Check one melody that ends with the figure and two that do not.
        '''
        from music21 import converter
        from music21 import features

        # (tinynotation melody, expected value of the feature)
        cases = (
            ('tinynotation: 3/4 f#4 f# e g2', 1),
            ('tinynotation: 3/4 f#4 f# f# g2', 0),
            ('tinynotation: 3/4 f#4 e a g2', 0),
        )
        for melody, expected in cases:
            parsed = converter.parse(melody)
            extractor = features.native.LandiniCadence(parsed)
            self.assertEqual(extractor.extract().vector[0], expected)
1008
1009
# When executed as a script, run this module's Test class via music21.mainTest.
if __name__ == '__main__':
    import music21
    music21.mainTest(Test)
1013