1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2;;;                                                                       ;;
3;;;                Centre for Speech Technology Research                  ;;
4;;;                     University of Edinburgh, UK                       ;;
5;;;                       Copyright (c) 2003, 2004                        ;;
6;;;                        All Rights Reserved.                           ;;
7;;;                                                                       ;;
8;;;  Permission is hereby granted, free of charge, to use and distribute  ;;
9;;;  this software and its documentation without restriction, including   ;;
10;;;  without limitation the rights to use, copy, modify, merge, publish,  ;;
11;;;  distribute, sublicense, and/or sell copies of this work, and to      ;;
12;;;  permit persons to whom this work is furnished to do so, subject to   ;;
13;;;  the following conditions:                                            ;;
14;;;   1. The code must retain the above copyright notice, this list of    ;;
15;;;      conditions and the following disclaimer.                         ;;
16;;;   2. Any modifications must be clearly marked as such.                ;;
17;;;   3. Original authors' names are not deleted.                         ;;
18;;;   4. The authors' names are not used to endorse or promote products   ;;
19;;;      derived from this software without specific prior written        ;;
20;;;      permission.                                                      ;;
21;;;                                                                       ;;
22;;;  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        ;;
23;;;  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ;;
24;;;  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   ;;
25;;;  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     ;;
26;;;  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    ;;
27;;;  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   ;;
28;;;  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          ;;
29;;;  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       ;;
30;;;  THIS SOFTWARE.                                                       ;;
31;;;                                                                       ;;
32;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33;;;
34;;; Multisyn scheme target cost (Rob Clark and Korin Richmond)
35;;;
36;;;
37
38(define (Default_Target_Cost targ cand)
39"(Default_Target_Cost targ cand)
40A Default Target Cost function."
41(let ((cost 0))
42  (mapcar
43   (lambda (row)
44     (set! cost (+ cost (tc_eval_row row targ cand))))
45   target_matrix)
46  (set! cost (/ cost target_matrix_weight))
47  cost))
48
49
50(define (tc_eval_row row targ cand)
51  "(tc_eval_row row targ cand)
52Evaluate a target matrix row."
53(let ((weight (car row))
54      (func (cadr row))
55      (result 0))
56  (set! result (* weight (eval (list func targ cand))))
57  result))
58
59;;
60;; Target cost Matrix
61;;  '(weight function)
62
63(define (get_matrix_weight m)
64  (let ((w 0))
65    (mapcar
66     (lambda (x)
67       (set! w (+ w (car x))))
68     m)
69    w))
70
71
72(set! test_matrix_max_weight 1)
73(set! test_matrix
74'(
75  (10 tc_stress )
76  (5 tc_syl_pos )
77  (5 tc_word_pos)
78  (6 tc_partofspeech)
79  (7 tc_phrase_pos)
80  (4 tc_left_context)
81  (3 tc_right_context)
82  (25 tc_bad_f0) ;; set to equal 1/3 of total cost (so high because interaction with join)
83;  (0 tc_segment_score) ;; was 4. turned off until utterances are built for this.
84  (10 tc_bad_duration) ;; was 6
85))
86
87(set! test_matrix_weight (* test_matrix_max_weight (get_matrix_weight test_matrix)))
88
89(set! target_matrix test_matrix)
90(set! target_matrix_weight test_matrix_weight)
91
92
93
94;;
95;; tc_stress
96;;
97;; Compares stress on any vowel which form part of the diphone. stress
98;; conditions must match for a zero target cost.
99;;
100
101(define (tc_stress targ cand)
102"(tc_stress targ cand)
103Target Cost stressed. 0    - stress patterns match  [ compares: 0 unstressed vs. > 0 stressed ]
104                      1    - stress miss-match.
105"
106(let ((c 0)
107      cand_stress targ_stress)
108  ;(format t "my_is_vowel %l\n" (my_is_vowel targ))
109  ;(format t "phone_is_silence %l\n" (phone_is_silence (item.feat targ 'name)))
110  ;; For first segment
111  (if (and (not (phone_is_silence (item.feat targ 'name)))
112	   (my_is_vowel targ))
113      (begin
114	(set! cand_stress (item.feat cand "R:SylStructure.parent.stress"))
115	(set! targ_stress (item.feat targ "R:SylStructure.parent.stress"))
116	(if (or (and (eq? cand_stress 0) (> targ_stress 0))
117		(and (eq? targ_stress 0) (> cand_stress 0)))
118	    (set! c 1))))
119  ;; For second segment
120  ;(format t "n.my_is_vowel %l\n" (my_is_vowel (item.next targ)))
121  ;(format t "n.phone_is_silence %l\n" (phone_is_silence (item.feat targ 'n.name)))
122  (if (and (not (phone_is_silence (item.feat targ 'n.name)))
123	   (my_is_vowel (item.next targ)))
124      (begin
125	(set! cand_stress (item.feat cand "n.R:SylStructure.parent.stress"))
126	(set! targ_stress (item.feat targ "n.R:SylStructure.parent.stress"))
127	(if (or (and (eq? cand_stress 0) (> targ_stress 0))
128		(and (eq? targ_stress 0) (> cand_stress 0)))
129	    (set! c 1))))
130;  (format t "tc_stress: %l\n" c)
131c))
132
133
134;;
135;; tc_syl_position
136;;
137;; Find and compare diphone position in syllabic structure.
138;; Values are: inter - diphone crosses syllable boundary.
139;;             initial - diphone is syllable initial.
140;;             medial - diphone is syllable medial
141;;             final  - diphone is syllable final
142;; returns 0 for a match 1 for a mismatch.
143;;
144(define (tc_syl_pos targ cand)
145"(tc_syl_pos targ cand)
146Score position in syllable."
147(let ((targ_pos "medial")
148      (cand_pos "medial")
149      (targ_syl (get_syl targ))
150      (targ_next_syl (get_syl (item.next targ)))
151      (cand_syl (get_syl cand))
152      (cand_next_syl (get_syl (item.next cand))))
153  ;; target
154  (cond
155   ((not (equal? targ_syl targ_next_syl))
156    (set! targ_pos "inter"))
157   ((not (equal? targ_syl (get_syl (item.prev targ))))
158    (set! targ_pos "initial"))
159   ((not (equal? targ_next_syl (get_syl (item.next (item.next targ)))))
160    (set! targ_pos "final")))
161  ;; candidate
162  (cond
163   ((not (equal? cand_syl cand_next_syl))
164    (set! cand_pos "inter"))
165   ((not (equal? cand_syl (get_syl (item.prev cand))))
166    (set! cand_pos "initial"))
167   ((not (equal? cand_next_syl (get_syl (item.next (item.next cand)))))
168    (set! cand_pos "final")))
169;  (format t "targ_syl: %l   cand_syl %l\n" targ_pos cand_pos)
170   (if (equal? targ_pos cand_pos) 0 1)))
171
172;;
173;; tc_word_position
174;;
175;; Find and compare diphone position in word structure
176;; Values are: inter - diphone crosses word boundary.
177;;             initial - diphone is word initial.
178;;             medial - diphone is word medial
179;;             final  - diphone is word final
180;; returns 0 for a match 1 for a mismatch.
181;;
182(define (tc_word_pos targ cand)
183"(tc_word_pos targ cand)
184Score position in word."
185(let ((targ_pos "medial")
186      (cand_pos "medial")
187      (targ_word (get_word targ))
188      (targ_next_word (get_word (item.next targ)))
189      (cand_word (get_word cand))
190      (cand_next_word (get_word (item.next cand))))
191  ;; target
192  (cond
193   ((not (equal? targ_word targ_next_word))
194    (set! targ_pos "inter"))
195   ((not (equal? targ_word (get_word (item.prev targ))))
196    (set! targ_pos "initial"))
197   ((not (equal? targ_next_word (get_word (item.next (item.next targ)))))
198    (set! targ_pos "final")))
199  ;; candidate
200  (cond
201   ((not (equal? cand_word cand_next_word))
202    (set! cand_pos "inter"))
203   ((not (equal? cand_word (get_word (item.prev cand))))
204    (set! cand_pos "initial"))
205   ((not (equal? cand_next_word (get_word (item.next (item.next cand)))))
206    (set! cand_pos "final")))
207;  (format t "targ_word: %l   cand_word %l\n" targ_pos cand_pos)
208   (if (equal? targ_pos cand_pos) 0 1)))
209
210
211
212;;
213;; tc_phrase_position
214;;
215;; Position (of word) in phrase
216;; initial/medial/final
217;;
218;; 0 - match, 1 - mismatch
219;;
220(define (tc_phrase_pos targ cand)
221"(tc_phrase_pos targ cand)
222 Score position in phrase."
223(let ((targ_word (get_word targ))
224      (cand_word (get_word cand)))
225  (cond
226   ((and (null targ_word)
227	 (null cand_word))
228    0)
229   ((or (null targ_word)
230	(null cand_word))
231    1)
232   ((string-equal (item.feat targ_word 'pbreak)
233		  (item.feat cand_word 'pbreak))
234    0)
235   (t 1))))
236
237;;
238;; tc_partofspeech
239;;
240;;
241;;
242(define (tc_partofspeech targ cand)
243"(tc_partofspeech targ cand)
244 Score part of speech."
245(let ((targ_word (get_word targ))
246      (cand_word (get_word cand))
247      targ_pos cand_pos)
248(if targ_word
249    (set! targ_pos (simple_pos (item.feat targ_word 'pos))))
250(if cand_word
251    (set! cand_pos (simple_pos (item.feat cand_word 'pos))))
252  ;(format t "targ_pos %l cand_pos %l\n" targ_pos cand_pos)
253  (if (equal? targ_pos cand_pos) 0 1)))
254
255(define (score_contexts targ_context cand_context)
256  "(score_contexts targ_context cand_context)
257If both context items are nil, then score is 0.
258If both context items are not nil, and are the same, then
259score is 0. Otherwise, score is 1."
260  (if (and targ_context cand_context)
261	(if (equal? (item.feat targ_context "name")
262		    (item.feat cand_context "name"))
263	    0
264	    1)
265      (if (and (equal? targ_context nil)
266	       (equal? cand_context nil))
267	  0
268	  1)))
269
270
271(define (tc_left_context targ cand)
272"(tc_left_context targ cand)
273Score left phonetic context."
274(let ((targ_context (item.prev targ))
275      (cand_context (item.prev cand)))
276  (score_contexts targ_context cand_context)))
277
278;;
279;; tc_right_context
280;;
281;;
282;;
283(define (tc_right_context targ cand)
284"(tc_right_context targ cand)
285Score right phonetic context."
286(let ((targ_context (item.next (item.next targ)))
287      (cand_context (item.next (item.next cand))))
288  (score_contexts targ_context cand_context)))
289
290
291;;
292;; tc_segment_score
293;;
294;; This currently thresholds based on looking at the distributions of the scores.
295;; A nice exp function may be better.
296(define (tc_segment_score targ cand)
297"tc_segment_score targ cand)
298A bad alignment score make a bad segment."
299(let ((score 0))
300  (if (not (phone_is_silence (item.feat cand "name")))
301      (set! score  (+ score (item.feat cand 'score))))
302  (if (not (phone_is_silence (item.feat (item.next cand) "name")))
303      (set! score  (+ score (item.feat (item.next cand) 'score))))
304  (cond
305   ((> score -4000)    ;2000 (x2) is 7.5%
306    0)
307   ((> score -5000)    ;2500 (x2) is 5.0%
308    0.5)
309   (t 1))))
310
311;;
312;; tc_bad_duration
313;;
314;; If the segment is marked as having a weird duration penalise it.
315;; We allow bad_dur to be set on the target so resynthesis works
316;; and so you could ask for really long/short segments.
317;;
318(define (tc_bad_duration targ cand)
319  (if (equal? (item.feat targ "bad_dur")
320	      (item.feat cand "bad_dur"))
321      0
322      1))
323
324
325;;
326;; tc_bad_f0
327;;
328;; If the candidate is deemed to have an inappropriate f0, then penalise it.
329;;
330;; Specifically, if the targ/cand segment type is expected to be voiced, then
331;; an f0 of zero is bad (results from poor pitch tracking).  In such a case,
332;; the join cost would then favour other units with f0 (since the euclidean
333;; distance between two zeros is very small ;)
334;; We want to avoid that.
335;;
336;; Presumeably, we also want to penalise cases where supposedly voiceless
337;; candidates have an f0 != 0 (either a consequence of bad pitch tracking
338;; or bad labelling) but that's not done here yet...
339;;
340;; (the function itself has been implemented in C for convenience, and
341;; this stub is left here just for this note ;)
342
343(define (tc_bad_f0 targ cand)
344  (let ((score (temp_tc_bad_f0 targ cand))
345	(name (format nil "%s_%s"
346		      (item.feat targ "name")
347		      (item.feat (item.next targ) "name"))))
348    (if (not (equal? score 0.0))
349	(format t "f0 score for %s is %f\n" name score))
350    score))
351
352;;
353;; Is a segment a vowel?  ( ph_is_a_vowel doesn't seem to work)
354;;
355(define (my_is_vowel seg)
356  (if seg
357      (if (equal? (item.feat seg 'ph_vc) "+")
358	  t
359	  nil)))
360
361
362
363;; get the syllable from sysstructure in normal utterance
364;;
365(define (get_syl seg)
366  (let (syl)
367    (if seg
368	(set! syl (item.relation.parent seg 'SylStructure)))
369    syl))
370
371;; get the word from sylstructure in normal utterance
372;;
373(define (get_word seg)
374  (let ((syl (get_syl seg))
375	word)
376    (if syl
377	(set! word (item.parent syl)))
378    word))
379
380
381;; simple pos
382;;
383(define (simple_pos pos)
384(let (spos)
385  (cond
386   ((member_string pos '(vbd vb vbn vbz vbp vbg))
387    (set! spos "v"))
388   ((member_string pos '(nn nnp nns nnps fw sym ls))
389    (set! spos "n"))
390   ((member_string pos '(dt gin prp cc of to cd md pos wdt wp wrb ex uh pdt))
391    (set! spos "func"))
392   ((member_string pos '(jj jjr jjs 1 2 rb rp rbr rbs))
393    (set! spos "other")))
394  spos))
395
396
397;; debugging
398
399(define (test_target_cost utt1 utt2)
400(let ((segs1 (utt.relation.items utt1 'Segment))
401      (segs2 (utt.relation.items utt2 'Segment))
402      (tc 0))
403  (while (and segs1 segs2)
404	 (set! tc (Default_Target_Cost (car segs1) (car segs2)))
405	 (format t "targ: %l cand: %l cost: %l\n" (item.name (car segs1)) (item.name (car segs2)) tc)
406	 (set! segs1 (cdr segs1))
407	 (set! segs2 (cdr segs2)))))
408
409
410(provide 'target_cost)
411