1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2;;; ;; 3;;; Centre for Speech Technology Research ;; 4;;; University of Edinburgh, UK ;; 5;;; Copyright (c) 2002 ;; 6;;; All Rights Reserved. ;; 7;;; ;; 8;;; Permission is hereby granted, free of charge, to use and distribute ;; 9;;; this software and its documentation without restriction, including ;; 10;;; without limitation the rights to use, copy, modify, merge, publish, ;; 11;;; distribute, sublicense, and/or sell copies of this work, and to ;; 12;;; permit persons to whom this work is furnished to do so, subject to ;; 13;;; the following conditions: ;; 14;;; 1. The code must retain the above copyright notice, this list of ;; 15;;; conditions and the following disclaimer. ;; 16;;; 2. Any modifications must be clearly marked as such. ;; 17;;; 3. Original authors' names are not deleted. ;; 18;;; 4. The authors' names are not used to endorse or promote products ;; 19;;; derived from this software without specific prior written ;; 20;;; permission. ;; 21;;; ;; 22;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;; 23;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;; 24;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;; 25;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;; 26;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;; 27;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;; 28;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;; 29;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;; 30;;; THIS SOFTWARE. ;; 31;;; ;; 32;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 33;;; Author: Rob Clark 34;;; Date: July 2002 35;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 36;; 37;; Sets up the current voice to synthesise from APML. 38;; 39;; 40 41(require 'apml_f2bf0lr) 42(require 'apml_kaldurtreeZ) 43 44;; Default pitch settings (if unspecified in current voice.) 45 46(defvar apml_default_pitch_mean 170 ) 47(defvar apml_default_pitch_standard_deviation 34 ) 48 49;; apml sythesis wrappers. 50 51(define (apml_client_synth apml) 52 "(apml_client_synth apml) 53Synthesise apml and return waveform(s) to client." 54 (utt.send.wave.client (apml_synth apml))) 55 56(define (apml_synth apml) 57"(apml_synth xml) 58Synthesis an apml string." 59(let ((tmpfile (make_tmp_filename)) 60 utt) 61 (string_to_file tmpfile apml) 62 (set! utt (apml_file_synth tmpfile)) 63 (delete-file tmpfile) 64 utt)) 65 66(define (apml_file_synth filename) 67 "(apml_file_synth filename) 68Synthesis an apml file." 69 (let ((utt (Utterance Tokens nil))) 70 (utt.load utt filename) 71 (utt.synth utt))) 72 73(define (string_to_file file s) 74"(string_to_file file string) 75 Write string to file." 76(let ((fd)) 77 (set! fd (fopen file "wb")) 78 (format fd "%s" s) 79 (fclose fd))) 80 81 82;;; 83;;; Phrasing. 84;;; 85 86;; phrasing CART. 87; 88; It has been decided that by default, only punctuation should affect 89; phrasing (and subsequently pauses) 90; 91(set! apml_phrase_tree 92 ' 93 ((lisp_apml_punc in ("?" "." ":")) ; big punctuation 94 ((BB)) 95 ((lisp_apml_punc in ("'" "\"" "," ";")) ; else little punctuation 96 ((B)) 97 ((lisp_apml_last_word is 1) 98 ((BB)) ; need a BB at the end! 99 ((NB)))))) ; else nothing 100 101;; feature functions for phrasing 102(define (apml_punc word) 103 (item.feat (item.relation.parent word 'Token) 'punc)) 104 105(define (apml_last_word word) 106 (if (item.next word) 107 "0" "1")) 108 109 110;;; 111;;; Pauses 112;;; 113 114;; feature functions for pauses 115(define (apml_is_pause word) 116 (if (item.relation (item.relation.parent word 'Token) 'Pause) 117 t 118 nil)) 119 120(define (apml_pause word) 121 (if (item.relation word 'Pause) 122 (item.feat (item.relation.parent (item.relation.parent word 'Token) 'Pause) "sec") 123 0)) 124 125(define (Apml_Pauses utt) 126 "(Pauses UTT) 127Predict pause insertion for apml." 128 (let ((words (utt.relation.items utt 'Word)) lastword tpname) 129 (if words 130 (begin 131 (insert_initial_pause utt) ;; always have a start pause 132 (set! lastword (car (last words))) 133 (mapcar 134 (lambda (w) 135 (let ((pbreak (item.feat w "pbreak")) 136 (emph (item.feat w "R:Token.parent.EMPH"))) 137 (cond 138 ((apml_is_pause w) 139 (insert_pause utt w)) 140 ((or (string-equal "B" pbreak) 141 (string-equal "BB" pbreak)) 142 (insert_pause utt w)) 143 ((equal? w lastword) 144 (insert_pause utt w))))) 145 words) 146 ;; The embarassing bit. Remove any words labelled as punc or fpunc 147 (mapcar 148 (lambda (w) 149 (let ((pos (item.feat w "pos"))) 150 (if (or (string-equal "punc" pos) 151 (string-equal "fpunc" pos)) 152 (let ((pbreak (item.feat w "pbreak")) 153 (wp (item.relation w 'Phrase))) 154 (if (and (string-matches pbreak "BB?") 155 (item.relation.prev w 'Word)) 156 (item.set_feat 157 (item.relation.prev w 'Word) "pbreak" pbreak)) 158 (item.relation.remove w 'Word) 159 ;; can't refer to w as we've just deleted it 160 (item.relation.remove wp 'Phrase))))) 161 words))) 162 utt)) 163 164 165 166;;; 167;;; Intonation. 168;;; 169 170;; Accent prediction (well transfer really). 171;; 172;; We treat L+H* L-H% on a single syllable as a special case. 173 174(set! apml_accent_cart 175 ' 176 ((lisp_apml_accent is "Hstar") 177 ((H*)) 178 ((lisp_apml_accent is "Lstar") 179 ((L*)) 180 ((lisp_apml_LHLH is "LHLH") 181 ((L+H*L-H%)) 182 ((lisp_apml_accent is "LplusHstar") 183 ((L+H*)) 184 ((lisp_apml_accent is "LstarplusH") 185 ((L*+H)) 186 ((NONE)))))))) 187 188(set! apml_boundary_cart 189 ' 190 ((lisp_apml_boundary is "LL") 191 ((L-L%)) 192 ((lisp_apml_LHLH is "LHLH") 193 ((NONE)) ; this is dealt with by the accent feature 194 ((lisp_apml_boundary is "LH") 195 ((L-H%)) 196 ((lisp_apml_boundary is "HH") 197 ((H-H%)) 198 ((lisp_apml_boundary is "HL") 199 ((H-L%)) 200 ((NONE)))))))) 201 202;; feature functions. 203(define (apml_accent syl) 204 (let ((token (item.relation.parent (item.relation.parent syl 'SylStructure) 'Token))) 205 (if (and (eq (item.feat syl 'stress) 1) 206 (item.relation.parent token 'Emphasis)) 207 (item.feat (item.relation.parent token 'Emphasis) 'x-pitchaccent) 208 0))) 209 210(define (apml_boundary syl) 211 (let ((token (item.relation.parent (item.relation.parent syl 'SylStructure) 'Token))) 212 (if (and (> (item.feat syl 'syl_break) 0) 213 (item.relation.parent token 'Boundary)) 214 (item.feat (item.relation.parent token 'Boundary) 'type) 215 0))) 216 217(define (apml_LHLH syl) 218 (let ((accent (apml_accent syl)) 219 (boundary (apml_boundary syl))) 220 (if (and (string-equal accent "LplusHstar") 221 (string-equal boundary "LH")) 222 "LHLH" 223 0))) 224 225 226(define (apml_seg_is_LHLH_vowel seg) 227 (if (and (string-equal (apml_LHLH (item.relation.parent seg 'SylStructure)) 228 "LHLH") 229 (string-equal (item.feat seg 'ph_vc) "+")) 230 "LHLH" 231 0)) 232 233 234;;;; feature functions: 235 236(define (apml_tgtype syl) 237 (let ((l (apml_boundl (item.relation.parent syl 'SylStructure))) 238 (r (apml_boundr (item.relation.parent syl 'SylStructure)))) 239 (if (eq (item.feat syl 'accented) 0) 240 0 ; this is a quirk related to the way the models were trained 241 (cond 242 ((eq l 0) 243 1) 244 ((eq r 1) 245 3) 246 (t 2))))) 247 248 249(define (apml_iecount syl) 250 (if (eq (item.feat syl 'accented) 0) 251 0 ; this is a quirk related to the way the models were trained 252 (+ (item.feat syl 'asyl_in) 1))) 253 254;; suport functions. 255(define (apml_boundl word) 256"(apml_boundl word) 257Number of boundaries in this performative to the left of this word." 258 (let ((w (item.prev word)) 259 (c 0)) 260 (while (and w (apml_same_p w word)) 261 (if (item.relation.parent (item.relation.parent w 'Token) 'Boundary) 262 (set! c (+ c 1))) 263 (set! w (item.prev w))) 264 c)) 265 266(define (apml_boundr word) 267"(apml_boundr word) 268Number of boundaries in this performative to the right of this word." 269 (let ((w word) 270 (c 0)) 271 (while (and w (apml_same_p w word)) 272 (if (item.relation.parent (item.relation.parent w 'Token) 'Boundary) 273 (set! c (+ c 1))) 274 (set! w (item.next w))) 275 c)) 276 277(define (apml_same_p w1 w2) 278"(apml_same_p w1 w2) 279 Are these two words in the same performative?" 280(let ((p1 (item.relation.parent (item.relation.parent w1 'Token) 'SemStructure)) 281 (p2 (item.relation.parent (item.relation.parent w1 'Token) 'SemStructure))) 282 (if (and (item.parent p1) (item.parent p2)) ; not true if theme/rheme omitted. 283 (equal? (item.parent p1) (item.parent p2)) 284 (equal? p1 p2)))) 285 286;;; 287;;; segment timings 288;;; 289 290(define (apml_seg_times utt) 291 "(apml_seg_times utt) 292Output the segment timings for an apml utterance." 293 (let ((segs (utt.relation.items utt 'Segment))) 294 (mapcar 295 (lambda (x) 296 (format t "%s %s\n" (item.name x) (item.feat x 'end))) 297 segs) 298 t)) 299 300;;; 301;;; Additional functions for f0model. 302;;; 303 304 305(define (find_hstar_left syl) 306"(find_hstar_left syl) 307If the closest accent or boundary to the left is H* return how many syllables away it is. Returns 0 if nearest accent is not H*" 308(let ((count 0)) 309 ;; if this syllable has a pitch event 310 (if (or (not (string-equal (item.feat syl 'tobi_accent) "NONE")) 311 (not (string-equal (item.feat syl 'tobi_endtone) "NONE"))) 312 0) 313 (while (and syl 314 (string-equal (item.feat syl 'tobi_accent) "NONE") 315 (string-equal (item.feat syl 'tobi_endtone) "NONE")) 316 (set! count (+ count 1)) 317 (set! syl (item.prev syl))) 318 (cond 319 ;; run out of syllables before finding accent 320 ((null syl) 321 0) 322 ((string-equal (item.feat syl 'tobi_accent) "H*") 323 count) 324 (t 0)))) 325 326(define (find_ll_right syl) 327"(find_ll_right syl) 328If the closest accent or boundary to the right is L-L% return how many syllables away it is. Returns 0 if nearest is not L-L%." 329(let ((count 0)) 330 ;; if this syllable has a pitch event 331 (if (or (not (string-equal (item.feat syl 'tobi_accent) "NONE")) 332 (not (string-equal (item.feat syl 'tobi_endtone) "NONE"))) 333 0) 334 (while (and syl 335 (string-equal (item.feat syl 'tobi_accent) "NONE") 336 (string-equal (item.feat syl 'tobi_endtone) "NONE")) 337 (set! count (+ count 1)) 338 (set! syl (item.next syl))) 339 (cond 340 ;; run out of syllables before finding boundary 341 ((null syl) 342 0) 343 ((string-equal (item.feat syl 'tobi_endtone) "L-L%") 344 count) 345 (t 0)))) 346 347(define (l_spread syl) 348"(l_spread syl) 349Proportion of pitch lowering required due to L- spreading backwards." 350(let ((l (find_hstar_left syl)) 351 (r (find_ll_right syl))) 352 (cond 353 ((or (eq l 0) 354 (eq r 0)) 355 0) 356 (t 357 (/ r (- (+ l r) 1)))))) 358 359 360;;; 361;;; Debuging and other useful stuff. 362;;; 363 364 365 366(define (apml_print_semstruct utt) 367"(apml_print_semstruct utt) 368Pretty print APML semantic structure." 369 (let ((i (utt.relation.first utt 'SemStructure))) 370 (while (not (null i)) 371 (apml_pss_item 0 i) 372 (apml_pss_daughters 1 (item.daughters i)) 373 (set! i (item.next i))))) 374 375(define (apml_pss_daughters depth list) 376 (mapcar 377 (lambda (x) 378 (apml_pss_item depth x) 379 (apml_pss_daughters (+ depth 1) (item.daughters x)) 380 ) 381 list)) 382 383 384(define (apml_pss_item depth item) 385 (let ((c 0)) 386 (while (< c depth) 387 (format t " ") 388 (set! c (+ c 1))) 389 (format t "%s\n" (item.name item)))) 390 391 392(define (apml_print_words utt) 393"(apml_print_words utt) 394 Pretty print APML words with associated accents." 395 (mapcar 396 (lambda (x) 397 (format t "%s (" (item.name x)) 398 (apml_pww_accent x) 399 (apml_pww_boundary x) 400 (apml_pww_pause x) 401 (format t ")\n")) 402 (utt.relation.items utt 'Word)) 403 t) 404 405(define (apml_pww_accent item) 406 (let ((p (item.relation.parent (item.relation.parent item 'Token) 'Emphasis))) 407 (if p (apml_ppw_list (item.features p))))) 408 409(define (apml_pww_boundary item) 410 (let ((p (item.relation.parent (item.relation.parent item 'Token) 'Boundary))) 411 (if p (apml_ppw_list (item.features p))))) 412 413(define (apml_pww_pause item) 414 (let ((p (item.relation.parent (item.relation.parent item 'Token) 'Pause))) 415 (if p (apml_ppw_list (item.features p))))) 416 417(define (apml_ppw_list l) 418 (mapcar 419 (lambda (x) 420 (format t " %s" x)) 421 (flatten l))) 422 423 424(define (apml_print_sylstructure utt filename) 425"(apml_print_sylstructure utt filename) 426Pretty print APML syllable structure. Filename t for stdout" 427 (let (fd) 428 (if (not (eq? filename t)) 429 (set! fd (fopen filename "wb")) 430 (set! fd t)) 431 (mapcar 432 (lambda (x) 433 (format fd "%s\n" (item.name x)) 434 (apml_psyl fd x)) 435 (utt.relation.items utt 'Word)) 436 t)) 437 438(define (apml_psyl fd word) 439 (mapcar 440 (lambda (x) 441 (apml_psegs fd x) 442 (if (eq (item.feat x 'stress) 1) 443 (format fd " (1)")) 444 (if (item.relation.daughter1 x 'Intonation) 445 (begin 446 (let ((ie (item.relation.daughter1 x 'Intonation))) 447 (format fd " [") 448 (while ie 449 (format fd "%s" (item.name ie)) 450 (set! ie (item.next ie)) 451 (if ie (format t " "))) 452 (format fd "]")))) 453 (format fd "\n")) 454 (item.daughters (item.relation word 'SylStructure)))) 455 456(define (apml_psegs fd syl) 457 (let ((segs (item.daughters syl))) 458 (format fd " ") 459 (while segs 460 (format fd "%s" (item.name (car segs))) 461 (if (cdr segs) 462 (format fd ".")) 463 (set! segs (cdr segs))))) 464 465 466(define (apml_get_lr_params) 467 (let ((m 0) 468 (s 0)) 469 (if (or (equal? (Parameter.get 'Int_Target_Method) Int_Targets_LR) 470 (equal? (Parameter.get 'Int_Target_Method) Int_Targets_5_LR)) 471 (begin 472 (set! m (car (cdr (car int_lr_params)))) 473 (set! s (car (cdr (car (cdr int_lr_params)))))) 474 (begin 475 (set! m apml_default_pitch_mean) 476 (set! s apml_default_pitch_standard_deviation))) 477 (list m s))) 478 479 480 481 482(define (apml_initialise) 483 "(apml_initialise) 484Set up the current voice for apml use." 485 (if (not (string-matches current-voice ".*multisyn.*")) ; nothing if multisyn 486 (cond 487 ((or (string-equal (Parameter.get 'Language) "americanenglish") 488 (string-equal (Parameter.get 'Language) "britishenglish")) 489 (begin 490 (format t "Initialising APML for English.\n") 491 ;; Phrasing. 492 (Parameter.set 'Phrase_Method 'cart_tree) 493 (set! phrase_cart_tree apml_phrase_tree) 494 ;; Pauses. 495 ;;(set! duration_cart_tree apml_kal_duration_cart_tree) 496 ;;(set! duration_ph_info apml_kal_durs) 497 ;;(Parameter.set 'Pause_Method Apml_Pauses) 498 ;; Lexicon. 499 ;;;; We now assume the lexicon you have already set is suitable, 500 ;;;; You probably want to ensure this is "apmlcmu" or "unilex" 501 ;;(if (not (member_string "apmlcmu" (lex.list))) 502 ;; (load (path-append lexdir "apmlcmu/apmlcmulex.scm"))) 503 ;;(lex.select "apmlcmu") 504 ;; Add other lex entries here: 505 ;;(lex.add.entry '("minerals" nil (((m ih n) 1) ((er) 0) ((ax l z) 0)))) 506 ;;(lex.add.entry '("fibre" nil (((f ay b) 1) ((er) 0)))) 507 ;;(lex.add.entry '("dont" v (((d ow n t) 1)))) 508 ;;(lex.add.entry '("pectoris" nil (((p eh k) 2) ((t ao r) 1) ((ih s) 0)))) 509 ;;(lex.add.entry '("sideeffects" nil (((s ay d) 1) ((ax f) 0) ((eh k t s) 2)))) 510 511 ;; Intonation events. 512 (set! int_accent_cart_tree apml_accent_cart) 513 (set! int_tone_cart_tree apml_boundary_cart) 514 (Parameter.set 'Int_Method Intonation_Tree) 515 ;; Intonation f0 contour. 516 (set! f0_lr_start apml_f2b_f0_lr_start) 517 (set! f0_lr_left apml_f2b_f0_lr_left) 518 (set! f0_lr_mid apml_f2b_f0_lr_mid) 519 (set! f0_lr_right apml_f2b_f0_lr_right) 520 (set! f0_lr_end apml_f2b_f0_lr_end) 521 (set! int_lr_params 522 (list (list 'target_f0_mean (car (apml_get_lr_params))) 523 (list 'target_f0_std (car (cdr (apml_get_lr_params)))) 524 (list 'model_f0_mean 170) 525 (list 'model_f0_std 40))) 526 (Parameter.set 'Int_Target_Method Int_Targets_5_LR) 527 nil)) 528 ((string-equal (Parameter.get 'Language) "italian") 529 (begin 530 (format t "Initialising APML for Italian.\n") 531 ;; Phrasing. 532 (Parameter.set 'Phrase_Method 'cart_tree) 533 (set! phrase_cart_tree apml_phrase_tree) 534 ;; Intonation events. 535 (set! int_accent_cart_tree apml_accent_cart) 536 (set! int_tone_cart_tree apml_boundary_cart) 537 (Parameter.set 'Int_Method Intonation_Tree) 538 ;; Intonation f0 contour. 539 (set! f0_lr_start apml_f2b_f0_lr_start) 540 (set! f0_lr_mid apml_f2b_f0_lr_mid) 541 (set! f0_lr_end apml_f2b_f0_lr_end) 542 (set! int_lr_params 543 (list (list 'target_f0_mean (car (apml_get_lr_params))) 544 (list 'target_f0_std (car (cdr (apml_get_lr_params)))) 545 (list 'model_f0_mean 170) 546 (list 'model_f0_std 34))) 547 (Parameter.set 'Int_Target_Method Int_Targets_LR) 548 nil)) 549 (t nil)))) 550 551(provide 'apml) 552