1;;; Czech support for Festival 2 3;; Copyright (C) 2003, 2004, 2005, 2006 Brailcom, o.p.s. 4 5;; Author: Milan Zamazal <pdm@brailcom.org> 6 7;; COPYRIGHT NOTICE 8 9;; This program is free software; you can redistribute it and/or modify 10;; it under the terms of the GNU General Public License as published by 11;; the Free Software Foundation; either version 2 of the License, or 12;; (at your option) any later version. 13 14;; This program is distributed in the hope that it will be useful, but 15;; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 16;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 17;; for more details. 18 19;; You should have received a copy of the GNU General Public License 20;; along with this program; if not, write to the Free Software 21;; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 22 23 24;;; Utility functions 25 26(define (czech-min x y) 27 (if (<= x y) x y)) 28 29(define (czech-max x y) 30 (if (>= x y) x y)) 31 32(define (czech-item.has-feat item feat) 33 (assoc feat (item.features item))) 34 35(define (czech-item.feat? item feat value) 36 (and item (string-equal (item.feat item feat) value))) 37 38(define (czech-item.feat*? item feat value) 39 (and item (string-matches (item.feat item feat) value))) 40 41(define (czech-all-same lst) 42 (or (<= (length lst) 1) 43 (and (string-equal (car lst) (cadr lst)) 44 (czech-all-same (cdr lst))))) 45 46(define (czech-suffix string i) 47 (substring string i (- (string-length string) i))) 48 49(defvar czech-randomize t) 50 51(defvar czech-rand-range nil) 52 53(defvar czech-moravian t) 54 55(defvar czech-insert-filling-vowels t) 56 57(defvar czech-group-digits 3) 58 59(define (czech-rand) 60 (if czech-randomize 61 (begin 62 (if (not czech-rand-range) 63 (let ((n 100) 64 (max 0)) 65 (while (> n 0) 66 (let ((r (rand))) 67 (if (> r max) 68 (set! max r))) 69 (set! n (- n 1))) 70 (set! czech-rand-range 1) 71 (while (> max czech-rand-range) 72 (set! czech-rand-range (* 2 czech-rand-range))))) 73 (/ (rand) czech-rand-range)) 74 0.5)) 75 76(define (czech-random-choice lst) 77 (let ((max (length lst))) 78 (let ((n (* (czech-rand) max))) 79 (nth n lst)))) 80 81(define (czech-next-token-punc word) 82 (if (item.relation.next word "Token") 83 "0" 84 (item.feat word "R:Token.n.daughter1.prepunctuation"))) 85 86(define (czech-next-punc word) 87 (let ((token (item.next (item.parent (item.relation word 'Token))))) 88 (while (and token (not (string-matches (item.feat token 'punc) "[^0]+"))) 89 (set! token (item.next token))) 90 (if token 91 (item.feat token 'punc) 92 0))) 93 94(define (czech-prev-punc word) 95 (let ((token (item.prev (item.parent (item.relation word 'Token))))) 96 (while (and token (not (string-matches (item.feat token 'punc) "[^0]+"))) 97 (set! token (item.prev token))) 98 (if token 99 (item.feat token 'punc) 100 0))) 101 102(define (czech-word-stress-unit word) 103 (let ((sylword (item.relation word 'SylStructure))) 104 (if (and sylword (item.daughter1 sylword)) 105 (item.parent (item.relation (item.daughter1 sylword) 'StressUnit))))) 106 107(define (czech-stress-unit-punc unit) 108 (and unit 109 (item.feat unit "daughtern.R:SylStructure.parent.R:Token.parent.punc"))) 110 111;;; Phone set 112 113(defPhoneSet czech 114 (;; vowel or consonant: vowel consonant 115 (vc + - 0) 116 ;; vowel length: short long 117 (vlng s l 0) 118 ;; consonant voicing: yes no unique 119 (cvox + - u 0) 120 ;; can create a syllable: yes no 121 (syl + - 0) 122 ;; can make previous consonant nasal: yes no 123 (postnas + - 0) 124 ;; voiced/unvoiced counterpart: phone 125 (partner b c c~ ch d d~ dz dz~ f g h k p r~* s s~ t t~ v z z~ 0) 126 ) 127 ( 128 ;; c l v s n p 129 (# 0 0 0 0 0 0) ; pause 130 (_ 0 0 0 - 0 0) ; vowel-vowel stroke 131 (a + s 0 + - 0) 132 (a: + l 0 + - 0) 133 (b - 0 + - - p) 134 (c - 0 - - - dz) 135 (c~ - 0 - - - dz~) 136 (ch - 0 - - - 0) 137 (d - 0 + - - t) 138 (d~ - 0 + - - t~) 139 (dz - 0 + - - c) 140 (dz~ - 0 + - - c~) 141 (e + s 0 + - 0) 142 (e: + l 0 + - 0) 143 (f - 0 - - - v) 144 (g - 0 + - + k) 145 (h - 0 + - - ch) 146 (i + s 0 + - 0) 147 (i: + l 0 + - 0) 148 (j - 0 u - - 0) 149 (k - 0 - - + g) 150 (l - 0 u + - 0) 151 (m - 0 u - - 0) 152 (n - 0 u - - 0) 153 (n* - 0 u - - 0) ; n before k or g 154 (n~ - 0 u - - 0) 155 (o + s 0 + - 0) 156 (o: + l 0 + - 0) 157 (p - 0 - - - b) 158 (r - 0 u + - 0) 159 (r~ - 0 + - - r~*) ; (default) voiced r~, may change to r~* 160 (r~* - 0 - - - 0) ; unvoiced r~, can't change back to r~ 161 (s - 0 - - - z) 162 (s~ - 0 - - - z~) 163 (t - 0 - - - d) 164 (t~ - 0 - - - d~) 165 (u + s 0 + - 0) 166 (u: + l 0 + - 0) 167 (v - 0 + - - f) 168 (z - 0 + - - s) 169 (z~ - 0 + - - s~) 170 ) 171) 172(PhoneSet.silences '(#)) 173 174(defvar czech-phoneset-translation '()) 175(defvar czech-phoneset-translation* nil) 176 177;;; Text to phones 178 179(lts.ruleset 180 czech-normalize 181 ;; just transforms the texts to a canonical form 182 () 183 ( 184 ( [ a ] = a ) 185 ( [ � ] = � ) 186 ( [ � ] = e ) 187 ( [ b ] = b ) 188 ( [ c ] = c ) 189 ( [ � ] = � ) 190 ( [ d ] = d ) 191 ( [ � ] = � ) 192 ( [ e ] = e ) 193 ( [ � ] = � ) 194 ( [ � ] = � ) 195 ( [ f ] = f ) 196 ( [ g ] = g ) 197 ( [ h ] = h ) 198 ( [ i ] = i ) 199 ( [ � ] = � ) 200 ( [ j ] = j ) 201 ( [ k ] = k ) 202 ( [ l ] = l ) 203 ( [ m ] = m ) 204 ( [ n ] = n ) 205 ( [ � ] = � ) 206 ( [ o ] = o ) 207 ( [ � ] = � ) 208 ( [ � ] = e ) 209 ( [ p ] = p ) 210 ( [ q ] = q ) 211 ( [ r ] = r ) 212 ( [ � ] = � ) 213 ( [ s ] = s ) 214 ( [ � ] = � ) 215 ( [ � ] = s ) 216 ( [ t ] = t ) 217 ( [ � ] = � ) 218 ( [ u ] = u ) 219 ( [ � ] = � ) 220 ( [ � ] = � ) 221 ( [ � ] = y ) 222 ( [ v ] = v ) 223 ( [ w ] = w ) 224 ( [ x ] = x ) 225 ( [ y ] = y ) 226 ( [ � ] = � ) 227 ( [ z ] = z ) 228 ( [ � ] = � ) 229 ( [ A ] = a ) 230 ( [ � ] = � ) 231 ( [ � ] = e ) 232 ( [ B ] = b ) 233 ( [ C ] = c ) 234 ( [ � ] = � ) 235 ( [ D ] = d ) 236 ( [ � ] = � ) 237 ( [ E ] = e ) 238 ( [ � ] = � ) 239 ( [ � ] = � ) 240 ( [ F ] = f ) 241 ( [ G ] = g ) 242 ( [ H ] = h ) 243 ( [ I ] = i ) 244 ( [ � ] = � ) 245 ( [ J ] = j ) 246 ( [ K ] = k ) 247 ( [ L ] = l ) 248 ( [ M ] = m ) 249 ( [ N ] = n ) 250 ( [ � ] = � ) 251 ( [ O ] = o ) 252 ( [ � ] = � ) 253 ( [ � ] = e ) 254 ( [ P ] = p ) 255 ( [ Q ] = q ) 256 ( [ R ] = r ) 257 ( [ � ] = � ) 258 ( [ S ] = s ) 259 ( [ � ] = � ) 260 ( [ T ] = t ) 261 ( [ � ] = � ) 262 ( [ U ] = u ) 263 ( [ � ] = � ) 264 ( [ � ] = � ) 265 ( [ � ] = y ) 266 ( [ V ] = v ) 267 ( [ W ] = w ) 268 ( [ X ] = x ) 269 ( [ Y ] = y ) 270 ( [ � ] = � ) 271 ( [ Z ] = z ) 272 ( [ � ] = � ) 273 ;; digits are here to make this rule set usable in some other cases 274 ( [ 0 ] = 0 ) 275 ( [ 1 ] = 1 ) 276 ( [ 2 ] = 2 ) 277 ( [ 3 ] = 3 ) 278 ( [ 4 ] = 4 ) 279 ( [ 5 ] = 5 ) 280 ( [ 6 ] = 6 ) 281 ( [ 7 ] = 7 ) 282 ( [ 8 ] = 8 ) 283 ( [ 9 ] = 9 ) 284 )) 285 286(lts.ruleset 287 czech-orthography 288 ;; transforms Czech written text to a phonetic form 289 ((BPV b p v) 290 (DTN d t n) 291 (�I � i �) 292 (IY i y) 293 (�� � �) 294 (#_ # _) 295 (Vowel a � e � � i � o � u � � y �) 296 (Vowel+# a � e � � i � o � u � � y � #) 297 (SZ s z)) 298 ( 299 ;; Special rules 300 ( [ d ] i SZ m u = d ) 301 ( [ n ] i SZ m u = n ) 302 ( [ t ] i SZ m u = t ) 303 ( [ n ] i s t = n ) 304 ( [ t ] i s t = t ) 305 ( [ t ] i c k = t ) 306 ( [ t ] i � t � = t ) 307 ( # a n [ t ] i = t ) 308 ( # a n t [ i ] Vowel = i ) 309 ( t e c h [ n ] i = n ) 310 ( [ d ] i s p = d ) 311 312 ( l i [ c ] o m = c ) 313 ( [ c ] o m = k ) 314 315 ( f r [ e e ] = i: ) 316 317 ( m l a [ d ] i s t = d~ ) 318 ( [ d ] i s t = d ) 319 320 ( # t r a [ t ] i v = t~ ) 321 ( � [ t ] i v = t~ ) 322 ( b o l e s [ t ] i v = t~ ) 323 ( c [ t ] i v = t~ ) 324 ( c e [ t ] i v = t~ ) 325 ( c h [ t ] i v = t~ ) 326 ( c h a m [ t ] i v = t~ ) 327 ( c h r a p [ t ] i v = t~ ) 328 ( c h r o p [ t ] i v = t~ ) 329 ( � [ t ] i v = t~ ) 330 ( d r [ t ] i v = t~ ) 331 ( � [ t ] i v = t~ ) 332 ( f i n [ t ] i v = t~ ) 333 ( h l [ t ] i v = t~ ) 334 ( h o [ t ] i v = t~ ) 335 ( h � m o [ t ] i v = t~ ) 336 ( � [ t ] i v = t~ ) 337 ( k l e v e [ t ] i v = t~ ) 338 ( k r o u [ t ] i v = t~ ) 339 ( o s [ t ] i v = t~ ) 340 ( p i [ t ] i v = t~ ) 341 ( p l e [ t ] i v = t~ ) 342 ( p o l [ t ] i v = t~ ) 343 ( r o [ t ] i v = t~ ) 344 ( s e [ t ] i v = t~ ) 345 ( s m r [ t ] i v = t~ ) 346 ( s o p [ t ] i v = t~ ) 347 ( � [ t ] i v = t~ ) 348 ( v r [ t ] i v = t~ ) 349 ( y [ t ] i v = t~ ) 350 ( � � d o s [ t ] i v = t~ ) 351 ( d i g e s [ t ] i v = t ) 352 ( f e s [ t ] i v = t ) 353 ( k o n t r a s [ t ] i v = t ) 354 ( r e z i s [ t ] i v = t ) 355 ( s u g e s [ t ] i v = t ) 356 ( s [ t ] i v = t~ ) 357 ( [ t ] i v = t ) 358 359 ;; Special orthography rules 360 ( [ d ] �I = d~ ) 361 ( [ t ] �I = t~ ) 362 ( [ n ] �I = n~ ) 363 ( DTN [ � ] = e ) 364 ( BPV [ � ] = j e ) 365 ( m [ � ] = n~ e ) 366 ;; `i' handling 367 ( # m e z [ i ] Vowel = i _ ) 368 ( #_ [ IY ] #_ = i ) 369 ( Vowel+# [ IY ] Vowel+# = j ) 370 ( Vowel [ �� ] Vowel = j i: j ) 371 ( [ IY ] Vowel = i j ) 372 ( [ �� ] Vowel = i: j ) 373 ( Vowel [ IY ] = j ) 374 ( Vowel [ �� ] = j i: ) 375 ;; Some vowel-vowel pairs 376 ( m i m [ o ] Vowel = o _ ) 377 ( # m n o h [ o ] Vowel = o _ ) 378 ;; Two-letter phonemes 379 ( [ d � ] = dz~ ) 380 ( [ d z ] = dz ) 381 ( [ c h ] = ch ) 382 ;; Special letters 383 ( [ � ] = j e ) 384 ( # [ � ] = u: ) 385 ( b e z [ � ] = _ u: ) 386 ( o [ � ] = _ u: ) 387 ( [ � ] h = _ u: ) 388 ( [ � ] � e = _ u: ) 389 ( [ � ] � t = _ u: ) 390 ( [ � ] d r � = _ u: ) 391 ( [ � ] l o h = _ u: ) 392 ( [ � ] r o � = _ u: ) 393 ( [ � ] r o d = _ u: ) 394 ( [ � ] r o v � = _ u: ) 395 ;; Simple letters 396 ( [ a ] = a ) 397 ( [ � ] = a: ) 398 ( [ b ] = b ) 399 ( [ c ] = c ) 400 ( [ � ] = c~ ) 401 ( [ d ] = d ) 402 ( [ � ] = d~ ) 403 ( [ e ] = e ) 404 ( [ � ] = e: ) 405 ( [ f ] = f ) 406 ( [ g ] = g ) 407 ( [ h ] = h ) 408 ( [ i ] = i ) 409 ( [ � ] = i: ) 410 ( [ j ] = j ) 411 ( [ k ] = k ) 412 ( [ l ] = l ) 413 ( [ m ] = m ) 414 ( [ n ] = n ) 415 ( [ � ] = n~ ) 416 ( [ o ] = o ) 417 ( [ � ] = o: ) 418 ( [ p ] = p ) 419 ( [ q ] = k v ) 420 ( [ r ] = r ) 421 ( [ � ] = r~ ) 422 ( [ s ] = s ) 423 ( [ � ] = s~ ) 424 ( [ t ] = t ) 425 ( [ � ] = t~ ) 426 ( [ u ] = u ) 427 ( [ � ] = u: ) 428 ( [ � ] = u: ) 429 ( [ v ] = v ) 430 ( [ w ] = v ) 431 ( [ x ] = k s ) 432 ( [ y ] = i ) 433 ( [ � ] = i: ) 434 ( [ z ] = z ) 435 ( [ � ] = z~ ) 436 )) 437 438 439;; -- missing diphones: n-f n-g n-k 440;; -- special diphones: a-a: a-e: a-o: a-u: a:-a a:-a: a:-e a:-e: a:-o a:-o: 441;; a:-u a:-u: e-a: e-e: e-o: e-u: e:-a e:-a: atd. 442;;;; 443 444(defvar czech-unknown-symbol-word "nezn�m�") 445 446(defvar czech-lts-extra-rules '()) 447 448(define (czech-basic-lts word) 449 (let ((word (if (lts.in.alphabet word 'czech-normalize) 450 word 451 czech-unknown-symbol-word))) 452 (if (string-equal word "") 453 nil 454 (let ((phonetic-form (lts.apply 455 (lts.apply word 'czech-normalize) 456 'czech-orthography)) 457 phonetic-form*) 458 phonetic-form)))) 459 460(define (czech-syllabify-phstress phones) 461 (if (null? phones) 462 () 463 (list (list phones 0)))) 464 465(define (czech-lts word features) 466 (list word 467 nil 468 (let ((transformed (and (not (string-equal word "")) 469 (czech-basic-lts word)))) 470 (if transformed 471 (czech-syllabify-phstress 472 (let ((rules czech-lts-extra-rules*)) 473 (while rules 474 (set! transformed (lts.apply transformed (car rules))) 475 (set! rules (cdr rules))) 476 transformed)) 477 '())))) 478 479(define (czech-downcase word) 480 (if (lts.in.alphabet word 'czech-normalize) 481 (apply string-append (lts.apply word 'czech-normalize)) 482 word)) 483 484;;; Tokenization 485 486(defvar czech-token.unknown-word-name "nezn�m�") 487(defvar czech-token.separator-word-name "odd�lova�") ; our own variable 488(defvar czech-token.garbage-word-name "smet�") ; our own variable 489(defvar czech-token.whitespace " �\t\n\r") 490(defvar czech-token.punctuation "\"'`.,:;!?-(){}[]<>") 491(defvar czech-token.prepunctuation "\"'`({[<") 492 493;;; Token to words processing 494 495(defvar czech-chars "a-zA-Z����������������������������ة����ݮ") 496(defvar czech-char-regexp (string-append "[" czech-chars "]")) 497 498(defvar czech-multiword-abbrevs 499 '(("�" ("dlouh�" "a")) 500 ("�" ("dlouh�" "e")) 501 ("�" ("dlouh�" "i")) 502 ("�" ("dlouh�" "o")) 503 ("�" ("dlouh�" "u")) 504 ("�" ("u" "s" "krou�kem")) 505 ("w" ("dvojit�" "v")) 506 ("�" ("dlouh�" "y")) 507 ("`" ("obr�cen�" "apostrof")) 508 ("\\" ("zp�tn�" "lom�tko")) 509 (">" ("v�t��" "ne�")) 510 ("<" ("men��" "ne�")) 511 ("[" ("lev�" "hranat�")) 512 ("]" ("prav�" "hranat�")) 513 ("{" ("lev�" "slo�en�")) 514 ("}" ("prav�" "slo�en�")) 515 ("(" ("lev�" "kulat�")) 516 (")" ("prav�" "kulat�")) 517 ("=" ("rovn�" "se")) 518 ("\n" ("nov�" "��dek")) 519 ("os/2" ("OS" "2")) 520 ("km/h" ("kilometr�" "za" "hodinu")) 521 ("m/s" ("metr�" "za" "sekundu")) 522 )) 523 524(define (czech-remove element list) 525 (cond 526 ((null? list) list) 527 ((equal? element (car list)) (czech-remove element (cdr list))) 528 (t (cons (car list) (czech-remove element (cdr list)))))) 529 530(define (czech-number name) 531 (cond 532 ((string-matches name "^[-+].*") 533 (cons (substring name 0 1) 534 (czech-number (czech-suffix name 1)))) 535 ((string-matches name ".*[,.].*") 536 (let ((comma (if (string-matches name ".*,.*") "," "."))) 537 (append (czech-number (string-before name comma)) 538 (list comma) 539 (czech-number (string-after name comma))))) 540 ((string-equal name "0") 541 (list "nula")) 542 ((string-matches name "^0..*") 543 (cons "nula" (czech-number (czech-suffix name 1)))) 544 (t 545 (czech-number-from-digits (czech-remove (car (symbolexplode " ")) 546 (symbolexplode name)))))) 547 548(define (czech-digits-1 digits) 549 (if czech-group-digits 550 (let ((n (string-length digits))) 551 (while (> (- n czech-group-digits) 0) 552 (set! n (- n czech-group-digits))) 553 (append (czech-number (substring digits 0 n)) 554 (if (> (length digits) czech-group-digits) 555 (czech-digits (czech-suffix digits n)) 556 nil))) 557 (czech-number digits))) 558 559(define (czech-digits digits) 560 (cond 561 ((string-equal digits "") 562 '()) 563 ((string-matches digits "^0.*") 564 (append (czech-number "0") (czech-digits (czech-suffix digits 1)))) 565 (t 566 (czech-digits-1 digits)))) 567 568(define (czech-prepend-numprefix token name) 569 (if (czech-item.has-feat token 'numprefix) 570 (string-append (item.feat token 'numprefix) name) 571 name)) 572 573(define (czech-number* token name) 574 (czech-number (czech-prepend-numprefix token name))) 575 576(define (czech-number@ name) 577 (cond 578 ((string-equal name "0") 579 '("nula")) 580 ((string-equal name "00") 581 '("nula" "nula")) 582 ((string-matches name "0[1-9]") 583 (cons "nula" (czech-number (string-after name "0")))) 584 (t 585 (czech-number name)))) 586 587(define (czech-number-from-digits digits) 588 (let ((len (length digits))) 589 (cond 590 ((equal? len 1) 591 (let ((d (car digits))) 592 (cond 593 ((string-equal d "0") ()) 594 ((string-equal d "1") (list "jedna")) 595 ((string-equal d "2") (list "dva")) 596 ((string-equal d "3") (list "t�i")) 597 ((string-equal d "4") (list "�ty�i")) 598 ((string-equal d "5") (list "p�t")) 599 ((string-equal d "6") (list "�est")) 600 ((string-equal d "7") (list "sedm")) 601 ((string-equal d "8") (list "osm")) 602 ((string-equal d "9") (list "dev�t"))))) 603 ((equal? len 2) 604 (if (string-equal (car digits) "1") 605 (let ((d (car (cdr digits)))) 606 (cond 607 ((string-equal d "0") (list "deset")) 608 ((string-equal d "1") (list "jeden�ct")) 609 ((string-equal d "2") (list "dvan�ct")) 610 ((string-equal d "3") (list "t�in�ct")) 611 ((string-equal d "4") (list "�trn�ct")) 612 ((string-equal d "5") (list "patn�ct")) 613 ((string-equal d "6") (list "�estn�ct")) 614 ((string-equal d "7") (list "sedmn�ct")) 615 ((string-equal d "8") (list "osmn�ct")) 616 ((string-equal d "9") (list "devaten�ct")))) 617 (append 618 (let ((d (car digits))) 619 (cond 620 ((string-equal d "0") ()) 621 ((string-equal d "2") (list "dvacet")) 622 ((string-equal d "3") (list "t�icet")) 623 ((string-equal d "4") (list "�ty�icet")) 624 ((string-equal d "5") (list "pades�t")) 625 ((string-equal d "6") (list "�edes�t")) 626 ((string-equal d "7") (list "sedmdes�t")) 627 ((string-equal d "8") (list "osmdes�t")) 628 ((string-equal d "9") (list "devades�t")))) 629 (czech-number-from-digits (cdr digits))))) 630 ((equal? len 3) 631 (append 632 (let ((d (car digits))) 633 (cond 634 ((string-equal d "0") ()) 635 ((string-equal d "1") (list "sto")) 636 ((string-equal d "2") (list "dv�" "st�")) 637 ((string-equal d "3") (list "t�i" "sta")) 638 ((string-equal d "4") (list "�ty�i" "sta")) 639 ((string-equal d "5") (list "p�t" "set")) 640 ((string-equal d "6") (list "�est" "set")) 641 ((string-equal d "7") (list "sedm" "set")) 642 ((string-equal d "8") (list "osm" "set")) 643 ((string-equal d "9") (list "dev�t" "set")))) 644 (czech-number-from-digits (cdr digits)))) 645 ((<= len 12) 646 (let ((concatenations '((t "tis�c" "tis�ce" "tis�c") 647 (t "milion" "miliony" "milion�") 648 (nil "miliarda" "miliardy" "miliard"))) 649 (n (- len 3))) 650 (while (> n 3) 651 (set! concatenations (cdr concatenations)) 652 (set! n (- n 3))) 653 (let ((m n) 654 (head-digits ()) 655 (tail-digits digits) 656 (words (car concatenations))) 657 (while (> m 0) 658 (set! head-digits (cons (car tail-digits) head-digits)) 659 (set! tail-digits (cdr tail-digits)) 660 (set! m (- m 1))) 661 (set! head-digits (reverse head-digits)) 662 (append 663 (cond 664 ((let ((all-zero t) 665 (d head-digits)) 666 (while (and all-zero d) 667 (if (string-equal (car d) "0") 668 (set! d (cdr d)) 669 (set! all-zero nil))) 670 all-zero) 671 nil) 672 ((and (equal? n 1) (string-equal (car digits) "1")) 673 (list (car (cdr words)))) 674 ((and (equal? n 1) (string-matches (car digits) "[2-4]")) 675 (list 676 (cond 677 ((string-equal (car digits) "2") 678 (if (car words) "dva" "dv�")) 679 ((string-equal (car digits) "3") "t�i") 680 ((string-equal (car digits) "4") "�ty�i")) 681 (car (cdr (cdr words))))) 682 (t 683 (append 684 (czech-number-from-digits head-digits) 685 (list (car (cdr (cdr (cdr words)))))))) 686 (czech-number-from-digits tail-digits))))) 687 (t 688 (if czech-group-digits 689 (czech-digits (apply string-append digits)) 690 (apply append (mapcar czech-number digits))))))) 691 692(define (czech-tokenize-on-nonalphas string) 693 (cond 694 ((string-equal string "") 695 nil) 696 ((string-matches string (string-append "^" czech-char-regexp "*$")) 697 (list string)) 698 ((string-matches string "^[0-9]+$") 699 (symbolexplode string)) 700 (t 701 (let ((i 0)) 702 (while (string-matches (substring string i 1) czech-char-regexp) 703 (set! i (+ i 1))) 704 (if (eq? i 0) 705 (while (string-matches (substring string i 1) "[0-9]") 706 (set! i (+ i 1)))) 707 (append (if (> i 0) 708 (let ((s (substring string 0 i))) 709 (if (string-matches s "[0-9]+") 710 (symbolexplode s) 711 (list s))) 712 nil) 713 (list (substring string i 1)) 714 (czech-tokenize-on-nonalphas 715 (czech-suffix string (+ i 1)))))))) 716 717(define (czech-token-to-words token name) 718 (cond 719 ;; Special terms 720 ((assoc_string (czech-downcase name) czech-multiword-abbrevs) 721 (apply append (mapcar (lambda (w) (czech-token-to-words token w)) 722 (cadr (assoc_string (czech-downcase name) 723 czech-multiword-abbrevs))))) 724 ((and (string-matches name "[ckm]m") 725 (item.prev token) 726 (czech-item.feat*? token "p.name" "[-+]?[0-9]+[.,]?[0-9]*")) 727 (list (cadr (assoc_string name '(("cm" "centimetr�") ("km" "kilometr�") 728 ("mm" "milimetr�")))))) 729 ;; Spaced numbers 730 ((and (or (string-matches name "^[-+]?[1-9][0-9]?[0-9]?$") 731 (czech-item.has-feat token 'numprefix)) 732 (not (czech-item.has-feat token 'punc)) 733 (item.feat token "n.whitespace" " ") 734 (string-matches (item.feat token "n.name") "^[0-9][0-9][0-9]$")) 735 (item.set_feat (item.next token) 'numprefix 736 (czech-prepend-numprefix token name)) 737 nil) 738 ;; Ordinal numbers 739 ((and (string-matches name "^[0-9]+$") 740 (czech-item.feat? token 'punc ".") 741 (item.next token) 742 (not (string-matches (item.feat token "n.whitespace") " +"))) 743 (if (not (czech-item.has-feat token 'punctype)) 744 (item.set_feat token 'punctype 'num)) 745 (append (czech-number* token name) 746 (list "."))) 747 ;; Numbers beginning with the zero digit 748 ((and (string-matches name "^0[0-9]*$") 749 (not (czech-item.has-feat token 'numprefix))) 750 (czech-digits name)) 751 ;; Any other numbers 752 ((let ((nname (czech-prepend-numprefix token name))) 753 (or (string-matches nname "^[-+]?[0-9]+$") 754 (string-matches nname "^[-+]?[0-9]+[.,][0-9]+$") 755 (string-matches nname "^[-+]?[0-9]+,-$"))) 756 (if (not (czech-item.has-feat token 'punctype)) 757 (item.set_feat token 'punctype 'num)) 758 (let ((nname (czech-prepend-numprefix token name))) 759 (if (and (czech-item.feat? token "n.name" "K�") 760 (string-matches nname "^[-+]?[0-9]+,[-0-9]+$")) 761 (append 762 (czech-number (string-before nname ",")) 763 (list "korun") 764 (let ((hellers (string-after nname ","))) 765 (if (not (string-equal hellers "-")) 766 (append 767 (czech-number hellers) 768 (list "hal���"))))) 769 (czech-number nname)))) 770 ;; Monetary sign 771 ((and (string-equal name "K�") 772 (string-matches (item.feat token "p.name") "^[-+]?[0-9]+,[-0-9]+$")) 773 nil) 774 ;; Acronyms 775 ((let ((capitals "^[A-Z����������ة����ݮ]+$")) 776 (and (string-matches name capitals) 777 (not (lex.lookup_all name)) 778 (not (string-matches (item.feat token "p.name") capitals)) 779 (not (string-matches (item.feat token "p.next") capitals)) 780 (<= (length name) 3) ; longer pronouncable acronyms are not spelled 781 (not (string-equal name "�")) ; Festival bug workaround 782 )) 783 (let ((words ())) 784 (mapcar 785 (lambda (phoneme) 786 (let ((expansion (cadr (assoc_string (czech-downcase phoneme) 787 czech-multiword-abbrevs)))) 788 (if expansion 789 (set! words (append words 790 (mapcar (lambda (w) 791 `((name ,w) (pos sym))) 792 expansion))) 793 (set! words (append words 794 (list `((name ,phoneme) (pos sym)))))))) 795 (lts.apply name 'czech-normalize)) 796 words)) 797 ;; Abbreviations and other unpronouncable words 798 ((and (string-matches 799 name 800 "^[bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQSTVWXZ����������ة��][bcdfghjkmnpqstvwxzBCDFGHJKMNPQSTVWXZ����������ة��]+$") 801 (not (lex.lookup_all name))) 802 (mapcar (lambda (phoneme) `((name ,phoneme) (pos sym))) 803 (lts.apply name 'czech-normalize))) 804 ;; Separators 805 ((and (string-matches name (string-append "^[^" czech-chars "0-9]+$")) 806 (>= (length name) 4) 807 (czech-all-same (symbolexplode name))) 808 (list czech-token.separator-word-name)) 809 ((and (string-matches name (string-append "^[^" czech-chars "0-9]$")) 810 (eqv? (length (item.daughters token)) 0) 811 (let ((punc (item.feat token 'punc))) 812 (and (string-matches punc "...+") ; excludes, among others, punc==0 813 (string-equal (substring punc 0 1) name) 814 (czech-all-same (symbolexplode punc))))) 815 (item.set_feat token 'punc 0) 816 (list czech-token.separator-word-name)) 817 ;; Time (just a few of many possible forms) 818 ((and (string-matches name "^[0-9]+:[0-9][0-9]$") 819 ;; try to identify ratios -- should be better done in POS tagging 820 (not (string-matches (item.feat token "p.name") 821 "^[Pp][Oo][Mm][��].*")) 822 (not (string-matches (item.feat token "p.name") 823 "^[Pp][Rr][Aa][Vv][Dd][��][Pp][Oo][Dd][Oo].*")) 824 (not (string-matches (item.feat token "p.name") 825 "^[��][Aa][Nn][Cc].*"))) 826 (append (czech-number@ (string-before name ":")) 827 (czech-number@ (string-after name ":")))) 828 ((string-matches name "^[0-9]+:[0-9][0-9]:[0-9][0-9]$") 829 (append (czech-number@ (string-before name ":")) 830 (czech-number@ (string-before (string-after name ":") ":")) 831 (czech-number@ (string-after (string-after name ":") ":")))) 832 ;; Ratios 833 ((string-matches name "^[0-9]+:[0-9]+$") 834 (append (czech-number (string-before name ":")) 835 '("ku") 836 (czech-number (string-after name ":")))) 837 ;; Numeric ranges (might be minus as well, but that's rare) 838 ((string-matches name "[0-9]+[.,]*[0-9]*-[0-9]+[.,]*[0-9]*$") 839 ;; we don't include signs here not to break phone numbers and such a 840 ;; written form is incorrect anyway 841 (append 842 (czech-token-to-words token (string-append 843 (substring name 0 1) 844 (string-before (substring name 1 1000) "-"))) 845 '(((name "-") (pos range))) 846 (czech-token-to-words token (string-after (substring name 1 1000) "-")))) 847 ;; Homogenous tokens 848 ((string-matches name (string-append "^" czech-char-regexp "+$")) 849 (if (string-equal (czech-downcase name) "�") ; Festival bug workaround 850 (list "e�") 851 (list name))) 852 ((string-matches name (string-append "^[^" czech-chars "0-9]+$")) 853 (cond 854 ((> (length name) 10) 855 (list czech-token.garbage-word-name)) 856 ((and (eqv? (length name) 1) 857 (string-equal (item.name token) name) 858 (or (not (string-matches (item.feat token 'prepunctuation) "0?")) 859 (not (string-matches (item.feat token 'punctuation) "0?")))) 860 ;; This handles the case when the whole token consists of two or more 861 ;; punctuation characters. In such a case Festival picks one of the 862 ;; characters as the name, while the other characters are treated as 863 ;; punctuation. We want all the character being handled as punctuation. 864 `(((name ,name) (pos punc)))) 865 ((assoc_string name czech-multiword-abbrevs) 866 (cadr (assoc_string name czech-multiword-abbrevs))) 867 (t 868 (symbolexplode name)))) 869 ;; Hyphens 870 ((string-matches name (string-append "^" czech-char-regexp "+-$")) 871 (czech-token-to-words token (string-before name "-"))) 872 ((string-matches name 873 (string-append "^[" czech-chars "0-9]+-[-" czech-chars "0-9]+$")) 874 (append 875 (czech-token-to-words token (string-before name "-")) 876 '(((name "-") (pos punc))) ; necessary for punctuation reading modes 877 (czech-token-to-words token (string-after name "-")))) 878 ;; Starting with digits 879 ((string-matches name "^[0-9].*") 880 (let ((i 0)) 881 (while (member (substring name i 1) 882 '("0" "1" "2" "3" "4" "5" "6" "7" "8" "9")) 883 (set! i (+ i 1))) 884 (append (czech-digits (substring name 0 i)) 885 (czech-token-to-words token (czech-suffix name i))))) 886 ;; Digits inside 887 ((string-matches name "^.*[0-9].*") 888 (let ((i 0) 889 j 890 (digits '("0" "1" "2" "3" "4" "5" "6" "7" "8" "9"))) 891 (while (not (member (substring name i 1) digits)) 892 (set! i (+ i 1))) 893 (set! j (+ i 1)) 894 (while (member (substring name j 1) digits) 895 (set! j (+ j 1))) 896 (append (czech-token-to-words token (substring name 0 i)) 897 (czech-digits (substring name i (- j i))) 898 (czech-token-to-words token (czech-suffix name j))))) 899 ;; Lexicon words 900 ((lex.lookup_all name) 901 (list name)) 902 ;; TODO: roman numerals 903 ;; Heterogenous tokens -- mixed alpha, numeric and non-alphanumeric 904 ;; characters 905 (t 906 (if (not (string-matches name (string-append "^[-" czech-chars "]+$"))) 907 (item.set_feat token 'punctype nil)) 908 (apply 909 append 910 (mapcar (lambda (name) (czech-token-to-words token name)) 911 (czech-tokenize-on-nonalphas name)))))) 912 913;;; Lexicon 914 915(defvar czech-lexicon-file "czech-lexicon.out") 916 917(lex.create "czech") 918(lex.set.phoneset "czech") 919(lex.select "czech") 920(let ((dirs '("/usr/local/share/festival/lib/dicts")) 921 (lexfile nil)) 922 (while dirs 923 (let ((file (path-append (car dirs) czech-lexicon-file))) 924 (if (probe_file file) 925 (begin 926 (set! lexfile file) 927 (set! dirs nil)))) 928 (set! dirs (cdr dirs))) 929 (if lexfile 930 (lex.set.compile.file lexfile) 931 (format t "warning: Czech lexicon file not found\n"))) 932(lex.set.lts.method 'czech-lts) 933(lex.add.entry '("nezn�m�" nil (((n e z n a: m e:) 0)))) 934 935;;; Part of Speech 936 937(defvar czech-guess-pos 938 '((prep0 "k" "s" "v" "z") 939 (prep "bez" "beze" "b�hem" "do" "ke" "ku" "krom" "krom�" "mezi" "mimo" 940 "m�sto" "na" "nad" "nade" "o" "od" "ode" "okolo" "po" "pod" "pode" 941 "pro" "proti" "p�ed" "p�ede" "p�es" "p�eze" "p�i" "se" "skrz" 942 "skrze" "u" "ve" "vyjma" "za" "ze" "zpoza") 943 (conj "a" "i" "ani" "nebo" "anebo") 944 (particle "a�" "k�" "nech�") 945 (question "co" "�emu" "��" "jak" "jak�" "jak�" "jak�" "kam" "kde" 946 "kdo" "kdy" "koho" "kolik" "kolik�t�" "kolik�t�" "kolik�t�" 947 "komu" "kterak" "kter�" "kter�" "kter�ho" "kter�mu" "kter�" 948 "kudy" "na�" "nakolik" "odkud" "pokolik�t�" "pro�") 949 (misc "aby" "abych" "abys" "abychom" "abyste" "ale" "alespo�" "aneb" "ani" 950 "ani�" "an�to" "aspo�" "av�ak" "a�" "a�" "a�koli" "a�koliv" "bu�" 951 "bu�to" "bu�si" "by" "by�" "by�si" "coby" "�i" "�ili" "div" 952 "dokdy" "dokonce" "dokud" "dotud" "jakby" "jakkoli" "jakkoliv" 953 "jakmile" "jako" "jakoby" "jako�" "jako�to" "jednak" "jednou" 954 "jeliko�" "jen" "jenom" "jenom�e" "jen�e" "jestli" "jestli�e" "je�t�" 955 "je�to" "jinak" "kde�to" "kdybych" "kdybys" 956 "kdyby" "kdybychom" "kdybyste" "kdy�" "kv�li" 957 "leda" "leda�e" "le�" "mezit�mco" "mimoto" "na�e�" "neb" "neboli" 958 "nebo�" "nejen" "nejen�e" "ne�" "ne�li" "ne�kuli" "nicm�n�" "n�br�" 959 "odkdy" "odkud" "pak" "pakli" "pakli�e" "podle" "podm�nky" "pokud" 960 "pon�vad�" "pop��pad�" "potom" "potud" "pot�" "pro�e�" "proto" 961 "proto�e" "pr�v�" "p�ece" "p�esto�e" "p�itom" "respektive" "sic" 962 "sice" "sotva" "sotva�e" "tak" "takov�" "taktak" "tak�e" "tak�" 963 "tedy" "ten" "teprve" "to" "toho" "tolik" "tomu" "toti�" "tu" "tud�" 964 "t�m" "t�eba" "t�ebas" "t�ebas�e" "t�eba�e" "v�ak" "v�dy�" "zat�mco" 965 "zda" "zdali" "zejm�na" "zrovna" "zvl�t�" "�e"))) 966 967(define (czech-word-pos? word pos) 968 (member (item.name word) 969 (apply append (mapcar (lambda (p) (cdr (assoc p czech-guess-pos))) 970 (if (consp pos) pos (list pos)))))) 971 972(define (czech-pos-in-phrase-from word) 973 (let ((result 1) 974 (w word)) 975 (while (and (item.prev w) 976 (or (not (czech-item.feat*? w "R:Token.p.name" "0?")) 977 (and (czech-item.feat*? w "p.R:Token.parent.punc" "0?") 978 (czech-item.feat*? w "R:Token.parent.prepunctuation" 979 "0?") 980 (not (czech-item.feat*? 981 w "p.name" 982 (string-append "^[^" czech-chars "0-9]+$")))))) 983 (set! result (+ result 1)) 984 (set! w (item.prev w))) 985 result)) 986 987(define (czech-pos-in-phrase-to word) 988 (let ((result 1) 989 (w word)) 990 (while (and (item.next w) 991 (or (czech-item.feat*? w "R:Token.n.name" "0?") 992 (and (czech-item.feat*? w "R:Token.parent.punc" "0?") 993 (czech-item.feat*? 994 w "R:Token.parent.n.prepunctuation" "0?") 995 (not (czech-item.feat*? 996 w "n.name" 997 (string-append "^[^" czech-chars "0-9]+$")))))) 998 (set! result (+ result 1)) 999 (set! w (item.next w))) 1000 result)) 1001 1002(define (czech-pos-last-in-phrase? word) 1003 (<= (czech-pos-in-phrase-to word) 1)) 1004 1005(define (czech-pos utt) 1006 (mapcar 1007 (lambda (w) 1008 (let ((name (czech-downcase (item.name w))) 1009 (token (item.parent (item.relation w 'Token)))) 1010 (cond 1011 ;; Feature already assigned 1012 ((czech-item.has-feat w 'pos) 1013 nil) 1014 ;; Word followed by a punctuation 1015 ((and (czech-item.has-feat token 'punctype) 1016 (string-matches name (string-append "^[^" czech-chars "0-9]+$"))) 1017 (item.set_feat w 'pos (item.feat token 'punctype))) 1018 ;; Punctuation 1019 ((member name '("\"" "'" "`" "-" "." "," ":" ";" "!" "?" "(" ")")) 1020 ;; Is it a separate punctuation character? 1021 (if (eqv? (length 1022 (item.daughters (item.parent (item.relation w 'Token)))) 1023 1) 1024 (item.set_feat w 'pos nil) 1025 (item.set_feat w 'pos 'punc))) 1026 ;; Single letter, not in the role of a word 1027 ((and (eq? (string-length name) 1) 1028 (czech-pos-last-in-phrase? w)) 1029 (item.set_feat w 'pos 'sym)) 1030 ;; Word "se", not in the role of a preposition 1031 ((and (string-equal name "se") ; the word "se" 1032 (item.prev w) ; not the first word 1033 (or (czech-pos-last-in-phrase? w) ; final word 1034 (czech-word-pos? (item.next w) '(prep0 prep)) 1035 ; followed by a preposition 1036 )) 1037 (item.set_feat w 'pos 'se)) 1038 ;; Question words with the `pak' suffix 1039 ((and (string-matches name ".*pak") 1040 (member (substring name 0 (- (length name) 3)) 1041 (cdr (assoc 'question czech-guess-pos)))) 1042 (item.set_feat w 'pos 'question)) 1043 ;; Nothing special: check the czech-guess-pos tree 1044 (t 1045 (let ((pos-sets czech-guess-pos)) 1046 (while pos-sets 1047 (if (member name (cdar pos-sets)) 1048 (begin 1049 (item.set_feat w 'pos (caar pos-sets)) 1050 (set! pos-sets nil)) 1051 (set! pos-sets (cdr pos-sets))))) 1052 )))) 1053 (utt.relation.items utt 'Word)) 1054 ;; Add commas before conjunctions 1055 (mapcar (lambda (token) 1056 (if (and (czech-item.feat*? token 'punc "0?") 1057 (czech-item.feat? token "daughtern.R:Word.n.gpos" 'conj)) 1058 (item.set_feat token 'punc ","))) 1059 (utt.relation.items utt 'Token)) 1060 utt) 1061 1062;;; Phrase breaks 1063 1064(define (czech-next-simple-punc word) 1065 (let ((unit (item.next (czech-word-stress-unit word)))) 1066 (cond 1067 ((not unit) 1068 0) 1069 ((string-matches (czech-stress-unit-punc unit) ".*[.?!;:,-]") 1070 (czech-stress-unit-punc unit)) 1071 ((czech-item.feat? unit 'preelement 1) 1072 (czech-next-punc word)) 1073 (t 1074 0)))) 1075 1076(define (czech-prev-simple-punc word) 1077 (let ((unit (item.prev (czech-word-stress-unit word)))) 1078 (cond 1079 ((not unit) 1080 0) 1081 ((string-matches (czech-stress-unit-punc unit) ".*[.?!;:,-]") 1082 (czech-stress-unit-punc unit)) 1083 (t 1084 (let ((token (item.prev (item.parent (item.relation word 'Token))))) 1085 (while (and token (not (string-matches (item.feat token 'punc) ".+"))) 1086 (set! token (item.prev token))) 1087 (let ((pword (and token 1088 (item.next token) 1089 (item.daughter1 (item.next token))))) 1090 (if (and pword 1091 (czech-item.feat? (czech-word-stress-unit pword) 1092 'preelement 1)) 1093 (item.feat token 'punc) 1094 0))))))) 1095 1096(defvar czech-phrase-cart-tree 1097 ;; Note: Additional corrections are applied in czech-adjust-phrase-breaks 1098 ;; SB = (very) short break 1099 '(;; end of utterance 1100 (n.name is 0) 1101 ((BB)) 1102 ;; exclude "punctuation words" 1103 ((name matches "[][\"'`.,:;!?(){}<>-]+") 1104 ((NB)) 1105 ;; parentheses 1106 ((R:Token.parent.n.prepunctuation matches "(.*") 1107 ((R:Token.n.name is 0) 1108 ((B)) 1109 ((NB))) 1110 ((lisp_token_end_punc matches ".*)") 1111 ((B)) 1112 ;; 1113 ;; phonetic rules 1114 ;; 1115 ;; "big" punctuations 1116 ((lisp_token_end_punc matches ".*[.?!;]\"") 1117 ((BB)) 1118 ((lisp_token_end_punc matches ".*[.?!;]") 1119 ((lisp_czech-next-token-punc matches "\".*") 1120 ((BB)) 1121 ((XB1))) ; for following adjustments 1122 ;; "smaller" punctuations 1123 ((lisp_token_end_punc matches ".*[:-]") 1124 ;; dashes are treated as pbreaks only if separated by whitespaces 1125 ((R:Token.parent.n.daughter1.name is "-") 1126 ((R:Token.n.name is 0) 1127 ((B)) 1128 ((NB))) 1129 ((B))) 1130 ;; "comma" punctuations 1131 ((lisp_token_end_punc matches ".*,") 1132 ((XB2)) ; for following adjustments 1133 ;; nothing applies -- no break by default 1134 ((NB))))))))))) 1135 1136(define (czech-adjust-phrase-breaks utt) 1137 ;; This must be called after stress units are identified 1138 (mapcar (lambda (w) 1139 (cond 1140 ((czech-item.feat? w 'pbreak 'XB1) ; "big" punctuations 1141 ;; only one stress unit between punctuations makes them shorter 1142 (item.set_feat 1143 w 'pbreak 1144 (cond 1145 ((czech-item.feat? w "R:SylStructure.name" 0) 1146 ;; not a word actually 1147 'BB) 1148 ((or (czech-item.feat*? (czech-word-stress-unit w) 1149 "n.lisp_czech-stress-unit-punc" 1150 ".*[.?!;]\"?") 1151 (czech-item.feat*? (czech-word-stress-unit w) 1152 "p.lisp_czech-stress-unit-punc" 1153 ".*[.?!;]\"?")) 1154 'B) 1155 (t 1156 'BB)))) 1157 ((czech-item.feat? w 'pbreak 'XB2) ; "comma" punctuations 1158 ;; if only one stress unit separates from other punctuation or 1159 ;; the neighbor stress unit contains preelement, phrase break 1160 ;; *may* become shorter 1161 (item.set_feat 1162 w 'pbreak 1163 (cond 1164 ((czech-item.feat? w "R:SylStructure.name" 0) 1165 ;; not a word actually 1166 'B) 1167 ((czech-item.feat*? w "lisp_czech-next-simple-punc" ".*,") 1168 'SB) 1169 ((czech-item.feat*? w "lisp_czech-prev-simple-punc" ".*,") 1170 'B) 1171 ((czech-item.feat*? w "lisp_czech-prev-simple-punc" 1172 ".*[-.?!;:]\"?") 1173 'SB) 1174 ((czech-item.feat*? (czech-word-stress-unit w) 1175 "n.lisp_czech-stress-unit-punc" 1176 ".*[-.?!;:]\"?") 1177 'SB) 1178 (t 1179 'B)))))) 1180 (utt.relation.items utt 'Word))) 1181 1182;;; Segmentation 1183 1184(define (czech-adjust-segments segments) 1185 (if (not (null? segments)) 1186 (let ((item1 (nth 0 segments)) 1187 (item2 (nth 1 segments)) 1188 (item3 (nth 2 segments)) 1189 (item-word (lambda (i) 1190 (item.parent 1191 (item.parent 1192 (item.relation i 'SylStructure)))))) 1193 (let ((name1 (and item1 (item.name item1))) 1194 (name2 (and item2 (item.name item2))) 1195 (name3 (and item3 (item.name item3))) 1196 (same-word? (lambda (i1 i2) 1197 (equal? (item-word i1) (item-word i2))))) 1198 ;; nasals 1199 (if (and (string-equal name1 "n") 1200 (czech-item.feat? item2 "ph_postnas" '+) 1201 (same-word? item1 item2)) 1202 (item.set_name item1 "n*")) 1203 ;; sh 1204 (if (and (string-equal name1 "s") 1205 (string-equal name2 "h") 1206 (same-word? item1 item2)) 1207 (if czech-moravian 1208 (item.set_name item1 "z") 1209 (item.set_name item2 "ch"))) 1210 ;; unvoiced-r~ 1211 (if (and (string-equal name2 "r~") 1212 (czech-item.feat? item1 "ph_cvox" '-) 1213 (same-word? item1 item2)) 1214 (item.set_name item2 "r~*")) 1215 ;; voiced-unvoiced 1216 (if (and (czech-item.feat? item1 "ph_cvox" '+) 1217 (not (czech-item.feat? item1 "ph_partner" 0)) 1218 item2 1219 (or (string-equal name2 "#") 1220 (string-equal name2 "_") 1221 (czech-item.feat? item2 "ph_cvox" '-) 1222 (and (czech-item.feat? item2 "ph_cvox" 'u) 1223 (not (same-word? item1 item2)) 1224 (not (member 1225 (item.name (item-word item1)) 1226 (append 1227 (list "v" "z") 1228 czech-proper-single-syl-prepositions)))))) 1229 (item.set_name item1 (item.feat item1 "ph_partner"))) 1230 ;; unvoiced-voiced 1231 (if (and (czech-item.feat? item1 "ph_cvox" '-) 1232 (not (czech-item.feat? item1 "ph_partner" 0)) 1233 item2 1234 (czech-item.feat? item2 "ph_cvox" '+) 1235 (not (string-equal name2 "v")) 1236 (not (string-equal name2 "r~"))) 1237 (item.set_name item1 (item.feat item1 "ph_partner")))) 1238 (czech-adjust-segments (cdr segments))))) 1239 1240(define (czech-adjust-phonetic-form utt) 1241 (let ((items (utt.relation.items utt 'Segment))) 1242 (let ((names (mapcar item.name items)) 1243 (old-names '())) 1244 (while (not (equal? old-names names)) 1245 (czech-adjust-segments items) 1246 (set! old-names names) 1247 (set! names (mapcar item.name (utt.relation.items utt 'Segment)))))) 1248 utt) 1249 1250(define (czech-intonation-units utt) 1251 ;; Mark syllables before phrase breaks 1252 (let ((token (utt.relation utt 'Token))) 1253 (while token 1254 (if (or (czech-item.feat*? token "daughtern.pbreak" "[SBX]?B[12]?") 1255 (czech-item.feat*? token "daughtern.p.pbreak" "[SBX]?B[12]?")) 1256 (let ((w (item.daughtern token))) 1257 (while (and w 1258 (not (item.daughters (item.relation w 'SylStructure)))) 1259 (set! w (item.prev w))) 1260 (if w 1261 (item.set_feat (item.daughtern (item.relation w 'SylStructure)) 1262 "sentence_break" 1)))) 1263 (set! token (item.next token)))) 1264 ;; Make the intonation units 1265 (utt.relation.create utt 'IntUnit) 1266 (let ((sylwords (utt.relation.items utt 'Syllable)) 1267 (id 1) 1268 (unit-sylwords '())) 1269 (while sylwords 1270 (let ((w (car sylwords))) 1271 (set! unit-sylwords (cons w unit-sylwords)) 1272 (set! sylwords (cdr sylwords)) 1273 ;; If `w' is a last syllable before a relevant phrase break, make new 1274 ;; intonation unit 1275 (if (or (czech-item.feat*? w "sentence_break" 1) 1276 ;; This is the very last syllable (we reach this point when the 1277 ;; last token generates no words for whatever reason) 1278 (not (item.next w))) 1279 (begin 1280 (utt.relation.append 1281 utt 'IntUnit 1282 `("int" ((name ,(format nil "IUnit%d" id))))) 1283 (set! id (+ id 1)) 1284 ;; Add the syllables to the intonation unit 1285 (let ((i (utt.relation.last utt 'IntUnit))) 1286 (set! unit-sylwords (reverse unit-sylwords)) 1287 (while unit-sylwords 1288 (item.append_daughter i (car unit-sylwords)) 1289 (set! unit-sylwords (cdr unit-sylwords)))))))))) 1290 1291(define (czech-yes-no-question int-unit) 1292 (and (string-matches (item.feat 1293 int-unit 1294 "daughtern.R:SylStructure.parent.R:Token.parent.punc") 1295 ".*\\?") 1296 (not (czech-item.feat? int-unit 1297 "daughter1.R:SylStructure.parent.R:Word.pos" 1298 'question)) 1299 (not (czech-item.feat? int-unit 1300 "daughter2.R:SylStructure.parent.R:Word.pos" 1301 'question)))) 1302 1303(defvar czech-proper-single-syl-prepositions 1304 '("bez" "do" "ke" "ku" "na" "nad" "o" "od" "po" "pod" "pro" "p�ed" "p�es" 1305 "p�i" "se" "u" "ve" "za" "ze")) 1306(defvar czech-special-final-words 1307 '("ho" "je" "jej" "ji" "jsem" "jsi" "jste" "m�" "mi" "se" "si" "t�" "ti")) 1308 1309(define (czech-syllable-kernels phonemes) 1310 (let ((kernels '())) 1311 (while phonemes 1312 ;; Starting syllabic consonant doesn't constitute syllable 1313 (if (and (czech-item.feat? (car phonemes) 'ph_vc '-) 1314 (czech-item.feat? (car phonemes) 'ph_syl '+)) 1315 (set! phonemes (cdr phonemes))) 1316 ;; Skip non-syllabic consonants 1317 (while (and phonemes (czech-item.feat? (car phonemes) 'ph_syl '-)) 1318 (set! phonemes (cdr phonemes))) 1319 (if phonemes 1320 ;; Now take the kernel 1321 (let ((kc '()) 1322 (kv '())) 1323 (if (czech-item.feat? (car phonemes) 'ph_vc '-) 1324 (while (and phonemes 1325 (czech-item.feat? (car phonemes) 'ph_vc '-) 1326 (czech-item.feat? (car phonemes) 'ph_syl '+)) 1327 (set! kc (cons (car phonemes) kc)) 1328 (set! phonemes (cdr phonemes)))) 1329 (while (and phonemes 1330 (czech-item.feat? (car phonemes) 'ph_vc '+) 1331 (czech-item.feat? (car phonemes) 'ph_syl '+)) 1332 (set! kv (cons (car phonemes) kv)) 1333 (set! phonemes (cdr phonemes))) 1334 (let ((k (reverse (or kv kc)))) 1335 (let ((seg (and k (item.prev (car k))))) 1336 (while (and seg (or (czech-item.feat? seg 'ph_cvox '+) 1337 (czech-item.feat? seg 'ph_cvox 'u))) 1338 (set! k (cons seg k)) 1339 (set! seg (item.prev seg)))) 1340 (set! kernels (cons k kernels)))))) 1341 (reverse kernels))) 1342 1343(define (czech-syllable-count phonemes) 1344 (length (czech-syllable-kernels phonemes))) 1345 1346(define (czech-stress-unit-phonemes unit) 1347 (if (and unit (not (consp unit))) 1348 (set! unit (item.daughters unit))) 1349 (apply append (mapcar (lambda (syl) 1350 (if (not (eq? syl 'preelement)) 1351 (item.daughters 1352 (item.relation syl 'SylStructure)))) 1353 unit))) 1354 1355(define (czech-unit-syllable-count unit) 1356 (czech-syllable-count (czech-stress-unit-phonemes unit))) 1357 1358(define (czech-identify-stress-units sylwords) 1359 (let ((units (mapcar list sylwords)) 1360 (unit-word (lambda (unit) 1361 (and (eqv? (length unit) 1) 1362 (item.parent 1363 (item.relation (car unit) 'SylStructure))))) 1364 (unit-word-name (lambda (unit) 1365 (and (eqv? (length unit) 1) 1366 (item.feat (car unit) 1367 "R:SylStructure.parent.name")))) 1368 (merge (lambda (list) 1369 (set-car! list (append (car list) (cadr list))) 1370 (set-cdr! list (cddr list))))) 1371 ;; Nothing to do if there is at most one word 1372 (if (<= (length units) 1) 1373 units 1374 (begin 1375 ;; Basic joining 1376 (let ((units* units)) 1377 (while units* 1378 (let ((w (unit-word (car units*)))) 1379 (if (or ;; Join non-syllabic prepositions 1380 (czech-item.feat? w 'pos 'prep0) 1381 ;; Join proper single-syllabic prepositions 1382 (and (member (czech-downcase (item.name w)) 1383 czech-proper-single-syl-prepositions) 1384 (not (czech-item.feat? w "pos" "se")))) 1385 (merge units*))) 1386 (set! units* (cdr units*)))) 1387 ;; At most 1 word now? 1388 (if (<= (length units) 1) 1389 units 1390 (let ((last-unit (car (last units)))) 1391 ;; Final single-syllabic word 1392 (if (and (<= (czech-unit-syllable-count last-unit) 1) 1393 (not (member (unit-word-name last-unit) 1394 czech-special-final-words))) 1395 (set-cdr! (nth_cdr (- (length units) 2) units) '()) 1396 (set! last-unit '())) 1397 ;; Initial single-syllabic words 1398 (let ((units* units) 1399 (singles '())) 1400 (while (and units* 1401 (<= (czech-unit-syllable-count (car units*)) 1)) 1402 (set! singles (cons (car units*) singles)) 1403 (set! units* (cdr units*))) 1404 (set! singles (reverse singles)) 1405 (let ((len (length singles))) 1406 (cond 1407 ((<= len 0) 1408 nil) 1409 ((<= len 1) 1410 (set! units (cons (append (car singles) '(preelement) 1411 (car units*)) 1412 (cdr units*))) 1413 (set! units* units)) 1414 ((<= len 4) 1415 (set! units (cons (apply append singles) units*))) 1416 (t 1417 (let ((first-unit '()) 1418 (n (/ len 2)) 1419 (i 0)) 1420 (while (< i n) 1421 (set! first-unit (append (car singles) first-unit)) 1422 (set! singles (cdr singles)) 1423 (set! i (+ i 1))) 1424 (set! units (cons (reverse first-unit) 1425 (cons (apply append singles) 1426 units*))))))) 1427 ;; Middle word processing 1428 (while units* 1429 (let ((u (car units*))) 1430 ;; The word "a" 1431 (if (string-equal (unit-word-name u) "a") 1432 (merge units*)) 1433 ;; Single-syllabic words 1434 (let ((len (czech-unit-syllable-count u)) 1435 (singles '()) 1436 (slen 0) 1437 (next-units* (cdr units*))) 1438 (while (and next-units* 1439 (<= (czech-unit-syllable-count 1440 (car next-units*)) 1) 1441 (not (string-equal 1442 (unit-word-name (car next-units*)) 1443 "a"))) 1444 (set! singles (cons (car next-units*) singles)) 1445 (set! slen (+ slen 1)) 1446 (set! next-units* (cdr next-units*))) 1447 (set! singles (reverse singles)) 1448 (let ((merge-n (lambda (n units) 1449 (while (> n 0) 1450 (merge units) 1451 (set! n (- n 1)))))) 1452 (cond 1453 ((eqv? slen 0) 1454 nil) 1455 ((eqv? slen 1) 1456 (merge units*)) 1457 ((eqv? slen 2) 1458 (if (and (<= len 4) 1459 (czech-random-choice '(t nil))) 1460 (merge-n 2 units*) 1461 (merge (cdr units*)))) 1462 ((eqv? slen 3) 1463 (if (<= len 3) 1464 (merge-n 3 units*) 1465 (merge-n 2 (cdr units*)))) 1466 ((eqv? slen 4) 1467 (cond 1468 ((>= len 5) 1469 (merge-n 3 (cdr units*))) 1470 ((and (<= len 2) 1471 (czech-random-choice '(t nil))) 1472 (merge-n 4 units*)) 1473 (t 1474 (merge-n 2 units*) 1475 (merge-n 1 (cdr units*))))) 1476 ((eqv? slen 5) 1477 (cond 1478 ((<= len 3) 1479 (merge-n 2 units*) 1480 (merge-n 2 (cdr units*))) 1481 ((<= len 4) 1482 (merge-n 1 (cdr units*)) 1483 (merge-n 2 (cddr units*))) 1484 (t 1485 (merge-n 2 (cdr units*)) 1486 (merge-n 1 (cddr units*))))) 1487 ((eqv? slen 6) 1488 (cond 1489 ((>= len 4) 1490 (merge-n 2 (cdr units*)) 1491 (merge-n 2 (cddr units*))) 1492 ((czech-random-choice '(t nil)) 1493 (merge-n 2 units*) 1494 (merge-n 3 (cdr units*))) 1495 (t 1496 (merge-n 2 units*) 1497 (merge-n 1 (cdr units*)) 1498 (merge-n 1 (cddr units*))))) 1499 (t 1500 ;; This very rare case is not defined in the rules 1501 (while (>= slen 4) 1502 (merge-n 1 (cdr units*)) 1503 (set! units* (cdr units*)) 1504 (set! slen (- slen 2))) 1505 (merge-n (- slen 1) (cdr units*)) 1506 )) 1507 (set! units* next-units*))))) 1508 ;; That's all 1509 (if last-unit 1510 (append units (list last-unit)) 1511 units)))))))) 1512 1513(define (czech-stress-units utt) 1514 (utt.relation.create utt 'IntStress) 1515 (utt.relation.create utt 'StressUnit) 1516 (let ((id 1) 1517 (int-unit (utt.relation.first utt 'IntUnit))) 1518 (while int-unit 1519 (let ((stress-units (czech-identify-stress-units 1520 (item.daughters int-unit)))) 1521 ;; Add the intonation unit at the top of the StressUnit relation 1522 (utt.relation.append utt 'IntStress int-unit) 1523 (while stress-units 1524 ;; Create new stress unit 1525 (item.relation.append_daughter int-unit 'IntStress 1526 `("stress" ((name ,(format nil "SUnit%d" id)) (position "M")))) 1527 (set! id (+ id 1)) 1528 (utt.relation.append utt 'StressUnit 1529 (item.relation.daughtern int-unit 'IntStress)) 1530 ;; Fill it with its words 1531 (let ((i (utt.relation.last utt 'StressUnit))) 1532 (mapcar (lambda (syl) 1533 (if (eq? syl 'preelement) 1534 (item.set_feat i "preelement" 1) 1535 (begin 1536 (item.append_daughter i syl) 1537 (let ((j (item.daughtern i))) 1538 (mapcar (lambda (seg) 1539 (item.append_daughter j seg)) 1540 (item.daughters syl)))))) 1541 (car stress-units))) 1542 (set! stress-units (cdr stress-units)))) 1543 ;; The first stress unit in an intonation unit has position I 1544 (item.set_feat (item.relation.daughter1 int-unit 'IntStress) 1545 "position" "I") 1546 ;; The last stress unit in an intonation unit has position F or FF 1547 ;; (overrides I in case of a conflict) 1548 (item.set_feat (item.relation.daughtern int-unit 'IntStress) "position" 1549 (if (string-matches 1550 (item.feat int-unit 1551 "daughtern.R:SylStructure.parent.R:Token.parent.punc") 1552 ".*[.!?;:].*") 1553 (if (czech-yes-no-question int-unit) "FF-IT" "FF-KKL") 1554 "F")) 1555 ;; Special case: F-1 positions overriding I and M 1556 (if (not (equal? (item.relation.daughtern int-unit 'IntStress) 1557 (item.relation.daughter1 int-unit 'IntStress))) 1558 (let ((last-pos (item.feat int-unit 1559 "R:IntStress.daughtern.position"))) 1560 (item.set_feat (item.prev 1561 (item.relation.daughtern int-unit 'IntStress)) 1562 "position" (string-append last-pos "-1")))) 1563 (set! int-unit (item.next int-unit))))) 1564 1565(define (czech-word utt) 1566 (Classic_Word utt) 1567 (czech-intonation-units utt) 1568 (czech-stress-units utt) 1569 (czech-adjust-phrase-breaks utt) 1570 utt) 1571 1572;;; Pauses 1573 1574(define (czech-add-strokes utt) 1575 (let ((stroke '(_ (("name" _)))) 1576 (i (utt.relation.first utt 'SylStructure))) 1577 (while i 1578 ;; Insert _ before vowels at the beginning of word boundaries 1579 (if (and (czech-item.feat? i "daughter1.daughter1.ph_vc" '+) 1580 (item.prev i) 1581 (not (czech-item.feat? i "daughter1.daughter1.R:Segment.p.name" 1582 '#))) 1583 (item.insert 1584 (item.relation (item.daughter1 (item.daughter1 i)) 'Segment) 1585 stroke 'before)) 1586 (set! i (item.next i))))) 1587 1588(define (czech-pause-breaks utt) 1589 (Classic_Pauses utt) 1590 (let ((words (utt.relation.items utt 'Word))) 1591 ;; Handle SB -- Classic_Pauses doesn't know about it 1592 (mapcar 1593 (lambda (w) 1594 (if (czech-item.feat? w "pbreak" 'SB) 1595 (insert_pause utt w))) 1596 words))) 1597 1598(define (czech-pause utt) 1599 (czech-pause-breaks utt) 1600 (czech-add-strokes utt) 1601 (czech-adjust-phonetic-form utt) 1602 utt) 1603 1604;;; Accents 1605 1606(defvar czech-accent-cart-tree '(NONE)) 1607 1608;; Intonation 1609 1610(defvar czech-int-contours 1611 '(((A 1) (0.02 -0.05) (0.02 -0.04) (0 0)) 1612 ((B 1) (-0.01 0.02) (-0.02 0.04) (-0.02 0.05)) 1613 ((C 1) (-0.04 -0.10) (0.02 -0.16) (-0.02 -0.12) (-0.02 -0.14)) 1614 ((D 1) (-0.14 0.16) (-0.14 0.20)) 1615 ((FA 1) (0.02 -0.04) (0 0)) 1616 ((FB 1) (-0.02 0.04) (-0.02 0.05)) 1617 ((A 2) (0.02 -0.05) (0.04 -0.08) (-0.03 0)) 1618 ((B 2) (-0.04 0.06) (-0.02 0.04) (-0.02 0.07)) 1619 ((C 2) (0 -0.10) (-0.04 -0.10) (-0.02 -0.12) (0.02 -0.16)) 1620 ((D 2) (-0.06 0.08) (-0.10 0.14)) 1621 ((FA 2) (0.04 -0.08) (-0.03 0)) 1622 ((FB 2) (-0.02 0.04) (-0.02 0.07)) 1623 ((A 3) (0.02 -0.02 -0.04) (0.02 -0.04 -0.02) (0.04 -0.04 -0.04) 1624 (0 0 -0.02) (0 -0.04 0) (-0.04 0.08 -0.10) (-0.04 0.04 -0.04) 1625 (-0.02 -0.01 0)) 1626 ((B 3) (0 -0.04 0.04) (0 -0.06 0.04) (-0.06 0.04 0.02) 1627 (-0.01 0.04 0.02) (-0.06 0 0.06) (-0.06 0.02 0.04) 1628 (-0.04 0.04 -0.04)) 1629 ((C 3) (0 -0.05 -0.05) (-0.04 -0.02 -0.08) (-0.06 -0.04 -0.04) 1630 (-0.06 -0.10 -0.02)) 1631 ((D 3) (-0.06 -0.01 0.09) (-0.06 0.08 -0.01)) 1632 ((FA 3) (-0.04 0.08 -0.10) (-0.04 0.04 -0.04) (-0.02 -0.01 0)) 1633 ((FB 3) (-0.06 0 0.06) (-0.06 0.02 0.04) (-0.04 0.04 -0.04)) 1634 ((A 4) (0 0 -0.02 -0.01) (-0.02 0 -0.03 0) (-0.03 0.03 -0.02 -0.01) 1635 (0 0 -0.01 0)) 1636 ((B 4) (0 -0.03 0.01 0.02) (-0.02 0 0.02 0.02) (0 -0.03 0.03 0.02)) 1637 ((C 4) (-0.04 -0.06 -0.02 -0.02) (-0.02 -0.02 -0.04 -0.06) 1638 (-0.02 -0.08 -0.04 -0.02)) 1639 ((D 4) (-0.06 0 -0.01 0.12) (-0.06 0.12 0 -0.03)) 1640 ((FA 4) (-0.03 0.03 -0.02 -0.01) (0 0 -0.01 0)) 1641 ((FB 4) (-0.02 0 0.02 0.02) (0 -0.03 0.03 0.02)) 1642 ((A 5) (-0.02 0.02 -0.02 -0.01 0) (-0.03 0.03 0 0 -0.03) 1643 (-0.02 0.02 0 0 -0.02)) 1644 ((B 5) (0 -0.03 0.01 0.02 0.01) (0.01 -0.02 0 0 0.02) 1645 (-0.02 0 0.02 0.02 0)) 1646 ((C 5) (-0.02 0 -0.02 -0.04 -0.06) (-0.02 -0.08 -0.02 -0.02 -0.02) 1647 (-0.02 -0.02 -0.08 -0.02 -0.02)) 1648 ((D 5) (-0.06 0 -0.01 -0.01 0.13) (-0.06 0.13 0 -0.04 -0.04)) 1649 ((FA 5) (-0.02 0.02 0 0 -0.02)) 1650 ((FB 5) (-0.02 0 0.02 0.02 0)) 1651 ((A 6) (-0.02 0.02 -0.01 0 (0) -0.02 -0.01)) 1652 ((B 6) (0 -0.01 0 0 (0) 0.01 0.01) (0 -0.02 0.01 0.01 (0) 0.01 0.02)) 1653 ((C 6) (-0.02 0 -0.02 -0.04 -0.06 0 (0)) 1654 (-0.02 -0.08 -0.02 -0.02 -0.02 (0)) 1655 (-0.02 -0.02 -0.08 -0.02 -0.02 -0.02 (0))) 1656 ((D 6) (-0.06 0 -0.01 -0.01 0 (0) 0.13) (0.13 0 -0.02 0 (0) -0.04 -0.04)) 1657 ((FA 6) (-0.02 0.02 -0.01 0 (0) -0.02 -0.01)) 1658 ((FB 6) (0 -0.02 0.01 0.01 (0) 0.01 0.02)) 1659 )) 1660 1661(defvar czech-int-contour-tree 1662 ;; Contourtype set: A, B, C, D, FA and FB (for F and F-1 positions) 1663 '((position is I) 1664 ((preelement > 0) 1665 ((B)) 1666 ((A))) 1667 ((position is M) 1668 ((p.contourtype is B) 1669 ((A)) 1670 ((B))) 1671 ((position is F-1) ((FB)) 1672 ((position is F) ((FA)) 1673 ((position is FF-KKL-1) ((A)) 1674 ((position is FF-KKL) ((C)) 1675 ((position is FF-IT-1) ((B)) 1676 ((position is FF-IT) ((D)) 1677 ((ERROR))))))))))) 1678 1679(define (czech-int-select-contours utt) 1680 (let ((unit (utt.relation utt 'StressUnit)) 1681 (last-contour nil)) 1682 (while unit 1683 (let ((position (item.feat unit 'position))) 1684 ;; Determine appropriate contour type 1685 (let ((contourtype (wagon_predict unit czech-int-contour-tree))) 1686 (item.set_feat unit "contourtype" contourtype) 1687 ;; Find particular contour 1688 (let ((nsyls (czech-unit-syllable-count unit))) 1689 (let ((contour (czech-random-choice 1690 (cdr (assoc (list contourtype 1691 (if (<= nsyls 6) nsyls 6)) 1692 czech-int-contours))))) 1693 ;; Adjust the first syllables of final contours 1694 (if (or (string-equal position "F") 1695 (string-matches position "FF.*[A-Z]")) 1696 (let ((adjust-contour 1697 (lambda (c adj) 1698 (if last-contour 1699 (cons (+ (car (last last-contour)) adj) (cdr c)) 1700 c)))) 1701 (cond 1702 ((string-equal position "F") 1703 (set! contour (adjust-contour contour -0.02))) 1704 ((string-equal position "FF-KKL") 1705 (set! contour (adjust-contour contour 0.02))) 1706 ((string-equal position "FF-IT") 1707 (set! contour (adjust-contour contour -0.02)))))) 1708 ;; Set contour values for preelements 1709 (if (czech-item.feat? unit 'preelement 1) 1710 (set! contour (cons (- (car contour) 0.02) contour))) 1711 ;; Finalize contours of long units 1712 (let ((n (- nsyls 6))) 1713 (if (>= n 0) 1714 (let ((prefix '()) 1715 (contour* contour)) 1716 (while (not (consp (car contour*))) 1717 (set! prefix (cons (car contour*) prefix)) 1718 (set! contour* (cdr contour*))) 1719 (let ((val (caar contour*))) 1720 (set! contour* (cdr contour*)) 1721 (while (> n 0) 1722 (set! contour* (cons val contour*)) 1723 (set! n (- n 1))) 1724 (set! contour (append (reverse prefix) 1725 contour*)))))) 1726 (set! last-contour contour) 1727 (item.set_feat unit 'contour contour))))) 1728 (set! unit (item.next unit))) 1729 ;; Spread the contours on sylwords 1730 (set! unit (utt.relation utt 'StressUnit)) 1731 (while unit 1732 (let ((contour (item.feat unit 'contour)) 1733 (kernels (czech-syllable-kernels 1734 (czech-stress-unit-phonemes unit)))) 1735 (if (eqv? (length kernels) 1) 1736 ;; One-syllabic units have two-number contours 1737 ;; (they can occur only in the final positions) 1738 (let ((k (car kernels)) 1739 (contour-1 (car contour)) 1740 (contour-2 (cadr contour))) 1741 (let ((k* (reverse k)) 1742 (last-k (car (last k))) 1743 (contour-list (list (list 0.1 contour-1) 1744 (list 0.9 contour-2)))) 1745 (if (eqv? (length k) 1) 1746 ;; Single phone in kernel -- put both values on it 1747 (item.set_feat (car k) 'contourval contour-list) 1748 ;; Multiple phones -- spread the values over true kernel 1749 (begin 1750 (while (czech-item.feat? (cadr k*) 'ph_vc '+) 1751 (set! k* (cdr k*))) 1752 (if (eq? (car k*) last-k) 1753 (item.set_feat last-k 'contourval contour-list) 1754 (begin 1755 (item.set_feat (car k*) 'contourval contour-1) 1756 (item.set_feat last-k 'contourval contour-2))))) 1757 ;; Extend the contour pair to certain neighbors 1758 (set! k* (cdr k*)) 1759 (while k* 1760 (item.set_feat (car k*) 'contourval contour-1) 1761 (set! k* (cdr k*))) 1762 (let ((next-k (item.next last-k))) 1763 (while (or (czech-item.feat? next-k 'ph_cvox '+) 1764 (czech-item.feat? next-k 'ph_cvox 'u)) 1765 (item.set_feat next-k 'contourval contour-2) 1766 (set! next-k (item.next next-k)))))) 1767 ;; Otherwise spread the contour value over all kernels 1768 (while kernels 1769 (let ((contourval (car contour))) 1770 (mapcar (lambda (seg) 1771 (item.set_feat seg 'contourval contourval)) 1772 (car kernels))) 1773 (set! kernels (cdr kernels)) 1774 (set! contour (cdr contour))))) 1775 (set! unit (item.next unit))))) 1776 1777(defvar czech-int-simple-params '((f0_mean 100) (f0_std 10))) 1778 1779(define (czech-int-targets utt syl) 1780 (let ((segments (item.relation.daughters syl 'SylStructure)) 1781 (syl-start (item.feat syl 'syllable_start)) 1782 (f0-base (cadr (assq 'f0_mean int_general_params))) 1783 (f0-std (/ (cadr (assq 'f0_std int_general_params)) 10)) 1784 (times-values '())) 1785 (let ((last-seg-end syl-start) 1786 (f0-value (lambda (contourval) 1787 (* f0-base (+ 1 (* f0-std contourval)))))) 1788 (while segments 1789 (let ((s (car segments))) 1790 (let ((contourval (and (czech-item.has-feat s 'contourval) 1791 (item.feat s 'contourval))) 1792 (seg-end (item.feat s 'end))) 1793 (cond 1794 ((consp contourval) 1795 (let ((tlen (- seg-end last-seg-end))) 1796 (set! times-values 1797 (append 1798 (mapcar (lambda (v) 1799 (list (+ last-seg-end 1800 (* (read-from-string (car v)) tlen)) 1801 (f0-value (cadr v)))) 1802 (reverse contourval)) 1803 times-values)))) 1804 (contourval 1805 (let ((time (/ (+ last-seg-end seg-end) 2.0)) 1806 (value (f0-value contourval))) 1807 (set! times-values (cons (list time value) times-values))))) 1808 (set! last-seg-end seg-end) 1809 (set! segments (cdr segments)))))) 1810 ;; Festival apparently decreases F0 at the end of the utterance, prevent it 1811 (if (not (null? times-values)) 1812 (let ((last-time (car (car times-values))) 1813 (last-value (cadr (car times-values))) 1814 (last-seg (item.relation.daughtern syl 'SylStructure))) 1815 (set! times-values (cons (list (czech-max (- (item.feat last-seg 'end) 0.01) 1816 (+ last-time 0.001)) 1817 last-value) 1818 times-values)))) 1819 (reverse times-values))) 1820 1821;;; Duration 1822 1823(defvar czech-phoneme-durations 1824 '((# 0.100) 1825 (_ 0.025) 1826 (a 0.098) 1827 (a: 0.142) 1828 (b 0.067) 1829 (c 0.102) 1830 (ch 0.087) 1831 (c~ 0.099) 1832 (d 0.062) 1833 (dz 0.108) 1834 (dz~ 0.094) 1835 (d~ 0.077) 1836 (e 0.099) 1837 (e: 0.126) 1838 (f 0.089) 1839 (g 0.067) 1840 (h 0.064) 1841 (i 0.077) 1842 (i: 0.120) 1843 (j 0.065) 1844 (k 0.080) 1845 (l 0.057) 1846 (m 0.068) 1847 (n 0.075) 1848 (n* 0.098) 1849 (n~ 0.079) 1850 (o 0.089) 1851 (o: 0.137) 1852 (p 0.079) 1853 (r 0.060) 1854 (r~ 0.065) 1855 (r~* 0.073) 1856 (s 0.098) 1857 (s~ 0.090) 1858 (t 0.082) 1859 (t~ 0.090) 1860 (u 0.082) 1861 (u: 0.139) 1862 (v 0.058) 1863 (z 0.077) 1864 (z~ 0.074) 1865 )) 1866 1867(defvar czech-silence-durations 1868 '(("BB" 0.206 0.238) ("B" 0.082 0.095) ("SB" 0.008 0.010))) 1869 1870(defvar czech-stress-duration-factors 1871 '((1 1.03) 1872 (2 1.02) 1873 (3 1.01) 1874 (4 1.00) 1875 (5 1.00) 1876 (6 0.99) 1877 (7 0.98) 1878 (8 0.96) 1879 (9 0.94) 1880 (10 0.93) 1881 (11 0.91) 1882 (12 0.90))) 1883 1884(defvar czech-duration-random-factor 0.2) 1885 1886(define (czech-duration-pauses utt) 1887 (let ((word (utt.relation.first utt 'Word))) 1888 (while word 1889 1890 (let ((durspec (assoc_string (item.feat word "pbreak") 1891 czech-silence-durations))) 1892 (if durspec 1893 (let ((min (nth 1 durspec)) 1894 (max (nth 2 durspec)) 1895 (seg (find_last_seg word))) 1896 (if seg 1897 (item.set_feat 1898 (item.next (item.relation seg 'Segment)) 1899 'dur_factor 1900 (* 10 (+ min (* (- max min) (czech-rand))))))))) 1901 (set! word (item.next word))))) 1902 1903(define (czech-duration-factors utt) 1904 (let ((sunit (utt.relation.first utt 'StressUnit))) 1905 (while sunit 1906 (let ((nphones (length (czech-stress-unit-phonemes sunit)))) 1907 (cond 1908 ((> nphones 12) 1909 (set! nphones 12)) 1910 ((< nphones 1) 1911 (set! nphones 1))) 1912 (let ((factor (cadr (assoc nphones czech-stress-duration-factors)))) 1913 (mapcar (lambda (syl) 1914 (mapcar (lambda (seg) 1915 (item.set_feat seg "dur_factor" factor)) 1916 (item.relation.daughters syl 'SylStructure))) 1917 (item.relation.leafs sunit 'StressUnit)))) 1918 (set! sunit (item.next sunit)))) 1919 ;; Adjust duration factors for initial single-syllabic word 1920 ;; (Take the initial word from Word, not just SylStructure, which may contain 1921 ;; prepunctuation.) 1922 (let ((1st-word (utt.relation.first utt 'Word))) 1923 (while (and 1st-word 1924 (item.daughter1 1st-word) 1925 (item.daughter1 (item.daughter1 1st-word))) 1926 (set! 1st-word (item.next 1st-word))) 1927 (let ((phonemes (and 1st-word 1928 (apply append 1929 (mapcar item.daughters 1930 (item.daughters 1931 (item.relation 1st-word 1932 'SylStructure))))))) 1933 (if (eqv? (czech-syllable-count phonemes) 1) 1934 (let ((durfact (cadr (assoc (czech-min (length phonemes) 12) 1935 czech-stress-duration-factors)))) 1936 (mapcar (lambda (ph) (item.set_feat ph 'dur_factor durfact)) 1937 phonemes)))))) 1938 1939(define (czech-duration-compute utt) 1940 (mapcar 1941 (lambda (seg) 1942 (let ((factor (* (item.feat seg "dur_factor") 1943 (Param.get 'Duration_Stretch)))) 1944 (item.set_feat seg "end" 1945 (+ (item.feat seg "start") 1946 (* (if (<= factor 0) 1 factor) 1947 (cadr (assoc_string (item.name seg) 1948 czech-phoneme-durations*))))))) 1949 (utt.relation.items utt 'Segment))) 1950 1951(define (czech-duration utt) 1952 (czech-duration-pauses utt) 1953 (czech-duration-factors utt) 1954 (czech-duration-compute utt) 1955 utt) 1956 1957;;; Volume 1958 1959(defvar czech-volume-scale 1.8) 1960(defvar czech-volume-scale* nil) 1961 1962(define (czech-adjust-volume utt) 1963 (utt.wave.rescale utt czech-volume-scale*)) 1964 1965;;; Final phoneme translation 1966 1967(define (czech-translate-add-vowels utt) 1968 (if (and (string-equal (Param.get 'Language) 'czech) 1969 czech-insert-filling-vowels) 1970 (let ((i (utt.relation.first utt 'Segment)) 1971 (insert-item (lambda (name orig-ph end pos) 1972 (let ((feats (item.features orig-ph)) 1973 (new-feats `((name ,name) (end ,end)))) 1974 (while feats 1975 (if (not (member (caar feats) '(id name end))) 1976 (set! new-feats (cons (car feats) 1977 new-feats))) 1978 (set! feats (cdr feats))) 1979 (item.insert orig-ph (cons name (list new-feats)) 1980 pos) 1981 (let ((new ((if (eq? pos 'after) 1982 item.next item.prev) 1983 orig-ph))) 1984 (if (member 'SylStructure 1985 (item.relations orig-ph)) 1986 (item.relation.insert orig-ph 'SylStructure 1987 new pos)))))) 1988 (vowel? (lambda (ph) (czech-item.feat? ph 'ph_vc '+))) 1989 (last-end 0.0)) 1990 (while i 1991 (let ((end (item.feat i 'end))) 1992 (cond 1993 ;; Duplicate vowels 1994 ((vowel? i) 1995 (insert-item (item.name i) i (/ (+ last-end end) 2) 'before))) 1996 (set! last-end end)) 1997 (set! i (item.next i))))) 1998 utt) 1999 2000(define (czech-translate-phonemes utt) 2001 (if (and (string-equal (Param.get 'Language) 'czech) 2002 czech-phoneset-translation*) 2003 (mapcar 2004 (lambda (item) 2005 (let ((tr (assoc (item.name item) czech-phoneset-translation*))) 2006 (if tr (item.set_name item (cadr tr))))) 2007 (utt.relation.items utt 'Segment))) 2008 utt) 2009 2010(defvar czech-after-analysis-hooks 2011 (list czech-translate-add-vowels czech-translate-phonemes)) 2012 2013;;; Finally, the language definition itself 2014 2015(define (czech-reset-parameters) 2016 (set! czech-lts-extra-rules* czech-lts-extra-rules) 2017 (set! czech-int-simple-params* czech-int-simple-params) 2018 (set! czech-phoneme-durations* czech-phoneme-durations) 2019 (set! czech-volume-scale* czech-volume-scale) 2020 (set! czech-phoneset-translation* czech-phoneset-translation) 2021 (set! czech-after-analysis-hooks* czech-after-analysis-hooks) 2022 (Param.set 'Synth_Method 'UniSyn)) 2023 2024(define (voice-czech-common) 2025 (voice_reset) 2026 (Param.set 'Language 'czech) 2027 ;; Phone set 2028 (Param.set 'PhoneSet 'czech) 2029 (PhoneSet.select 'czech) 2030 (set! pos_lex_name nil) 2031 ;; Tokenization 2032 (set! token.unknown_word_name czech-token.unknown-word-name) 2033 (set! token.whitespace czech-token.whitespace) 2034 (set! token.punctuation czech-token.punctuation) 2035 (set! token.prepunctuation czech-token.prepunctuation) 2036 (set! token_to_words czech-token-to-words) 2037 (Param.set 'Token_Method 'Token_Any) 2038 ;; Lexicon selection 2039 (lex.select "czech") 2040 ;; Segmentation 2041 (Param.set 'Word_Method 'czech-word) 2042 ;; Part of speech 2043 (set! guess_pos czech-guess-pos) ; not actually used 2044 (Param.set 'POS_Method czech-pos) 2045 ;; Simple phrase break prediction by punctuation 2046 (set! pos_supported nil) 2047 (set! phrase_cart_tree czech-phrase-cart-tree) 2048 (Param.set 'Phrase_Method 'cart_tree) 2049 (Param.set 'Phrasify_Method Classic_Phrasify) 2050 ;; Pauses 2051 (Param.set 'Pause_Method czech-pause) 2052 ;; Accent prediction and intonation 2053 (set! int_accent_cart_tree czech-accent-cart-tree) 2054 (Param.set 'Int_Method czech-int-select-contours) 2055 (set! int_general_params (cons (list 'targ_func czech-int-targets) 2056 czech-int-simple-params*)) 2057 (Param.set 'Int_Target_Method Int_Targets_General) 2058 ;; Duration prediction 2059 (Param.set 'Duration_Method czech-duration) 2060 ;; Postlex rules 2061 (set! postlex_rules_hooks '()) 2062 (set! after_analysis_hooks czech-after-analysis-hooks*) 2063 ;; Final voice adjustment 2064 (set! after_synth_hooks (list czech-adjust-volume)) 2065 ;; Set current voice 2066 (set! current_voice_reset nil) 2067 (set! current-voice 'czech)) 2068 2069(defmac (czech-proclaim-voice form) 2070 (let ((name (nth 1 form)) 2071 (description (nth 2 form)) 2072 (body (nth_cdr 3 form)) 2073 (options ())) 2074 (if (consp name) 2075 (begin 2076 (set! options (cdr name)) 2077 (set! name (car name)))) 2078 (set! name (intern (string-append 'czech_ name))) 2079 (let ((parameters `((language czech) 2080 (dialect ,(cdr (assoc 'dialect options))) 2081 (gender ,(cadr (assoc 'gender options))) 2082 (coding ISO-8859-2) 2083 (description ,description)))) 2084 `(begin 2085 (define (,(intern (string-append 'voice_ name))) 2086 (czech-reset-parameters) 2087 ,@body 2088 (voice-czech-common) 2089 (set! current-voice (quote ,name))) 2090 (proclaim_voice 2091 (quote ,name) 2092 (quote ,parameters)))))) 2093 2094(provide 'czech) 2095