Lines Matching +refs:czech +refs:token +refs:whitespace

26 (define (czech-min x y)
29 (define (czech-max x y)
32 (define (czech-item.has-feat item feat)
35 (define (czech-item.feat? item feat value)
38 (define (czech-item.feat*? item feat value)
41 (define (czech-all-same lst)
44 (czech-all-same (cdr lst)))))
46 (define (czech-suffix string i)
49 (defvar czech-randomize t)
51 (defvar czech-rand-range nil)
53 (defvar czech-moravian t)
55 (defvar czech-insert-filling-vowels t)
57 (defvar czech-group-digits 3)
59 (define (czech-rand)
60 (if czech-randomize
62 (if (not czech-rand-range)
70 (set! czech-rand-range 1)
71 (while (> max czech-rand-range)
72 (set! czech-rand-range (* 2 czech-rand-range)))))
73 (/ (rand) czech-rand-range))
76 (define (czech-random-choice lst)
78 (let ((n (* (czech-rand) max)))
81 (define (czech-next-token-punc word)
86 (define (czech-next-punc word)
87 (let ((token (item.next (item.parent (item.relation word 'Token)))))
88 (while (and token (not (string-matches (item.feat token 'punc) "[^0]+")))
89 (set! token (item.next token)))
90 (if token
91 (item.feat token 'punc)
94 (define (czech-prev-punc word)
95 (let ((token (item.prev (item.parent (item.relation word 'Token)))))
96 (while (and token (not (string-matches (item.feat token 'punc) "[^0]+")))
97 (set! token (item.prev token)))
98 (if token
99 (item.feat token 'punc)
102 (define (czech-word-stress-unit word)
107 (define (czech-stress-unit-punc unit)
113 (defPhoneSet czech unknown
174 (defvar czech-phoneset-translation '())
175 (defvar czech-phoneset-translation* nil)
180 czech-normalize
287 czech-orthography
444 (defvar czech-unknown-symbol-word "nezn�m�")
446 (defvar czech-lts-extra-rules '())
448 (define (czech-basic-lts word)
449 (let ((word (if (lts.in.alphabet word 'czech-normalize)
451 czech-unknown-symbol-word)))
455 (lts.apply word 'czech-normalize)
456 'czech-orthography))
460 (define (czech-syllabify-phstress phones)
465 (define (czech-lts word features)
469 (czech-basic-lts word))))
471 (czech-syllabify-phstress
472 (let ((rules czech-lts-extra-rules*))
479 (define (czech-downcase word)
480 (if (lts.in.alphabet word 'czech-normalize)
481 (apply string-append (lts.apply word 'czech-normalize))
486 (defvar czech-token.unknown-word-name "nezn�m�")
487 (defvar czech-token.separator-word-name "odd�lova�") ; our own variable
488 (defvar czech-token.garbage-word-name "smet�") ; our own variable
489 (defvar czech-token.whitespace " �\t\n\r")
490 (defvar czech-token.punctuation "\"'`.,:;!?-(){}[]<>")
491 (defvar czech-token.prepunctuation "\"'`({[<")
495 (defvar czech-chars "a-zA-Z����������������������������ة����ݮ")
496 (defvar czech-char-regexp (string-append "[" czech-chars "]"))
498 (defvar czech-multiword-abbrevs
524 (define (czech-remove element list)
527 ((equal? element (car list)) (czech-remove element (cdr list)))
528 (t (cons (car list) (czech-remove element (cdr list))))))
530 (define (czech-number name)
534 (czech-number (czech-suffix name 1))))
537 (append (czech-number (string-before name comma))
539 (czech-number (string-after name comma)))))
543 (cons "nula" (czech-number (czech-suffix name 1))))
545 (czech-number-from-digits (czech-remove (car (symbolexplode " "))
548 (define (czech-digits-1 digits)
549 (if czech-group-digits
551 (while (> (- n czech-group-digits) 0)
552 (set! n (- n czech-group-digits)))
553 (append (czech-number (substring digits 0 n))
554 (if (> (length digits) czech-group-digits)
555 (czech-digits (czech-suffix digits n))
557 (czech-number digits)))
559 (define (czech-digits digits)
564 (append (czech-number "0") (czech-digits (czech-suffix digits 1))))
566 (czech-digits-1 digits))))
568 (define (czech-prepend-numprefix token name)
569 (if (czech-item.has-feat token 'numprefix)
570 (string-append (item.feat token 'numprefix) name)
573 (define (czech-number* token name)
574 (czech-number (czech-prepend-numprefix token name)))
576 (define (czech-number@ name)
583 (cons "nula" (czech-number (string-after name "0"))))
585 (czech-number name))))
587 (define (czech-number-from-digits digits)
629 (czech-number-from-digits (cdr digits)))))
644 (czech-number-from-digits (cdr digits))))
684 (czech-number-from-digits head-digits)
686 (czech-number-from-digits tail-digits)))))
688 (if czech-group-digits
689 (czech-digits (apply string-append digits))
690 (apply append (mapcar czech-number digits)))))))
692 (define (czech-tokenize-on-nonalphas string)
696 ((string-matches string (string-append "^" czech-char-regexp "*$"))
702 (while (string-matches (substring string i 1) czech-char-regexp)
714 (czech-tokenize-on-nonalphas
715 (czech-suffix string (+ i 1))))))))
717 (define (czech-token-to-words token name)
720 ((assoc_string (czech-downcase name) czech-multiword-abbrevs)
721 (apply append (mapcar (lambda (w) (czech-token-to-words token w))
722 (cadr (assoc_string (czech-downcase name)
723 czech-multiword-abbrevs)))))
725 (item.prev token)
726 (czech-item.feat*? token "p.name" "[-+]?[0-9]+[.,]?[0-9]*"))
731 (czech-item.has-feat token 'numprefix))
732 (not (czech-item.has-feat token 'punc))
733 (item.feat token "n.whitespace" " ")
734 (string-matches (item.feat token "n.name") "^[0-9][0-9][0-9]$"))
735 (item.set_feat (item.next token) 'numprefix
736 (czech-prepend-numprefix token name))
740 (czech-item.feat? token 'punc ".")
741 (item.next token)
742 (not (string-matches (item.feat token "n.whitespace") " +")))
743 (if (not (czech-item.has-feat token 'punctype))
744 (item.set_feat token 'punctype 'num))
745 (append (czech-number* token name)
749 (not (czech-item.has-feat token 'numprefix)))
750 (czech-digits name))
752 ((let ((nname (czech-prepend-numprefix token name)))
756 (if (not (czech-item.has-feat token 'punctype))
757 (item.set_feat token 'punctype 'num))
758 (let ((nname (czech-prepend-numprefix token name)))
759 (if (and (czech-item.feat? token "n.name" "K�")
762 (czech-number (string-before nname ","))
767 (czech-number hellers)
769 (czech-number nname))))
772 (string-matches (item.feat token "p.name") "^[-+]?[0-9]+,[-0-9]+$"))
778 (not (string-matches (item.feat token "p.name") capitals))
779 (not (string-matches (item.feat token "p.next") capitals))
786 (let ((expansion (cadr (assoc_string (czech-downcase phoneme)
787 czech-multiword-abbrevs))))
795 (lts.apply name 'czech-normalize))
803 (lts.apply name 'czech-normalize)))
805 ((and (string-matches name (string-append "^[^" czech-chars "0-9]+$"))
807 (czech-all-same (symbolexplode name)))
808 (list czech-token.separator-word-name))
809 ((and (string-matches name (string-append "^[^" czech-chars "0-9]$"))
810 (eqv? (length (item.daughters token)) 0)
811 (let ((punc (item.feat token 'punc)))
814 (czech-all-same (symbolexplode punc)))))
815 (item.set_feat token 'punc 0)
816 (list czech-token.separator-word-name))
820 (not (string-matches (item.feat token "p.name")
822 (not (string-matches (item.feat token "p.name")
824 (not (string-matches (item.feat token "p.name")
826 (append (czech-number@ (string-before name ":"))
827 (czech-number@ (string-after name ":"))))
829 (append (czech-number@ (string-before name ":"))
830 (czech-number@ (string-before (string-after name ":") ":"))
831 (czech-number@ (string-after (string-after name ":") ":"))))
834 (append (czech-number (string-before name ":"))
836 (czech-number (string-after name ":"))))
842 (czech-token-to-words token (string-append
846 (czech-token-to-words token (string-after (substring name 1 1000) "-"))))
848 ((string-matches name (string-append "^" czech-char-regexp "+$"))
849 (if (string-equal (czech-downcase name) "�") ; Festival bug workaround
852 ((string-matches name (string-append "^[^" czech-chars "0-9]+$"))
855 (list czech-token.garbage-word-name))
857 (string-equal (item.name token) name)
858 (or (not (string-matches (item.feat token 'prepunctuation) "0?"))
859 (not (string-matches (item.feat token 'punctuation) "0?"))))
860 ;; This handles the case when the whole token consists of two or more
865 ((assoc_string name czech-multiword-abbrevs)
866 (cadr (assoc_string name czech-multiword-abbrevs)))
870 ((string-matches name (string-append "^" czech-char-regexp "+-$"))
871 (czech-token-to-words token (string-before name "-")))
873 (string-append "^[" czech-chars "0-9]+-[-" czech-chars "0-9]+$"))
875 (czech-token-to-words token (string-before name "-"))
877 (czech-token-to-words token (string-after name "-"))))
884 (append (czech-digits (substring name 0 i))
885 (czech-token-to-words token (czech-suffix name i)))))
896 (append (czech-token-to-words token (substring name 0 i))
897 (czech-digits (substring name i (- j i)))
898 (czech-token-to-words token (czech-suffix name j)))))
906 (if (not (string-matches name (string-append "^[-" czech-chars "]+$")))
907 (item.set_feat token 'punctype nil))
910 (mapcar (lambda (name) (czech-token-to-words token name))
911 (czech-tokenize-on-nonalphas name))))))
915 (defvar czech-lexicon-file "czech-lexicon.out")
917 (lex.create "czech")
918 (lex.set.phoneset "czech")
919 (lex.select "czech")
923 (let ((file (path-append (car dirs) czech-lexicon-file)))
932 (lex.set.lts.method 'czech-lts)
937 (defvar czech-guess-pos
967 (define (czech-word-pos? word pos)
969 (apply append (mapcar (lambda (p) (cdr (assoc p czech-guess-pos)))
972 (define (czech-pos-in-phrase-from word)
976 (or (not (czech-item.feat*? w "R:Token.p.name" "0?"))
977 (and (czech-item.feat*? w "p.R:Token.parent.punc" "0?")
978 (czech-item.feat*? w "R:Token.parent.prepunctuation"
980 (not (czech-item.feat*?
982 (string-append "^[^" czech-chars "0-9]+$"))))))
987 (define (czech-pos-in-phrase-to word)
991 (or (czech-item.feat*? w "R:Token.n.name" "0?")
992 (and (czech-item.feat*? w "R:Token.parent.punc" "0?")
993 (czech-item.feat*?
995 (not (czech-item.feat*?
997 (string-append "^[^" czech-chars "0-9]+$"))))))
1002 (define (czech-pos-last-in-phrase? word)
1003 (<= (czech-pos-in-phrase-to word) 1))
1005 (define (czech-pos utt)
1008 (let ((name (czech-downcase (item.name w)))
1009 (token (item.parent (item.relation w 'Token))))
1012 ((czech-item.has-feat w 'pos)
1015 ((and (czech-item.has-feat token 'punctype)
1016 (string-matches name (string-append "^[^" czech-chars "0-9]+$")))
1017 (item.set_feat w 'pos (item.feat token 'punctype)))
1028 (czech-pos-last-in-phrase? w))
1033 (or (czech-pos-last-in-phrase? w) ; final word
1034 (czech-word-pos? (item.next w) '(prep0 prep))
1041 (cdr (assoc 'question czech-guess-pos))))
1043 ;; Nothing special: check the czech-guess-pos tree
1045 (let ((pos-sets czech-guess-pos))
1055 (mapcar (lambda (token)
1056 (if (and (czech-item.feat*? token 'punc "0?")
1057 (czech-item.feat? token "daughtern.R:Word.n.gpos" 'conj))
1058 (item.set_feat token 'punc ",")))
1064 (define (czech-next-simple-punc word)
1065 (let ((unit (item.next (czech-word-stress-unit word))))
1069 ((string-matches (czech-stress-unit-punc unit) ".*[.?!;:,-]")
1070 (czech-stress-unit-punc unit))
1071 ((czech-item.feat? unit 'preelement 1)
1072 (czech-next-punc word))
1076 (define (czech-prev-simple-punc word)
1077 (let ((unit (item.prev (czech-word-stress-unit word))))
1081 ((string-matches (czech-stress-unit-punc unit) ".*[.?!;:,-]")
1082 (czech-stress-unit-punc unit))
1084 (let ((token (item.prev (item.parent (item.relation word 'Token)))))
1085 (while (and token (not (string-matches (item.feat token 'punc) ".+")))
1086 (set! token (item.prev token)))
1087 (let ((pword (and token
1088 (item.next token)
1089 (item.daughter1 (item.next token)))))
1091 (czech-item.feat? (czech-word-stress-unit pword)
1093 (item.feat token 'punc)
1096 (defvar czech-phrase-cart-tree
1097 ;; Note: Additional corrections are applied in czech-adjust-phrase-breaks
1119 ((lisp_czech-next-token-punc matches "\".*")
1136 (define (czech-adjust-phrase-breaks utt)
1140 ((czech-item.feat? w 'pbreak 'XB1) ; "big" punctuations
1145 ((czech-item.feat? w "R:SylStructure.name" 0)
1148 ((or (czech-item.feat*? (czech-word-stress-unit w)
1151 (czech-item.feat*? (czech-word-stress-unit w)
1157 ((czech-item.feat? w 'pbreak 'XB2) ; "comma" punctuations
1164 ((czech-item.feat? w "R:SylStructure.name" 0)
1167 ((czech-item.feat*? w "lisp_czech-next-simple-punc" ".*,")
1169 ((czech-item.feat*? w "lisp_czech-prev-simple-punc" ".*,")
1171 ((czech-item.feat*? w "lisp_czech-prev-simple-punc"
1174 ((czech-item.feat*? (czech-word-stress-unit w)
1184 (define (czech-adjust-segments segments)
1200 (czech-item.feat? item2 "ph_postnas" '+)
1207 (if czech-moravian
1212 (czech-item.feat? item1 "ph_cvox" '-)
1216 (if (and (czech-item.feat? item1 "ph_cvox" '+)
1217 (not (czech-item.feat? item1 "ph_partner" 0))
1221 (czech-item.feat? item2 "ph_cvox" '-)
1222 (and (czech-item.feat? item2 "ph_cvox" 'u)
1228 czech-proper-single-syl-prepositions))))))
1231 (if (and (czech-item.feat? item1 "ph_cvox" '-)
1232 (not (czech-item.feat? item1 "ph_partner" 0))
1234 (czech-item.feat? item2 "ph_cvox" '+)
1238 (czech-adjust-segments (cdr segments)))))
1240 (define (czech-adjust-phonetic-form utt)
1245 (czech-adjust-segments items)
1250 (define (czech-intonation-units utt)
1252 (let ((token (utt.relation utt 'Token)))
1253 (while token
1254 (if (or (czech-item.feat*? token "daughtern.pbreak" "[SBX]?B[12]?")
1255 (czech-item.feat*? token "daughtern.p.pbreak" "[SBX]?B[12]?"))
1256 (let ((w (item.daughtern token)))
1263 (set! token (item.next token))))
1275 (if (or (czech-item.feat*? w "sentence_break" 1)
1277 ;; last token generates no words for whatever reason)
1291 (define (czech-yes-no-question int-unit)
1296 (not (czech-item.feat? int-unit
1299 (not (czech-item.feat? int-unit
1303 (defvar czech-proper-single-syl-prepositions
1306 (defvar czech-special-final-words
1309 (define (czech-syllable-kernels phonemes)
1313 (if (and (czech-item.feat? (car phonemes) 'ph_vc '-)
1314 (czech-item.feat? (car phonemes) 'ph_syl '+))
1317 (while (and phonemes (czech-item.feat? (car phonemes) 'ph_syl '-))
1323 (if (czech-item.feat? (car phonemes) 'ph_vc '-)
1325 (czech-item.feat? (car phonemes) 'ph_vc '-)
1326 (czech-item.feat? (car phonemes) 'ph_syl '+))
1330 (czech-item.feat? (car phonemes) 'ph_vc '+)
1331 (czech-item.feat? (car phonemes) 'ph_syl '+))
1336 (while (and seg (or (czech-item.feat? seg 'ph_cvox '+)
1337 (czech-item.feat? seg 'ph_cvox 'u)))
1343 (define (czech-syllable-count phonemes)
1344 (length (czech-syllable-kernels phonemes)))
1346 (define (czech-stress-unit-phonemes unit)
1355 (define (czech-unit-syllable-count unit)
1356 (czech-syllable-count (czech-stress-unit-phonemes unit)))
1358 (define (czech-identify-stress-units sylwords)
1380 (czech-item.feat? w 'pos 'prep0)
1382 (and (member (czech-downcase (item.name w))
1383 czech-proper-single-syl-prepositions)
1384 (not (czech-item.feat? w "pos" "se"))))
1392 (if (and (<= (czech-unit-syllable-count last-unit) 1)
1394 czech-special-final-words)))
1401 (<= (czech-unit-syllable-count (car units*)) 1))
1434 (let ((len (czech-unit-syllable-count u))
1439 (<= (czech-unit-syllable-count
1459 (czech-random-choice '(t nil)))
1471 (czech-random-choice '(t nil)))
1492 ((czech-random-choice '(t nil))
1513 (define (czech-stress-units utt)
1519 (let ((stress-units (czech-identify-stress-units
1553 (if (czech-yes-no-question int-unit) "FF-IT" "FF-KKL")
1565 (define (czech-word utt)
1567 (czech-intonation-units utt)
1568 (czech-stress-units utt)
1569 (czech-adjust-phrase-breaks utt)
1574 (define (czech-add-strokes utt)
1579 (if (and (czech-item.feat? i "daughter1.daughter1.ph_vc" '+)
1581 (not (czech-item.feat? i "daughter1.daughter1.R:Segment.p.name"
1588 (define (czech-pause-breaks utt)
1594 (if (czech-item.feat? w "pbreak" 'SB)
1598 (define (czech-pause utt)
1599 (czech-pause-breaks utt)
1600 (czech-add-strokes utt)
1601 (czech-adjust-phonetic-form utt)
1606 (defvar czech-accent-cart-tree '(NONE))
1610 (defvar czech-int-contours
1661 (defvar czech-int-contour-tree
1679 (define (czech-int-select-contours utt)
1685 (let ((contourtype (wagon_predict unit czech-int-contour-tree)))
1688 (let ((nsyls (czech-unit-syllable-count unit)))
1689 (let ((contour (czech-random-choice
1692 czech-int-contours)))))
1709 (if (czech-item.feat? unit 'preelement 1)
1733 (kernels (czech-syllable-kernels
1734 (czech-stress-unit-phonemes unit))))
1750 (while (czech-item.feat? (cadr k*) 'ph_vc '+)
1763 (while (or (czech-item.feat? next-k 'ph_cvox '+)
1764 (czech-item.feat? next-k 'ph_cvox 'u))
1777 (defvar czech-int-simple-params '((f0_mean 100) (f0_std 10)))
1779 (define (czech-int-targets utt syl)
1790 (let ((contourval (and (czech-item.has-feat s 'contourval)
1815 (set! times-values (cons (list (czech-max (- (item.feat last-seg 'end) 0.01)
1823 (defvar czech-phoneme-durations
1867 (defvar czech-silence-durations
1870 (defvar czech-stress-duration-factors
1884 (defvar czech-duration-random-factor 0.2)
1886 (define (czech-duration-pauses utt)
1891 czech-silence-durations)))
1900 (* 10 (+ min (* (- max min) (czech-rand)))))))))
1903 (define (czech-duration-factors utt)
1906 (let ((nphones (length (czech-stress-unit-phonemes sunit))))
1912 (let ((factor (cadr (assoc nphones czech-stress-duration-factors))))
1933 (if (eqv? (czech-syllable-count phonemes) 1)
1934 (let ((durfact (cadr (assoc (czech-min (length phonemes) 12)
1935 czech-stress-duration-factors))))
1939 (define (czech-duration-compute utt)
1948 czech-phoneme-durations*)))))))
1951 (define (czech-duration utt)
1952 (czech-duration-pauses utt)
1953 (czech-duration-factors utt)
1954 (czech-duration-compute utt)
1959 (defvar czech-volume-scale 1.8)
1960 (defvar czech-volume-scale* nil)
1962 (define (czech-adjust-volume utt)
1963 (utt.wave.rescale utt czech-volume-scale*))
1967 (define (czech-translate-add-vowels utt)
1968 (if (and (string-equal (Param.get 'Language) 'czech)
1969 czech-insert-filling-vowels)
1988 (vowel? (lambda (ph) (czech-item.feat? ph 'ph_vc '+)))
2000 (define (czech-translate-phonemes utt)
2001 (if (and (string-equal (Param.get 'Language) 'czech)
2002 czech-phoneset-translation*)
2005 (let ((tr (assoc (item.name item) czech-phoneset-translation*)))
2010 (defvar czech-after-analysis-hooks
2011 (list czech-translate-add-vowels czech-translate-phonemes))
2015 (define (czech-reset-parameters)
2016 (set! czech-lts-extra-rules* czech-lts-extra-rules)
2017 (set! czech-int-simple-params* czech-int-simple-params)
2018 (set! czech-phoneme-durations* czech-phoneme-durations)
2019 (set! czech-volume-scale* czech-volume-scale)
2020 (set! czech-phoneset-translation* czech-phoneset-translation)
2021 (set! czech-after-analysis-hooks* czech-after-analysis-hooks)
2024 (define (voice-czech-common)
2026 (Param.set 'Language 'czech)
2028 (Param.set 'PhoneSet 'czech)
2029 (PhoneSet.select 'czech)
2032 (set! token.unknown_word_name czech-token.unknown-word-name)
2033 (set! token.whitespace czech-token.whitespace)
2034 (set! token.punctuation czech-token.punctuation)
2035 (set! token.prepunctuation czech-token.prepunctuation)
2036 (set! token_to_words czech-token-to-words)
2039 (lex.select "czech")
2041 (Param.set 'Word_Method 'czech-word)
2043 (set! guess_pos czech-guess-pos) ; not actually used
2044 (Param.set 'POS_Method czech-pos)
2047 (set! phrase_cart_tree czech-phrase-cart-tree)
2051 (Param.set 'Pause_Method czech-pause)
2053 (set! int_accent_cart_tree czech-accent-cart-tree)
2054 (Param.set 'Int_Method czech-int-select-contours)
2055 (set! int_general_params (cons (list 'targ_func czech-int-targets)
2056 czech-int-simple-params*))
2059 (Param.set 'Duration_Method czech-duration)
2062 (set! after_analysis_hooks czech-after-analysis-hooks*)
2064 (set! after_synth_hooks (list czech-adjust-volume))
2067 (set! current-voice 'czech))
2069 (defmac (czech-proclaim-voice form)
2079 (let ((parameters `((language czech)
2086 (czech-reset-parameters)
2088 (voice-czech-common)
2094 (provide 'czech)