1;;; Czech support for Festival
2
3;; Copyright (C) 2003, 2004, 2005, 2006 Brailcom, o.p.s.
4
5;; Author: Milan Zamazal <pdm@brailcom.org>
6
7;; COPYRIGHT NOTICE
8
9;; This program is free software; you can redistribute it and/or modify
10;; it under the terms of the GNU General Public License as published by
11;; the Free Software Foundation; either version 2 of the License, or
12;; (at your option) any later version.
13
14;; This program is distributed in the hope that it will be useful, but
15;; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17;; for more details.
18
19;; You should have received a copy of the GNU General Public License
20;; along with this program; if not, write to the Free Software
21;; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
22
23
24;;; Utility functions
25
26(define (czech-min x y)
27  (if (<= x y) x y))
28
29(define (czech-max x y)
30  (if (>= x y) x y))
31
32(define (czech-item.has-feat item feat)
33  (assoc feat (item.features item)))
34
35(define (czech-item.feat? item feat value)
36  (and item (string-equal (item.feat item feat) value)))
37
38(define (czech-item.feat*? item feat value)
39  (and item (string-matches (item.feat item feat) value)))
40
41(define (czech-all-same lst)
42  (or (<= (length lst) 1)
43      (and (string-equal (car lst) (cadr lst))
44           (czech-all-same (cdr lst)))))
45
46(define (czech-suffix string i)
47  (substring string i (- (string-length string) i)))
48
49(defvar czech-randomize t)
50
51(defvar czech-rand-range nil)
52
53(defvar czech-moravian t)
54
55(defvar czech-insert-filling-vowels t)
56
57(defvar czech-group-digits 3)
58
59(define (czech-rand)
60  (if czech-randomize
61      (begin
62        (if (not czech-rand-range)
63            (let ((n 100)
64                  (max 0))
65              (while (> n 0)
66                (let ((r (rand)))
67                  (if (> r max)
68                      (set! max r)))
69                (set! n (- n 1)))
70              (set! czech-rand-range 1)
71              (while (> max czech-rand-range)
72                (set! czech-rand-range (* 2 czech-rand-range)))))
73        (/ (rand) czech-rand-range))
74      0.5))
75
76(define (czech-random-choice lst)
77  (let ((max (length lst)))
78    (let ((n (* (czech-rand) max)))
79      (nth n lst))))
80
81(define (czech-next-token-punc word)
82  (if (item.relation.next word "Token")
83      "0"
84      (item.feat word "R:Token.n.daughter1.prepunctuation")))
85
86(define (czech-next-punc word)
87  (let ((token (item.next (item.parent (item.relation word 'Token)))))
88    (while (and token (not (string-matches (item.feat token 'punc) "[^0]+")))
89      (set! token (item.next token)))
90    (if token
91        (item.feat token 'punc)
92        0)))
93
94(define (czech-prev-punc word)
95  (let ((token (item.prev (item.parent (item.relation word 'Token)))))
96    (while (and token (not (string-matches (item.feat token 'punc) "[^0]+")))
97      (set! token (item.prev token)))
98    (if token
99        (item.feat token 'punc)
100        0)))
101
102(define (czech-word-stress-unit word)
103  (let ((sylword (item.relation word 'SylStructure)))
104    (if (and sylword (item.daughter1 sylword))
105        (item.parent (item.relation (item.daughter1 sylword) 'StressUnit)))))
106
107(define (czech-stress-unit-punc unit)
108  (and unit
109       (item.feat unit "daughtern.R:SylStructure.parent.R:Token.parent.punc")))
110
111;;; Phone set
112
113(defPhoneSet czech
114  (;; vowel or consonant: vowel consonant
115   (vc + - 0)
116   ;; vowel length: short long
117   (vlng s l 0)
118   ;; consonant voicing: yes no unique
119   (cvox + - u 0)
120   ;; can create a syllable: yes no
121   (syl + - 0)
122   ;; can make previous consonant nasal: yes no
123   (postnas + - 0)
124   ;; voiced/unvoiced counterpart: phone
125   (partner b c c~ ch d d~ dz dz~ f g h k p r~* s s~ t t~ v z z~ 0)
126   )
127  (
128   ;;   c l v s n p
129   (#   0 0 0 0 0 0)                    ; pause
130   (_   0 0 0 - 0 0)                    ; vowel-vowel stroke
131   (a   + s 0 + - 0)
132   (a:  + l 0 + - 0)
133   (b   - 0 + - - p)
134   (c   - 0 - - - dz)
135   (c~  - 0 - - - dz~)
136   (ch  - 0 - - - 0)
137   (d   - 0 + - - t)
138   (d~  - 0 + - - t~)
139   (dz  - 0 + - - c)
140   (dz~ - 0 + - - c~)
141   (e   + s 0 + - 0)
142   (e:  + l 0 + - 0)
143   (f   - 0 - - - v)
144   (g   - 0 + - + k)
145   (h   - 0 + - - ch)
146   (i   + s 0 + - 0)
147   (i:  + l 0 + - 0)
148   (j   - 0 u - - 0)
149   (k   - 0 - - + g)
150   (l   - 0 u + - 0)
151   (m   - 0 u - - 0)
152   (n   - 0 u - - 0)
153   (n*  - 0 u - - 0)                    ; n before k or g
154   (n~  - 0 u - - 0)
155   (o   + s 0 + - 0)
156   (o:  + l 0 + - 0)
157   (p   - 0 - - - b)
158   (r   - 0 u + - 0)
159   (r~  - 0 + - - r~*)                  ; (default) voiced r~, may change to r~*
160   (r~* - 0 - - - 0)                    ; unvoiced r~, can't change back to r~
161   (s   - 0 - - - z)
162   (s~  - 0 - - - z~)
163   (t   - 0 - - - d)
164   (t~  - 0 - - - d~)
165   (u   + s 0 + - 0)
166   (u:  + l 0 + - 0)
167   (v   - 0 + - - f)
168   (z   - 0 + - - s)
169   (z~  - 0 + - - s~)
170  )
171)
172(PhoneSet.silences '(#))
173
174(defvar czech-phoneset-translation '())
175(defvar czech-phoneset-translation* nil)
176
177;;; Text to phones
178
179(lts.ruleset
180 czech-normalize
181 ;; just transforms the texts to a canonical form
182 ()
183 (
184  ( [ a ] = a )
185  ( [ � ] = � )
186  ( [ � ] = e )
187  ( [ b ] = b )
188  ( [ c ] = c )
189  ( [ � ] = � )
190  ( [ d ] = d )
191  ( [ � ] = � )
192  ( [ e ] = e )
193  ( [ � ] = � )
194  ( [ � ] = � )
195  ( [ f ] = f )
196  ( [ g ] = g )
197  ( [ h ] = h )
198  ( [ i ] = i )
199  ( [ � ] = � )
200  ( [ j ] = j )
201  ( [ k ] = k )
202  ( [ l ] = l )
203  ( [ m ] = m )
204  ( [ n ] = n )
205  ( [ � ] = � )
206  ( [ o ] = o )
207  ( [ � ] = � )
208  ( [ � ] = e )
209  ( [ p ] = p )
210  ( [ q ] = q )
211  ( [ r ] = r )
212  ( [ � ] = � )
213  ( [ s ] = s )
214  ( [ � ] = � )
215  ( [ � ] = s )
216  ( [ t ] = t )
217  ( [ � ] = � )
218  ( [ u ] = u )
219  ( [ � ] = � )
220  ( [ � ] = � )
221  ( [ � ] = y )
222  ( [ v ] = v )
223  ( [ w ] = w )
224  ( [ x ] = x )
225  ( [ y ] = y )
226  ( [ � ] = � )
227  ( [ z ] = z )
228  ( [ � ] = � )
229  ( [ A ] = a )
230  ( [ � ] = � )
231  ( [ � ] = e )
232  ( [ B ] = b )
233  ( [ C ] = c )
234  ( [ � ] = � )
235  ( [ D ] = d )
236  ( [ � ] = � )
237  ( [ E ] = e )
238  ( [ � ] = � )
239  ( [ � ] = � )
240  ( [ F ] = f )
241  ( [ G ] = g )
242  ( [ H ] = h )
243  ( [ I ] = i )
244  ( [ � ] = � )
245  ( [ J ] = j )
246  ( [ K ] = k )
247  ( [ L ] = l )
248  ( [ M ] = m )
249  ( [ N ] = n )
250  ( [ � ] = � )
251  ( [ O ] = o )
252  ( [ � ] = � )
253  ( [ � ] = e )
254  ( [ P ] = p )
255  ( [ Q ] = q )
256  ( [ R ] = r )
257  ( [ � ] = � )
258  ( [ S ] = s )
259  ( [ � ] = � )
260  ( [ T ] = t )
261  ( [ � ] = � )
262  ( [ U ] = u )
263  ( [ � ] = � )
264  ( [ � ] = � )
265  ( [ � ] = y )
266  ( [ V ] = v )
267  ( [ W ] = w )
268  ( [ X ] = x )
269  ( [ Y ] = y )
270  ( [ � ] = � )
271  ( [ Z ] = z )
272  ( [ � ] = � )
273  ;; digits are here to make this rule set usable in some other cases
274  ( [ 0 ] = 0 )
275  ( [ 1 ] = 1 )
276  ( [ 2 ] = 2 )
277  ( [ 3 ] = 3 )
278  ( [ 4 ] = 4 )
279  ( [ 5 ] = 5 )
280  ( [ 6 ] = 6 )
281  ( [ 7 ] = 7 )
282  ( [ 8 ] = 8 )
283  ( [ 9 ] = 9 )
284  ))
285
286(lts.ruleset
287 czech-orthography
288 ;; transforms Czech written text to a phonetic form
289 ((BPV b p v)
290  (DTN d t n)
291  (�Ii �)
292  (IY i y)
293  (�� � �)
294  (#_ # _)
295  (Vowel ae � � iou � � y �)
296  (Vowel+# ae � � iou � � y � #)
297  (SZ s z))
298 (
299  ;; Special rules
300  ( [ d ] i SZ m u = d )
301  ( [ n ] i SZ m u = n )
302  ( [ t ] i SZ m u = t )
303  ( [ n ] i s t = n )
304  ( [ t ] i s t = t )
305  ( [ t ] i c k = t )
306  ( [ t ] it= t )
307  ( # a n [ t ] i = t )
308  ( # a n t [ i ] Vowel = i )
309  ( t e c h [ n ] i = n )
310  ( [ d ] i s p = d )
311
312  ( l i [ c ] o m = c )
313  ( [ c ] o m = k )
314
315  ( f r [ e e ] = i: )
316
317  ( m l a [ d ] i s t = d~ )
318  ( [ d ] i s t = d )
319
320  ( # t r a [ t ] i v = t~ )
321  ( � [ t ] i v = t~ )
322  ( b o l e s [ t ] i v = t~ )
323  ( c [ t ] i v = t~ )
324  ( c e [ t ] i v = t~ )
325  ( c h [ t ] i v = t~ )
326  ( c h a m [ t ] i v = t~ )
327  ( c h r a p [ t ] i v = t~ )
328  ( c h r o p [ t ] i v = t~ )
329  ( � [ t ] i v = t~ )
330  ( d r [ t ] i v = t~ )
331  ( � [ t ] i v = t~ )
332  ( f i n [ t ] i v = t~ )
333  ( h l [ t ] i v = t~ )
334  ( h o [ t ] i v = t~ )
335  ( hm o [ t ] i v = t~ )
336  ( � [ t ] i v = t~ )
337  ( k l e v e [ t ] i v = t~ )
338  ( k r o u [ t ] i v = t~ )
339  ( o s [ t ] i v = t~ )
340  ( p i [ t ] i v = t~ )
341  ( p l e [ t ] i v = t~ )
342  ( p o l [ t ] i v = t~ )
343  ( r o [ t ] i v = t~ )
344  ( s e [ t ] i v = t~ )
345  ( s m r [ t ] i v = t~ )
346  ( s o p [ t ] i v = t~ )
347  ( � [ t ] i v = t~ )
348  ( v r [ t ] i v = t~ )
349  ( y [ t ] i v = t~ )
350  ( � � d o s [ t ] i v = t~ )
351  ( d i g e s [ t ] i v = t )
352  ( f e s [ t ] i v = t )
353  ( k o n t r a s [ t ] i v = t )
354  ( r e z i s [ t ] i v = t )
355  ( s u g e s [ t ] i v = t )
356  ( s [ t ] i v = t~ )
357  ( [ t ] i v = t )
358
359  ;; Special orthography rules
360  ( [ d ] �I = d~ )
361  ( [ t ] �I = t~ )
362  ( [ n ] �I = n~ )
363  ( DTN [ � ] = e )
364  ( BPV [ � ] = j e )
365  ( m [ � ] = n~ e )
366  ;; `i' handling
367  ( # m e z [ i ] Vowel = i _ )
368  ( #_ [ IY ] #_ = i )
369  ( Vowel+# [ IY ] Vowel+# = j )
370  ( Vowel [ �� ] Vowel = j i: j )
371  ( [ IY ] Vowel = i j )
372  ( [ �� ] Vowel = i: j )
373  ( Vowel [ IY ] = j )
374  ( Vowel [ �� ] = j i: )
375  ;; Some vowel-vowel pairs
376  ( m i m [ o ] Vowel = o _ )
377  ( # m n o h [ o ] Vowel = o _ )
378  ;; Two-letter phonemes
379  ( [ d � ] = dz~ )
380  ( [ d z ] = dz )
381  ( [ c h ] = ch )
382  ;; Special letters
383  ( [ � ] = j e )
384  ( # [ � ] = u: )
385  ( b e z [ � ] = _ u: )
386  ( o [ � ] = _ u: )
387  ( [ � ] h = _ u: )
388  ( [ � ] � e = _ u: )
389  ( [ � ] � t = _ u: )
390  ( [ � ] d r= _ u: )
391  ( [ � ] l o h = _ u: )
392  ( [ � ] r o= _ u: )
393  ( [ � ] r o d = _ u: )
394  ( [ � ] r o v= _ u: )
395  ;; Simple letters
396  ( [ a ] = a )
397  ( [ � ] = a: )
398  ( [ b ] = b )
399  ( [ c ] = c )
400  ( [ � ] = c~ )
401  ( [ d ] = d )
402  ( [ � ] = d~ )
403  ( [ e ] = e )
404  ( [ � ] = e: )
405  ( [ f ] = f )
406  ( [ g ] = g )
407  ( [ h ] = h )
408  ( [ i ] = i )
409  ( [ � ] = i: )
410  ( [ j ] = j )
411  ( [ k ] = k )
412  ( [ l ] = l )
413  ( [ m ] = m )
414  ( [ n ] = n )
415  ( [ � ] = n~ )
416  ( [ o ] = o )
417  ( [ � ] = o: )
418  ( [ p ] = p )
419  ( [ q ] = k v )
420  ( [ r ] = r )
421  ( [ � ] = r~ )
422  ( [ s ] = s )
423  ( [ � ] = s~ )
424  ( [ t ] = t )
425  ( [ � ] = t~ )
426  ( [ u ] = u )
427  ( [ � ] = u: )
428  ( [ � ] = u: )
429  ( [ v ] = v )
430  ( [ w ] = v )
431  ( [ x ] = k s )
432  ( [ y ] = i )
433  ( [ � ] = i: )
434  ( [ z ] = z )
435  ( [ � ] = z~ )
436  ))
437
438
439;; -- missing diphones: n-f n-g n-k
440;; -- special diphones: a-a: a-e: a-o: a-u: a:-a a:-a: a:-e a:-e: a:-o a:-o:
441;;                      a:-u a:-u: e-a: e-e: e-o: e-u: e:-a e:-a: atd.
442;;;;
443
444(defvar czech-unknown-symbol-word "nezn�m�")
445
446(defvar czech-lts-extra-rules '())
447
448(define (czech-basic-lts word)
449  (let ((word (if (lts.in.alphabet word 'czech-normalize)
450                  word
451                  czech-unknown-symbol-word)))
452    (if (string-equal word "")
453        nil
454        (let ((phonetic-form (lts.apply
455                              (lts.apply word 'czech-normalize)
456                              'czech-orthography))
457              phonetic-form*)
458          phonetic-form))))
459
460(define (czech-syllabify-phstress phones)
461  (if (null? phones)
462      ()
463      (list (list phones 0))))
464
465(define (czech-lts word features)
466  (list word
467        nil
468        (let ((transformed (and (not (string-equal word ""))
469                                (czech-basic-lts word))))
470          (if transformed
471              (czech-syllabify-phstress
472               (let ((rules czech-lts-extra-rules*))
473                 (while rules
474                   (set! transformed (lts.apply transformed (car rules)))
475                   (set! rules (cdr rules)))
476                 transformed))
477              '()))))
478
479(define (czech-downcase word)
480  (if (lts.in.alphabet word 'czech-normalize)
481      (apply string-append (lts.apply word 'czech-normalize))
482      word))
483
484;;; Tokenization
485
486(defvar czech-token.unknown-word-name "nezn�m�")
487(defvar czech-token.separator-word-name "odd�lova�") ; our own variable
488(defvar czech-token.garbage-word-name "smet�")       ; our own variable
489(defvar czech-token.whitespace " �\t\n\r")
490(defvar czech-token.punctuation "\"'`.,:;!?-(){}[]<>")
491(defvar czech-token.prepunctuation "\"'`({[<")
492
493;;; Token to words processing
494
495(defvar czech-chars "a-zA-Z����������������������������ة����ݮ")
496(defvar czech-char-regexp (string-append "[" czech-chars "]"))
497
498(defvar czech-multiword-abbrevs
499  '(("�" ("dlouh�" "a"))
500    ("�" ("dlouh�" "e"))
501    ("�" ("dlouh�" "i"))
502    ("�" ("dlouh�" "o"))
503    ("�" ("dlouh�" "u"))
504    ("�" ("u" "s" "krou�kem"))
505    ("w" ("dvojit�" "v"))
506    ("�" ("dlouh�" "y"))
507    ("`" ("obr�cen�" "apostrof"))
508    ("\\" ("zp�tn�" "lom�tko"))
509    (">" ("v�t��" "ne�"))
510    ("<" ("men��" "ne�"))
511    ("[" ("lev�" "hranat�"))
512    ("]" ("prav�" "hranat�"))
513    ("{" ("lev�" "slo�en�"))
514    ("}" ("prav�" "slo�en�"))
515    ("(" ("lev�" "kulat�"))
516    (")" ("prav�" "kulat�"))
517    ("=" ("rovn�" "se"))
518    ("\n" ("nov�" "��dek"))
519    ("os/2" ("OS" "2"))
520    ("km/h" ("kilometr�" "za" "hodinu"))
521    ("m/s" ("metr�" "za" "sekundu"))
522    ))
523
524(define (czech-remove element list)
525  (cond
526   ((null? list) list)
527   ((equal? element (car list)) (czech-remove element (cdr list)))
528   (t (cons (car list) (czech-remove element (cdr list))))))
529
530(define (czech-number name)
531  (cond
532   ((string-matches name "^[-+].*")
533    (cons (substring name 0 1)
534          (czech-number (czech-suffix name 1))))
535   ((string-matches name ".*[,.].*")
536    (let ((comma (if (string-matches name ".*,.*") "," ".")))
537      (append (czech-number (string-before name comma))
538              (list comma)
539              (czech-number (string-after name comma)))))
540   ((string-equal name "0")
541    (list "nula"))
542   ((string-matches name "^0..*")
543    (cons "nula" (czech-number (czech-suffix name 1))))
544   (t
545    (czech-number-from-digits (czech-remove (car (symbolexplode " "))
546                                            (symbolexplode name))))))
547
548(define (czech-digits-1 digits)
549  (if czech-group-digits
550      (let ((n (string-length digits)))
551        (while (> (- n czech-group-digits) 0)
552          (set! n (- n czech-group-digits)))
553        (append (czech-number (substring digits 0 n))
554                (if (> (length digits) czech-group-digits)
555                    (czech-digits (czech-suffix digits n))
556                    nil)))
557      (czech-number digits)))
558
559(define (czech-digits digits)
560  (cond
561   ((string-equal digits "")
562    '())
563   ((string-matches digits "^0.*")
564    (append (czech-number "0") (czech-digits (czech-suffix digits 1))))
565   (t
566    (czech-digits-1 digits))))
567
568(define (czech-prepend-numprefix token name)
569  (if (czech-item.has-feat token 'numprefix)
570      (string-append (item.feat token 'numprefix) name)
571      name))
572
573(define (czech-number* token name)
574  (czech-number (czech-prepend-numprefix token name)))
575
576(define (czech-number@ name)
577  (cond
578   ((string-equal name "0")
579    '("nula"))
580   ((string-equal name "00")
581    '("nula" "nula"))
582   ((string-matches name "0[1-9]")
583    (cons "nula" (czech-number (string-after name "0"))))
584   (t
585    (czech-number name))))
586
587(define (czech-number-from-digits digits)
588  (let ((len (length digits)))
589    (cond
590     ((equal? len 1)
591      (let ((d (car digits)))
592	(cond
593	 ((string-equal d "0") ())
594	 ((string-equal d "1") (list "jedna"))
595	 ((string-equal d "2") (list "dva"))
596	 ((string-equal d "3") (list "t�i"))
597	 ((string-equal d "4") (list "�ty�i"))
598	 ((string-equal d "5") (list "p�t"))
599	 ((string-equal d "6") (list "�est"))
600	 ((string-equal d "7") (list "sedm"))
601	 ((string-equal d "8") (list "osm"))
602	 ((string-equal d "9") (list "dev�t")))))
603     ((equal? len 2)
604      (if (string-equal (car digits) "1")
605	  (let ((d (car (cdr digits))))
606	    (cond
607	     ((string-equal d "0") (list "deset"))
608	     ((string-equal d "1") (list "jeden�ct"))
609	     ((string-equal d "2") (list "dvan�ct"))
610	     ((string-equal d "3") (list "t�in�ct"))
611	     ((string-equal d "4") (list "�trn�ct"))
612	     ((string-equal d "5") (list "patn�ct"))
613	     ((string-equal d "6") (list "�estn�ct"))
614	     ((string-equal d "7") (list "sedmn�ct"))
615	     ((string-equal d "8") (list "osmn�ct"))
616	     ((string-equal d "9") (list "devaten�ct"))))
617	  (append
618	   (let ((d (car digits)))
619	     (cond
620	      ((string-equal d "0") ())
621	      ((string-equal d "2") (list "dvacet"))
622	      ((string-equal d "3") (list "t�icet"))
623	      ((string-equal d "4") (list "�ty�icet"))
624	      ((string-equal d "5") (list "pades�t"))
625	      ((string-equal d "6") (list "�edes�t"))
626	      ((string-equal d "7") (list "sedmdes�t"))
627	      ((string-equal d "8") (list "osmdes�t"))
628	      ((string-equal d "9") (list "devades�t"))))
629	   (czech-number-from-digits (cdr digits)))))
630     ((equal? len 3)
631      (append
632       (let ((d (car digits)))
633	 (cond
634	  ((string-equal d "0") ())
635	  ((string-equal d "1") (list "sto"))
636	  ((string-equal d "2") (list "dv�" "st�"))
637	  ((string-equal d "3") (list "t�i" "sta"))
638	  ((string-equal d "4") (list "�ty�i" "sta"))
639	  ((string-equal d "5") (list "p�t" "set"))
640	  ((string-equal d "6") (list "�est" "set"))
641	  ((string-equal d "7") (list "sedm" "set"))
642	  ((string-equal d "8") (list "osm" "set"))
643	  ((string-equal d "9") (list "dev�t" "set"))))
644       (czech-number-from-digits (cdr digits))))
645     ((<= len 12)
646      (let ((concatenations '((t "tis�c" "tis�ce" "tis�c")
647			      (t "milion" "miliony" "milion�")
648			      (nil "miliarda" "miliardy" "miliard")))
649	    (n (- len 3)))
650	(while (> n 3)
651	  (set! concatenations (cdr concatenations))
652	  (set! n (- n 3)))
653	(let ((m n)
654	      (head-digits ())
655	      (tail-digits digits)
656	      (words (car concatenations)))
657	  (while (> m 0)
658	    (set! head-digits (cons (car tail-digits) head-digits))
659	    (set! tail-digits (cdr tail-digits))
660	    (set! m (- m 1)))
661	  (set! head-digits (reverse head-digits))
662	  (append
663	   (cond
664            ((let ((all-zero t)
665                   (d head-digits))
666               (while (and all-zero d)
667                 (if (string-equal (car d) "0")
668                     (set! d (cdr d))
669                     (set! all-zero nil)))
670               all-zero)
671             nil)
672	    ((and (equal? n 1) (string-equal (car digits) "1"))
673	     (list (car (cdr words))))
674	    ((and (equal? n 1) (string-matches (car digits) "[2-4]"))
675	     (list
676	      (cond
677	       ((string-equal (car digits) "2")
678		(if (car words) "dva" "dv�"))
679	       ((string-equal (car digits) "3") "t�i")
680	       ((string-equal (car digits) "4") "�ty�i"))
681	      (car (cdr (cdr words)))))
682	    (t
683	     (append
684	      (czech-number-from-digits head-digits)
685	      (list (car (cdr (cdr (cdr words))))))))
686	   (czech-number-from-digits tail-digits)))))
687     (t
688      (if czech-group-digits
689          (czech-digits (apply string-append digits))
690          (apply append (mapcar czech-number digits)))))))
691
692(define (czech-tokenize-on-nonalphas string)
693  (cond
694   ((string-equal string "")
695    nil)
696   ((string-matches string (string-append "^" czech-char-regexp "*$"))
697    (list string))
698   ((string-matches string "^[0-9]+$")
699    (symbolexplode string))
700   (t
701    (let ((i 0))
702      (while (string-matches (substring string i 1) czech-char-regexp)
703        (set! i (+ i 1)))
704      (if (eq? i 0)
705          (while (string-matches (substring string i 1) "[0-9]")
706                 (set! i (+ i 1))))
707      (append (if (> i 0)
708                  (let ((s (substring string 0 i)))
709                    (if (string-matches s "[0-9]+")
710                        (symbolexplode s)
711                        (list s)))
712                  nil)
713              (list (substring string i 1))
714              (czech-tokenize-on-nonalphas
715               (czech-suffix string (+ i 1))))))))
716
717(define (czech-token-to-words token name)
718  (cond
719   ;; Special terms
720   ((assoc_string (czech-downcase name) czech-multiword-abbrevs)
721    (apply append (mapcar (lambda (w) (czech-token-to-words token w))
722                          (cadr (assoc_string (czech-downcase name)
723                                              czech-multiword-abbrevs)))))
724   ((and (string-matches name "[ckm]m")
725         (item.prev token)
726         (czech-item.feat*? token "p.name" "[-+]?[0-9]+[.,]?[0-9]*"))
727    (list (cadr (assoc_string name '(("cm" "centimetr�") ("km" "kilometr�")
728                                     ("mm" "milimetr�"))))))
729   ;; Spaced numbers
730   ((and (or (string-matches name "^[-+]?[1-9][0-9]?[0-9]?$")
731             (czech-item.has-feat token 'numprefix))
732         (not (czech-item.has-feat token 'punc))
733         (item.feat token "n.whitespace" " ")
734         (string-matches (item.feat token "n.name") "^[0-9][0-9][0-9]$"))
735    (item.set_feat (item.next token) 'numprefix
736                   (czech-prepend-numprefix token name))
737    nil)
738   ;; Ordinal numbers
739   ((and (string-matches name "^[0-9]+$")
740         (czech-item.feat? token 'punc ".")
741         (item.next token)
742         (not (string-matches (item.feat token "n.whitespace") "  +")))
743    (if (not (czech-item.has-feat token 'punctype))
744        (item.set_feat token 'punctype 'num))
745    (append (czech-number* token name)
746            (list ".")))
747   ;; Numbers beginning with the zero digit
748   ((and (string-matches name "^0[0-9]*$")
749         (not (czech-item.has-feat token 'numprefix)))
750    (czech-digits name))
751   ;; Any other numbers
752   ((let ((nname (czech-prepend-numprefix token name)))
753      (or (string-matches nname "^[-+]?[0-9]+$")
754          (string-matches nname "^[-+]?[0-9]+[.,][0-9]+$")
755          (string-matches nname "^[-+]?[0-9]+,-$")))
756    (if (not (czech-item.has-feat token 'punctype))
757        (item.set_feat token 'punctype 'num))
758    (let ((nname (czech-prepend-numprefix token name)))
759      (if (and (czech-item.feat? token "n.name" "K�")
760               (string-matches nname "^[-+]?[0-9]+,[-0-9]+$"))
761          (append
762           (czech-number (string-before nname ","))
763           (list "korun")
764           (let ((hellers (string-after nname ",")))
765             (if (not (string-equal hellers "-"))
766                 (append
767                  (czech-number hellers)
768                  (list "hal���")))))
769          (czech-number nname))))
770   ;; Monetary sign
771   ((and (string-equal name "K�")
772         (string-matches (item.feat token "p.name") "^[-+]?[0-9]+,[-0-9]+$"))
773    nil)
774   ;; Acronyms
775   ((let ((capitals "^[A-Z����������ة����ݮ]+$"))
776      (and (string-matches name capitals)
777           (not (lex.lookup_all name))
778           (not (string-matches (item.feat token "p.name") capitals))
779           (not (string-matches (item.feat token "p.next") capitals))
780           (<= (length name) 3) ; longer pronouncable acronyms are not spelled
781           (not (string-equal name "�")) ; Festival bug workaround
782           ))
783    (let ((words ()))
784      (mapcar
785       (lambda (phoneme)
786         (let ((expansion (cadr (assoc_string (czech-downcase phoneme)
787                                              czech-multiword-abbrevs))))
788           (if expansion
789               (set! words (append words
790                                   (mapcar (lambda (w)
791                                             `((name ,w) (pos sym)))
792                                           expansion)))
793               (set! words (append words
794                                   (list `((name ,phoneme) (pos sym))))))))
795       (lts.apply name 'czech-normalize))
796      words))
797   ;; Abbreviations and other unpronouncable words
798   ((and (string-matches
799          name
800          "^[bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQSTVWXZ����������ة��][bcdfghjkmnpqstvwxzBCDFGHJKMNPQSTVWXZ����������ة��]+$")
801         (not (lex.lookup_all name)))
802    (mapcar (lambda (phoneme) `((name ,phoneme) (pos sym)))
803            (lts.apply name 'czech-normalize)))
804   ;; Separators
805   ((and (string-matches name (string-append "^[^" czech-chars "0-9]+$"))
806         (>= (length name) 4)
807         (czech-all-same (symbolexplode name)))
808    (list czech-token.separator-word-name))
809   ((and (string-matches name (string-append "^[^" czech-chars "0-9]$"))
810         (eqv? (length (item.daughters token)) 0)
811         (let ((punc (item.feat token 'punc)))
812           (and (string-matches punc "...+") ; excludes, among others, punc==0
813                (string-equal (substring punc 0 1) name)
814                (czech-all-same (symbolexplode punc)))))
815    (item.set_feat token 'punc 0)
816    (list czech-token.separator-word-name))
817   ;; Time (just a few of many possible forms)
818   ((and (string-matches name "^[0-9]+:[0-9][0-9]$")
819         ;; try to identify ratios -- should be better done in POS tagging
820         (not (string-matches (item.feat token "p.name")
821                              "^[Pp][Oo][Mm][��].*"))
822         (not (string-matches (item.feat token "p.name")
823                              "^[Pp][Rr][Aa][Vv][Dd][��][Pp][Oo][Dd][Oo].*"))
824         (not (string-matches (item.feat token "p.name")
825                              "^[��][Aa][Nn][Cc].*")))
826    (append (czech-number@ (string-before name ":"))
827            (czech-number@ (string-after name ":"))))
828   ((string-matches name "^[0-9]+:[0-9][0-9]:[0-9][0-9]$")
829    (append (czech-number@ (string-before name ":"))
830            (czech-number@ (string-before (string-after name ":") ":"))
831            (czech-number@ (string-after (string-after name ":") ":"))))
832   ;; Ratios
833   ((string-matches name "^[0-9]+:[0-9]+$")
834    (append (czech-number (string-before name ":"))
835            '("ku")
836            (czech-number (string-after name ":"))))
837   ;; Numeric ranges (might be minus as well, but that's rare)
838   ((string-matches name "[0-9]+[.,]*[0-9]*-[0-9]+[.,]*[0-9]*$")
839    ;; we don't include signs here not to break phone numbers and such a
840    ;; written form is incorrect anyway
841    (append
842     (czech-token-to-words token (string-append
843                                  (substring name 0 1)
844                                  (string-before (substring name 1 1000) "-")))
845     '(((name "-") (pos range)))
846     (czech-token-to-words token (string-after (substring name 1 1000) "-"))))
847   ;; Homogenous tokens
848   ((string-matches name (string-append "^" czech-char-regexp "+$"))
849    (if (string-equal (czech-downcase name) "�") ; Festival bug workaround
850        (list "e�")
851        (list name)))
852   ((string-matches name (string-append "^[^" czech-chars "0-9]+$"))
853    (cond
854     ((> (length name) 10)
855      (list czech-token.garbage-word-name))
856     ((and (eqv? (length name) 1)
857           (string-equal (item.name token) name)
858           (or (not (string-matches (item.feat token 'prepunctuation) "0?"))
859               (not (string-matches (item.feat token 'punctuation) "0?"))))
860      ;; This handles the case when the whole token consists of two or more
861      ;; punctuation characters.  In such a case Festival picks one of the
862      ;; characters as the name, while the other characters are treated as
863      ;; punctuation.  We want all the character being handled as punctuation.
864      `(((name ,name) (pos punc))))
865     ((assoc_string name czech-multiword-abbrevs)
866      (cadr (assoc_string name czech-multiword-abbrevs)))
867     (t
868      (symbolexplode name))))
869   ;; Hyphens
870   ((string-matches name (string-append "^" czech-char-regexp "+-$"))
871    (czech-token-to-words token (string-before name "-")))
872   ((string-matches name
873      (string-append "^[" czech-chars "0-9]+-[-" czech-chars "0-9]+$"))
874    (append
875     (czech-token-to-words token (string-before name "-"))
876     '(((name "-") (pos punc)))       ; necessary for punctuation reading modes
877     (czech-token-to-words token (string-after name "-"))))
878   ;; Starting with digits
879   ((string-matches name "^[0-9].*")
880    (let ((i 0))
881      (while (member (substring name i 1)
882                     '("0" "1" "2" "3" "4" "5" "6" "7" "8" "9"))
883        (set! i (+ i 1)))
884      (append (czech-digits (substring name 0 i))
885              (czech-token-to-words token (czech-suffix name i)))))
886   ;; Digits inside
887   ((string-matches name "^.*[0-9].*")
888    (let ((i 0)
889          j
890          (digits '("0" "1" "2" "3" "4" "5" "6" "7" "8" "9")))
891      (while (not (member (substring name i 1) digits))
892        (set! i (+ i 1)))
893      (set! j (+ i 1))
894      (while (member (substring name j 1) digits)
895        (set! j (+ j 1)))
896      (append (czech-token-to-words token (substring name 0 i))
897              (czech-digits (substring name i (- j i)))
898              (czech-token-to-words token (czech-suffix name j)))))
899   ;; Lexicon words
900   ((lex.lookup_all name)
901    (list name))
902   ;; TODO: roman numerals
903   ;; Heterogenous tokens -- mixed alpha, numeric and non-alphanumeric
904   ;; characters
905   (t
906    (if (not (string-matches name (string-append "^[-" czech-chars "]+$")))
907        (item.set_feat token 'punctype nil))
908    (apply
909     append
910     (mapcar (lambda (name) (czech-token-to-words token name))
911             (czech-tokenize-on-nonalphas name))))))
912
913;;; Lexicon
914
915(defvar czech-lexicon-file "czech-lexicon.out")
916
917(lex.create "czech")
918(lex.set.phoneset "czech")
919(lex.select "czech")
920(let ((dirs '("/usr/local/share/festival/lib/dicts"))
921      (lexfile nil))
922  (while dirs
923    (let ((file (path-append (car dirs) czech-lexicon-file)))
924      (if (probe_file file)
925          (begin
926            (set! lexfile file)
927            (set! dirs nil))))
928    (set! dirs (cdr dirs)))
929  (if lexfile
930      (lex.set.compile.file lexfile)
931      (format t "warning: Czech lexicon file not found\n")))
932(lex.set.lts.method 'czech-lts)
933(lex.add.entry '("nezn�m�" nil (((n e z n a: m e:) 0))))
934
935;;; Part of Speech
936
937(defvar czech-guess-pos
938  '((prep0 "k" "s" "v" "z")
939    (prep "bez" "beze" "b�hem" "do" "ke" "ku" "krom" "krom�" "mezi" "mimo"
940          "m�sto" "na" "nad" "nade" "o" "od" "ode" "okolo" "po" "pod" "pode"
941          "pro" "proti" "p�ed" "p�ede" "p�es" "p�eze" "p�i" "se" "skrz"
942          "skrze" "u" "ve" "vyjma" "za" "ze" "zpoza")
943    (conj "a" "i" "ani" "nebo" "anebo")
944    (particle "a�" "k�" "nech�")
945    (question "co" "�emu" "��" "jak" "jak�" "jak�" "jak�" "kam" "kde"
946              "kdo" "kdy" "koho" "kolik" "kolik�t�" "kolik�t�" "kolik�t�"
947              "komu" "kterak" "kter�" "kter�" "kter�ho" "kter�mu" "kter�"
948              "kudy" "na�" "nakolik" "odkud" "pokolik�t�" "pro�")
949    (misc "aby" "abych" "abys" "abychom" "abyste" "ale" "alespo�" "aneb" "ani"
950          "ani�" "an�to" "aspo�" "av�ak" "a�" "a�" "a�koli" "a�koliv" "bu�"
951          "bu�to" "bu�si" "by" "by�" "by�si" "coby" "�i" "�ili" "div"
952          "dokdy" "dokonce" "dokud" "dotud" "jakby" "jakkoli" "jakkoliv"
953          "jakmile" "jako" "jakoby" "jako�" "jako�to" "jednak" "jednou"
954          "jeliko�" "jen" "jenom" "jenom�e" "jen�e" "jestli" "jestli�e" "je�t�"
955          "je�to" "jinak" "kde�to" "kdybych" "kdybys"
956          "kdyby" "kdybychom" "kdybyste" "kdy�" "kv�li"
957          "leda" "leda�e" "le�" "mezit�mco" "mimoto" "na�e�" "neb" "neboli"
958          "nebo�" "nejen" "nejen�e" "ne�" "ne�li" "ne�kuli" "nicm�n�" "n�br�"
959          "odkdy" "odkud" "pak" "pakli" "pakli�e" "podle" "podm�nky" "pokud"
960          "pon�vad�" "pop��pad�" "potom" "potud" "pot�" "pro�e�" "proto"
961          "proto�e" "pr�v�" "p�ece" "p�esto�e" "p�itom" "respektive" "sic"
962          "sice" "sotva" "sotva�e" "tak" "takov�" "taktak" "tak�e" "tak�"
963          "tedy" "ten" "teprve" "to" "toho" "tolik" "tomu" "toti�" "tu" "tud�"
964          "t�m" "t�eba" "t�ebas" "t�ebas�e" "t�eba�e" "v�ak" "v�dy�" "zat�mco"
965          "zda" "zdali" "zejm�na" "zrovna" "zvl�t�" "�e")))
966
967(define (czech-word-pos? word pos)
968  (member (item.name word)
969          (apply append (mapcar (lambda (p) (cdr (assoc p czech-guess-pos)))
970                                (if (consp pos) pos (list pos))))))
971
972(define (czech-pos-in-phrase-from word)
973  (let ((result 1)
974        (w word))
975    (while (and (item.prev w)
976                (or (not (czech-item.feat*? w "R:Token.p.name" "0?"))
977                    (and (czech-item.feat*? w "p.R:Token.parent.punc" "0?")
978                         (czech-item.feat*? w "R:Token.parent.prepunctuation"
979                                            "0?")
980                         (not (czech-item.feat*?
981                               w "p.name"
982                               (string-append "^[^" czech-chars "0-9]+$"))))))
983      (set! result (+ result 1))
984      (set! w (item.prev w)))
985    result))
986
987(define (czech-pos-in-phrase-to word)
988  (let ((result 1)
989        (w word))
990    (while (and (item.next w)
991                (or (czech-item.feat*? w "R:Token.n.name" "0?")
992                    (and (czech-item.feat*? w "R:Token.parent.punc" "0?")
993                         (czech-item.feat*?
994                          w "R:Token.parent.n.prepunctuation" "0?")
995                         (not (czech-item.feat*?
996                               w "n.name"
997                               (string-append "^[^" czech-chars "0-9]+$"))))))
998      (set! result (+ result 1))
999      (set! w (item.next w)))
1000    result))
1001
1002(define (czech-pos-last-in-phrase? word)
1003  (<= (czech-pos-in-phrase-to word) 1))
1004
1005(define (czech-pos utt)
1006  (mapcar
1007   (lambda (w)
1008     (let ((name (czech-downcase (item.name w)))
1009           (token (item.parent (item.relation w 'Token))))
1010       (cond
1011        ;; Feature already assigned
1012        ((czech-item.has-feat w 'pos)
1013         nil)
1014        ;; Word followed by a punctuation
1015        ((and (czech-item.has-feat token 'punctype)
1016              (string-matches name (string-append "^[^" czech-chars "0-9]+$")))
1017         (item.set_feat w 'pos (item.feat token 'punctype)))
1018        ;; Punctuation
1019        ((member name '("\"" "'" "`" "-" "." "," ":" ";" "!" "?" "(" ")"))
1020         ;; Is it a separate punctuation character?
1021         (if (eqv? (length
1022                    (item.daughters (item.parent (item.relation w 'Token))))
1023                   1)
1024             (item.set_feat w 'pos nil)
1025             (item.set_feat w 'pos 'punc)))
1026        ;; Single letter, not in the role of a word
1027        ((and (eq? (string-length name) 1)
1028              (czech-pos-last-in-phrase? w))
1029         (item.set_feat w 'pos 'sym))
1030        ;; Word "se", not in the role of a preposition
1031        ((and (string-equal name "se")  ; the word "se"
1032              (item.prev w)             ; not the first word
1033              (or (czech-pos-last-in-phrase? w) ; final word
1034                  (czech-word-pos? (item.next w) '(prep0 prep))
1035                                        ; followed by a preposition
1036                  ))
1037         (item.set_feat w 'pos 'se))
1038        ;; Question words with the `pak' suffix
1039        ((and (string-matches name ".*pak")
1040              (member (substring name 0 (- (length name) 3))
1041                      (cdr (assoc 'question czech-guess-pos))))
1042         (item.set_feat w 'pos 'question))
1043        ;; Nothing special: check the czech-guess-pos tree
1044        (t
1045         (let ((pos-sets czech-guess-pos))
1046           (while pos-sets
1047             (if (member name (cdar pos-sets))
1048                 (begin
1049                   (item.set_feat w 'pos (caar pos-sets))
1050                   (set! pos-sets nil))
1051                 (set! pos-sets (cdr pos-sets)))))
1052         ))))
1053   (utt.relation.items utt 'Word))
1054  ;; Add commas before conjunctions
1055  (mapcar (lambda (token)
1056            (if (and (czech-item.feat*? token 'punc "0?")
1057                     (czech-item.feat? token "daughtern.R:Word.n.gpos" 'conj))
1058                (item.set_feat token 'punc ",")))
1059          (utt.relation.items utt 'Token))
1060  utt)
1061
1062;;; Phrase breaks
1063
1064(define (czech-next-simple-punc word)
1065  (let ((unit (item.next (czech-word-stress-unit word))))
1066    (cond
1067     ((not unit)
1068      0)
1069     ((string-matches (czech-stress-unit-punc unit) ".*[.?!;:,-]")
1070      (czech-stress-unit-punc unit))
1071     ((czech-item.feat? unit 'preelement 1)
1072      (czech-next-punc word))
1073     (t
1074      0))))
1075
1076(define (czech-prev-simple-punc word)
1077  (let ((unit (item.prev (czech-word-stress-unit word))))
1078    (cond
1079     ((not unit)
1080      0)
1081     ((string-matches (czech-stress-unit-punc unit) ".*[.?!;:,-]")
1082      (czech-stress-unit-punc unit))
1083     (t
1084      (let ((token (item.prev (item.parent (item.relation word 'Token)))))
1085        (while (and token (not (string-matches (item.feat token 'punc) ".+")))
1086          (set! token (item.prev token)))
1087        (let ((pword (and token
1088                          (item.next token)
1089                          (item.daughter1 (item.next token)))))
1090          (if (and pword
1091                   (czech-item.feat? (czech-word-stress-unit pword)
1092                                     'preelement 1))
1093              (item.feat token 'punc)
1094              0)))))))
1095
1096(defvar czech-phrase-cart-tree
1097  ;; Note: Additional corrections are applied in czech-adjust-phrase-breaks
1098  ;; SB = (very) short break
1099  '(;; end of utterance
1100    (n.name is 0)
1101    ((BB))
1102    ;; exclude "punctuation words"
1103    ((name matches "[][\"'`.,:;!?(){}<>-]+")
1104     ((NB))
1105     ;; parentheses
1106     ((R:Token.parent.n.prepunctuation matches "(.*")
1107      ((R:Token.n.name is 0)
1108       ((B))
1109       ((NB)))
1110      ((lisp_token_end_punc matches ".*)")
1111       ((B))
1112       ;;
1113       ;; phonetic rules
1114       ;;
1115       ;; "big" punctuations
1116       ((lisp_token_end_punc matches ".*[.?!;]\"")
1117        ((BB))
1118        ((lisp_token_end_punc matches ".*[.?!;]")
1119         ((lisp_czech-next-token-punc matches "\".*")
1120          ((BB))
1121          ((XB1)))                       ; for following adjustments
1122         ;; "smaller" punctuations
1123         ((lisp_token_end_punc matches ".*[:-]")
1124          ;; dashes are treated as pbreaks only if separated by whitespaces
1125          ((R:Token.parent.n.daughter1.name is "-")
1126           ((R:Token.n.name is 0)
1127            ((B))
1128            ((NB)))
1129           ((B)))
1130          ;; "comma" punctuations
1131          ((lisp_token_end_punc matches ".*,")
1132           ((XB2))                      ; for following adjustments
1133           ;; nothing applies -- no break by default
1134           ((NB)))))))))))
1135
1136(define (czech-adjust-phrase-breaks utt)
1137  ;; This must be called after stress units are identified
1138  (mapcar (lambda (w)
1139            (cond
1140             ((czech-item.feat? w 'pbreak 'XB1) ; "big" punctuations
1141              ;; only one stress unit between punctuations makes them shorter
1142              (item.set_feat
1143               w 'pbreak
1144               (cond
1145                ((czech-item.feat? w "R:SylStructure.name" 0)
1146                 ;; not a word actually
1147                 'BB)
1148                ((or (czech-item.feat*? (czech-word-stress-unit w)
1149                                        "n.lisp_czech-stress-unit-punc"
1150                                        ".*[.?!;]\"?")
1151                     (czech-item.feat*? (czech-word-stress-unit w)
1152                                        "p.lisp_czech-stress-unit-punc"
1153                                        ".*[.?!;]\"?"))
1154                 'B)
1155                (t
1156                 'BB))))
1157             ((czech-item.feat? w 'pbreak 'XB2) ; "comma" punctuations
1158              ;; if only one stress unit separates from other punctuation or
1159              ;; the neighbor stress unit contains preelement, phrase break
1160              ;; *may* become shorter
1161              (item.set_feat
1162               w 'pbreak
1163               (cond
1164                ((czech-item.feat? w "R:SylStructure.name" 0)
1165                 ;; not a word actually
1166                 'B)
1167                ((czech-item.feat*? w "lisp_czech-next-simple-punc" ".*,")
1168                 'SB)
1169                ((czech-item.feat*? w "lisp_czech-prev-simple-punc" ".*,")
1170                 'B)
1171                ((czech-item.feat*? w "lisp_czech-prev-simple-punc"
1172                                    ".*[-.?!;:]\"?")
1173                 'SB)
1174                ((czech-item.feat*? (czech-word-stress-unit w)
1175                                    "n.lisp_czech-stress-unit-punc"
1176                                    ".*[-.?!;:]\"?")
1177                 'SB)
1178                (t
1179                 'B))))))
1180          (utt.relation.items utt 'Word)))
1181
1182;;; Segmentation
1183
1184(define (czech-adjust-segments segments)
1185  (if (not (null? segments))
1186      (let ((item1 (nth 0 segments))
1187            (item2 (nth 1 segments))
1188            (item3 (nth 2 segments))
1189            (item-word (lambda (i)
1190                         (item.parent
1191                          (item.parent
1192                           (item.relation i 'SylStructure))))))
1193        (let ((name1 (and item1 (item.name item1)))
1194              (name2 (and item2 (item.name item2)))
1195              (name3 (and item3 (item.name item3)))
1196              (same-word? (lambda (i1 i2)
1197                            (equal? (item-word i1) (item-word i2)))))
1198          ;; nasals
1199          (if (and (string-equal name1 "n")
1200                   (czech-item.feat? item2 "ph_postnas" '+)
1201                   (same-word? item1 item2))
1202              (item.set_name item1 "n*"))
1203          ;; sh
1204          (if (and (string-equal name1 "s")
1205                   (string-equal name2 "h")
1206                   (same-word? item1 item2))
1207              (if czech-moravian
1208                  (item.set_name item1 "z")
1209                  (item.set_name item2 "ch")))
1210          ;; unvoiced-r~
1211          (if (and (string-equal name2 "r~")
1212                   (czech-item.feat? item1 "ph_cvox" '-)
1213                   (same-word? item1 item2))
1214              (item.set_name item2 "r~*"))
1215          ;; voiced-unvoiced
1216          (if (and (czech-item.feat? item1 "ph_cvox" '+)
1217                   (not (czech-item.feat? item1 "ph_partner" 0))
1218                   item2
1219                   (or (string-equal name2 "#")
1220                       (string-equal name2 "_")
1221                       (czech-item.feat? item2 "ph_cvox" '-)
1222                       (and (czech-item.feat? item2 "ph_cvox" 'u)
1223                            (not (same-word? item1 item2))
1224                            (not (member
1225                                  (item.name (item-word item1))
1226                                  (append
1227                                   (list "v" "z")
1228                                   czech-proper-single-syl-prepositions))))))
1229              (item.set_name item1 (item.feat item1 "ph_partner")))
1230          ;; unvoiced-voiced
1231          (if (and (czech-item.feat? item1 "ph_cvox" '-)
1232                   (not (czech-item.feat? item1 "ph_partner" 0))
1233                   item2
1234                   (czech-item.feat? item2 "ph_cvox" '+)
1235                   (not (string-equal name2 "v"))
1236                   (not (string-equal name2 "r~")))
1237              (item.set_name item1 (item.feat item1 "ph_partner"))))
1238        (czech-adjust-segments (cdr segments)))))
1239
1240(define (czech-adjust-phonetic-form utt)
1241  (let ((items (utt.relation.items utt 'Segment)))
1242    (let ((names (mapcar item.name items))
1243          (old-names '()))
1244      (while (not (equal? old-names names))
1245        (czech-adjust-segments items)
1246        (set! old-names names)
1247        (set! names (mapcar item.name (utt.relation.items utt 'Segment))))))
1248  utt)
1249
1250(define (czech-intonation-units utt)
1251  ;; Mark syllables before phrase breaks
1252  (let ((token (utt.relation utt 'Token)))
1253    (while token
1254      (if (or (czech-item.feat*? token "daughtern.pbreak" "[SBX]?B[12]?")
1255              (czech-item.feat*? token "daughtern.p.pbreak" "[SBX]?B[12]?"))
1256          (let ((w (item.daughtern token)))
1257            (while (and w
1258                        (not (item.daughters (item.relation w 'SylStructure))))
1259              (set! w (item.prev w)))
1260            (if w
1261                (item.set_feat (item.daughtern (item.relation w 'SylStructure))
1262                               "sentence_break" 1))))
1263      (set! token (item.next token))))
1264  ;; Make the intonation units
1265  (utt.relation.create utt 'IntUnit)
1266  (let ((sylwords (utt.relation.items utt 'Syllable))
1267        (id 1)
1268        (unit-sylwords '()))
1269    (while sylwords
1270      (let ((w (car sylwords)))
1271        (set! unit-sylwords (cons w unit-sylwords))
1272        (set! sylwords (cdr sylwords))
1273        ;; If `w' is a last syllable before a relevant phrase break, make new
1274        ;; intonation unit
1275        (if (or (czech-item.feat*? w "sentence_break" 1)
1276                ;; This is the very last syllable (we reach this point when the
1277                ;; last token generates no words for whatever reason)
1278                (not (item.next w)))
1279            (begin
1280              (utt.relation.append
1281               utt 'IntUnit
1282               `("int" ((name ,(format nil "IUnit%d" id)))))
1283              (set! id (+ id 1))
1284              ;; Add the syllables to the intonation unit
1285              (let ((i (utt.relation.last utt 'IntUnit)))
1286                (set! unit-sylwords (reverse unit-sylwords))
1287                (while unit-sylwords
1288                  (item.append_daughter i (car unit-sylwords))
1289                  (set! unit-sylwords (cdr unit-sylwords))))))))))
1290
1291(define (czech-yes-no-question int-unit)
1292  (and (string-matches (item.feat
1293                        int-unit
1294                        "daughtern.R:SylStructure.parent.R:Token.parent.punc")
1295                       ".*\\?")
1296       (not (czech-item.feat? int-unit
1297                              "daughter1.R:SylStructure.parent.R:Word.pos"
1298                              'question))
1299       (not (czech-item.feat? int-unit
1300                              "daughter2.R:SylStructure.parent.R:Word.pos"
1301                              'question))))
1302
1303(defvar czech-proper-single-syl-prepositions
1304  '("bez" "do" "ke" "ku" "na" "nad" "o" "od" "po" "pod" "pro" "p�ed" "p�es"
1305    "p�i" "se" "u" "ve" "za" "ze"))
1306(defvar czech-special-final-words
1307  '("ho" "je" "jej" "ji" "jsem" "jsi" "jste" "m�" "mi" "se" "si" "t�" "ti"))
1308
1309(define (czech-syllable-kernels phonemes)
1310  (let ((kernels '()))
1311    (while phonemes
1312      ;; Starting syllabic consonant doesn't constitute syllable
1313      (if (and (czech-item.feat? (car phonemes) 'ph_vc '-)
1314               (czech-item.feat? (car phonemes) 'ph_syl '+))
1315          (set! phonemes (cdr phonemes)))
1316      ;; Skip non-syllabic consonants
1317      (while (and phonemes (czech-item.feat? (car phonemes) 'ph_syl '-))
1318        (set! phonemes (cdr phonemes)))
1319      (if phonemes
1320          ;; Now take the kernel
1321          (let ((kc '())
1322                (kv '()))
1323            (if (czech-item.feat? (car phonemes) 'ph_vc '-)
1324                (while (and phonemes
1325                            (czech-item.feat? (car phonemes) 'ph_vc '-)
1326                            (czech-item.feat? (car phonemes) 'ph_syl '+))
1327                  (set! kc (cons (car phonemes) kc))
1328                  (set! phonemes (cdr phonemes))))
1329            (while (and phonemes
1330                        (czech-item.feat? (car phonemes) 'ph_vc '+)
1331                        (czech-item.feat? (car phonemes) 'ph_syl '+))
1332              (set! kv (cons (car phonemes) kv))
1333              (set! phonemes (cdr phonemes)))
1334            (let ((k (reverse (or kv kc))))
1335              (let ((seg (and k (item.prev (car k)))))
1336                (while (and seg (or (czech-item.feat? seg 'ph_cvox '+)
1337                                    (czech-item.feat? seg 'ph_cvox 'u)))
1338                  (set! k (cons seg k))
1339                  (set! seg (item.prev seg))))
1340              (set! kernels (cons k kernels))))))
1341    (reverse kernels)))
1342
1343(define (czech-syllable-count phonemes)
1344  (length (czech-syllable-kernels phonemes)))
1345
1346(define (czech-stress-unit-phonemes unit)
1347  (if (and unit (not (consp unit)))
1348      (set! unit (item.daughters unit)))
1349  (apply append (mapcar (lambda (syl)
1350                          (if (not (eq? syl 'preelement))
1351                              (item.daughters
1352                               (item.relation syl 'SylStructure))))
1353                        unit)))
1354
1355(define (czech-unit-syllable-count unit)
1356  (czech-syllable-count (czech-stress-unit-phonemes unit)))
1357
1358(define (czech-identify-stress-units sylwords)
1359  (let ((units (mapcar list sylwords))
1360        (unit-word (lambda (unit)
1361                     (and (eqv? (length unit) 1)
1362                          (item.parent
1363                           (item.relation (car unit) 'SylStructure)))))
1364        (unit-word-name (lambda (unit)
1365                          (and (eqv? (length unit) 1)
1366                               (item.feat (car unit)
1367                                          "R:SylStructure.parent.name"))))
1368        (merge (lambda (list)
1369                 (set-car! list (append (car list) (cadr list)))
1370                 (set-cdr! list (cddr list)))))
1371    ;; Nothing to do if there is at most one word
1372    (if (<= (length units) 1)
1373        units
1374        (begin
1375          ;; Basic joining
1376          (let ((units* units))
1377            (while units*
1378              (let ((w (unit-word (car units*))))
1379                (if (or ;; Join non-syllabic prepositions
1380                     (czech-item.feat? w 'pos 'prep0)
1381                     ;; Join proper single-syllabic prepositions
1382                     (and (member (czech-downcase (item.name w))
1383                                  czech-proper-single-syl-prepositions)
1384                          (not (czech-item.feat? w "pos" "se"))))
1385                    (merge units*)))
1386              (set! units* (cdr units*))))
1387          ;; At most 1 word now?
1388          (if (<= (length units) 1)
1389              units
1390              (let ((last-unit (car (last units))))
1391                ;; Final single-syllabic word
1392                (if (and (<= (czech-unit-syllable-count last-unit) 1)
1393                         (not (member (unit-word-name last-unit)
1394                                      czech-special-final-words)))
1395                    (set-cdr! (nth_cdr (- (length units) 2) units) '())
1396                    (set! last-unit '()))
1397                ;; Initial single-syllabic words
1398                (let ((units* units)
1399                      (singles '()))
1400                  (while (and units*
1401                              (<= (czech-unit-syllable-count (car units*)) 1))
1402                    (set! singles (cons (car units*) singles))
1403                    (set! units* (cdr units*)))
1404                  (set! singles (reverse singles))
1405                  (let ((len (length singles)))
1406                    (cond
1407                     ((<= len 0)
1408                      nil)
1409                     ((<= len 1)
1410                      (set! units (cons (append (car singles) '(preelement)
1411                                                (car units*))
1412                                        (cdr units*)))
1413                      (set! units* units))
1414                     ((<= len 4)
1415                      (set! units (cons (apply append singles) units*)))
1416                     (t
1417                      (let ((first-unit '())
1418                            (n (/ len 2))
1419                            (i 0))
1420                        (while (< i n)
1421                          (set! first-unit (append (car singles) first-unit))
1422                          (set! singles (cdr singles))
1423                          (set! i (+ i 1)))
1424                        (set! units (cons (reverse first-unit)
1425                                          (cons (apply append singles)
1426                                                units*)))))))
1427                  ;; Middle word processing
1428                  (while units*
1429                    (let ((u (car units*)))
1430                      ;; The word "a"
1431                      (if (string-equal (unit-word-name u) "a")
1432                          (merge units*))
1433                      ;; Single-syllabic words
1434                      (let ((len (czech-unit-syllable-count u))
1435                            (singles '())
1436                            (slen 0)
1437                            (next-units* (cdr units*)))
1438                        (while (and next-units*
1439                                    (<= (czech-unit-syllable-count
1440                                         (car next-units*)) 1)
1441                                    (not (string-equal
1442                                          (unit-word-name (car next-units*))
1443                                          "a")))
1444                          (set! singles (cons (car next-units*) singles))
1445                          (set! slen (+ slen 1))
1446                          (set! next-units* (cdr next-units*)))
1447                        (set! singles (reverse singles))
1448                        (let ((merge-n (lambda (n units)
1449                                         (while (> n 0)
1450                                           (merge units)
1451                                           (set! n (- n 1))))))
1452                          (cond
1453                           ((eqv? slen 0)
1454                            nil)
1455                           ((eqv? slen 1)
1456                            (merge units*))
1457                           ((eqv? slen 2)
1458                            (if (and (<= len 4)
1459                                     (czech-random-choice '(t nil)))
1460                                (merge-n 2 units*)
1461                                (merge (cdr units*))))
1462                           ((eqv? slen 3)
1463                            (if (<= len 3)
1464                                (merge-n 3 units*)
1465                                (merge-n 2 (cdr units*))))
1466                           ((eqv? slen 4)
1467                            (cond
1468                             ((>= len 5)
1469                              (merge-n 3 (cdr units*)))
1470                             ((and (<= len 2)
1471                                   (czech-random-choice '(t nil)))
1472                              (merge-n 4 units*))
1473                             (t
1474                              (merge-n 2 units*)
1475                              (merge-n 1 (cdr units*)))))
1476                           ((eqv? slen 5)
1477                            (cond
1478                             ((<= len 3)
1479                              (merge-n 2 units*)
1480                              (merge-n 2 (cdr units*)))
1481                             ((<= len 4)
1482                              (merge-n 1 (cdr units*))
1483                              (merge-n 2 (cddr units*)))
1484                             (t
1485                              (merge-n 2 (cdr units*))
1486                              (merge-n 1 (cddr units*)))))
1487                           ((eqv? slen 6)
1488                            (cond
1489                             ((>= len 4)
1490                              (merge-n 2 (cdr units*))
1491                              (merge-n 2 (cddr units*)))
1492                             ((czech-random-choice '(t nil))
1493                              (merge-n 2 units*)
1494                              (merge-n 3 (cdr units*)))
1495                             (t
1496                              (merge-n 2 units*)
1497                              (merge-n 1 (cdr units*))
1498                              (merge-n 1 (cddr units*)))))
1499                           (t
1500                            ;; This very rare case is not defined in the rules
1501                            (while (>= slen 4)
1502                              (merge-n 1 (cdr units*))
1503                              (set! units* (cdr units*))
1504                              (set! slen (- slen 2)))
1505                            (merge-n (- slen 1) (cdr units*))
1506                            ))
1507                          (set! units* next-units*)))))
1508                  ;; That's all
1509                  (if last-unit
1510                      (append units (list last-unit))
1511                      units))))))))
1512
1513(define (czech-stress-units utt)
1514  (utt.relation.create utt 'IntStress)
1515  (utt.relation.create utt 'StressUnit)
1516  (let ((id 1)
1517        (int-unit (utt.relation.first utt 'IntUnit)))
1518    (while int-unit
1519      (let ((stress-units (czech-identify-stress-units
1520                           (item.daughters int-unit))))
1521        ;; Add the intonation unit at the top of the StressUnit relation
1522        (utt.relation.append utt 'IntStress int-unit)
1523        (while stress-units
1524          ;; Create new stress unit
1525          (item.relation.append_daughter int-unit 'IntStress
1526            `("stress" ((name ,(format nil "SUnit%d" id)) (position "M"))))
1527          (set! id (+ id 1))
1528          (utt.relation.append utt 'StressUnit
1529                               (item.relation.daughtern int-unit 'IntStress))
1530          ;; Fill it with its words
1531          (let ((i (utt.relation.last utt 'StressUnit)))
1532            (mapcar (lambda (syl)
1533                      (if (eq? syl 'preelement)
1534                          (item.set_feat i "preelement" 1)
1535                          (begin
1536                            (item.append_daughter i syl)
1537                            (let ((j (item.daughtern i)))
1538                              (mapcar (lambda (seg)
1539                                        (item.append_daughter j seg))
1540                                      (item.daughters syl))))))
1541                    (car stress-units)))
1542          (set! stress-units (cdr stress-units))))
1543      ;; The first stress unit in an intonation unit has position I
1544      (item.set_feat (item.relation.daughter1 int-unit 'IntStress)
1545                     "position" "I")
1546      ;; The last stress unit in an intonation unit has position F or FF
1547      ;; (overrides I in case of a conflict)
1548      (item.set_feat (item.relation.daughtern int-unit 'IntStress) "position"
1549       (if (string-matches
1550            (item.feat int-unit
1551                       "daughtern.R:SylStructure.parent.R:Token.parent.punc")
1552            ".*[.!?;:].*")
1553           (if (czech-yes-no-question int-unit) "FF-IT" "FF-KKL")
1554           "F"))
1555      ;; Special case: F-1 positions overriding I and M
1556      (if (not (equal? (item.relation.daughtern int-unit 'IntStress)
1557                       (item.relation.daughter1 int-unit 'IntStress)))
1558          (let ((last-pos (item.feat int-unit
1559                                     "R:IntStress.daughtern.position")))
1560            (item.set_feat (item.prev
1561                            (item.relation.daughtern int-unit 'IntStress))
1562                           "position" (string-append last-pos "-1"))))
1563      (set! int-unit (item.next int-unit)))))
1564
1565(define (czech-word utt)
1566  (Classic_Word utt)
1567  (czech-intonation-units utt)
1568  (czech-stress-units utt)
1569  (czech-adjust-phrase-breaks utt)
1570  utt)
1571
1572;;; Pauses
1573
1574(define (czech-add-strokes utt)
1575  (let ((stroke '(_ (("name" _))))
1576        (i (utt.relation.first utt 'SylStructure)))
1577    (while i
1578      ;; Insert _ before vowels at the beginning of word boundaries
1579      (if (and (czech-item.feat? i "daughter1.daughter1.ph_vc" '+)
1580               (item.prev i)
1581               (not (czech-item.feat? i "daughter1.daughter1.R:Segment.p.name"
1582                                      '#)))
1583          (item.insert
1584           (item.relation (item.daughter1 (item.daughter1 i)) 'Segment)
1585           stroke 'before))
1586      (set! i (item.next i)))))
1587
1588(define (czech-pause-breaks utt)
1589  (Classic_Pauses utt)
1590  (let ((words (utt.relation.items utt 'Word)))
1591    ;; Handle SB -- Classic_Pauses doesn't know about it
1592    (mapcar
1593     (lambda (w)
1594       (if (czech-item.feat? w "pbreak" 'SB)
1595           (insert_pause utt w)))
1596     words)))
1597
1598(define (czech-pause utt)
1599  (czech-pause-breaks utt)
1600  (czech-add-strokes utt)
1601  (czech-adjust-phonetic-form utt)
1602  utt)
1603
1604;;; Accents
1605
1606(defvar czech-accent-cart-tree '(NONE))
1607
1608;; Intonation
1609
1610(defvar czech-int-contours
1611  '(((A 1) (0.02 -0.05) (0.02 -0.04) (0 0))
1612    ((B 1) (-0.01 0.02) (-0.02 0.04) (-0.02 0.05))
1613    ((C 1) (-0.04 -0.10) (0.02 -0.16) (-0.02 -0.12) (-0.02 -0.14))
1614    ((D 1) (-0.14 0.16) (-0.14 0.20))
1615    ((FA 1) (0.02 -0.04) (0 0))
1616    ((FB 1) (-0.02 0.04) (-0.02 0.05))
1617    ((A 2) (0.02 -0.05) (0.04 -0.08) (-0.03 0))
1618    ((B 2) (-0.04 0.06) (-0.02 0.04) (-0.02 0.07))
1619    ((C 2) (0 -0.10) (-0.04 -0.10) (-0.02 -0.12) (0.02 -0.16))
1620    ((D 2) (-0.06 0.08) (-0.10 0.14))
1621    ((FA 2) (0.04 -0.08) (-0.03 0))
1622    ((FB 2) (-0.02 0.04) (-0.02 0.07))
1623    ((A 3) (0.02 -0.02 -0.04) (0.02 -0.04 -0.02) (0.04 -0.04 -0.04)
1624           (0 0 -0.02) (0 -0.04 0) (-0.04 0.08 -0.10) (-0.04 0.04 -0.04)
1625           (-0.02 -0.01 0))
1626    ((B 3) (0 -0.04 0.04) (0 -0.06 0.04) (-0.06 0.04 0.02)
1627           (-0.01 0.04 0.02) (-0.06 0 0.06) (-0.06 0.02 0.04)
1628           (-0.04 0.04 -0.04))
1629    ((C 3) (0 -0.05 -0.05) (-0.04 -0.02 -0.08) (-0.06 -0.04 -0.04)
1630           (-0.06 -0.10 -0.02))
1631    ((D 3) (-0.06 -0.01 0.09) (-0.06 0.08 -0.01))
1632    ((FA 3) (-0.04 0.08 -0.10) (-0.04 0.04 -0.04) (-0.02 -0.01 0))
1633    ((FB 3) (-0.06 0 0.06) (-0.06 0.02 0.04) (-0.04 0.04 -0.04))
1634    ((A 4) (0 0 -0.02 -0.01) (-0.02 0 -0.03 0) (-0.03 0.03 -0.02 -0.01)
1635           (0 0 -0.01 0))
1636    ((B 4) (0 -0.03 0.01 0.02) (-0.02 0 0.02 0.02) (0 -0.03 0.03 0.02))
1637    ((C 4) (-0.04 -0.06 -0.02 -0.02) (-0.02 -0.02 -0.04 -0.06)
1638           (-0.02 -0.08 -0.04 -0.02))
1639    ((D 4) (-0.06 0 -0.01 0.12) (-0.06 0.12 0 -0.03))
1640    ((FA 4) (-0.03 0.03 -0.02 -0.01) (0 0 -0.01 0))
1641    ((FB 4) (-0.02 0 0.02 0.02) (0 -0.03 0.03 0.02))
1642    ((A 5) (-0.02 0.02 -0.02 -0.01 0) (-0.03 0.03 0 0 -0.03)
1643           (-0.02 0.02 0 0 -0.02))
1644    ((B 5) (0 -0.03 0.01 0.02 0.01) (0.01 -0.02 0 0 0.02)
1645           (-0.02 0 0.02 0.02 0))
1646    ((C 5) (-0.02 0 -0.02 -0.04 -0.06) (-0.02 -0.08 -0.02 -0.02 -0.02)
1647           (-0.02 -0.02 -0.08 -0.02 -0.02))
1648    ((D 5) (-0.06 0 -0.01 -0.01 0.13) (-0.06 0.13 0 -0.04 -0.04))
1649    ((FA 5) (-0.02 0.02 0 0 -0.02))
1650    ((FB 5)  (-0.02 0 0.02 0.02 0))
1651    ((A 6) (-0.02 0.02 -0.01 0 (0) -0.02 -0.01))
1652    ((B 6) (0 -0.01 0 0 (0) 0.01 0.01) (0 -0.02 0.01 0.01 (0) 0.01 0.02))
1653    ((C 6) (-0.02 0 -0.02 -0.04 -0.06 0 (0))
1654           (-0.02 -0.08 -0.02 -0.02 -0.02 (0))
1655           (-0.02 -0.02 -0.08 -0.02 -0.02 -0.02 (0)))
1656    ((D 6) (-0.06 0 -0.01 -0.01 0 (0) 0.13) (0.13 0 -0.02 0 (0) -0.04 -0.04))
1657    ((FA 6) (-0.02 0.02 -0.01 0 (0) -0.02 -0.01))
1658    ((FB 6) (0 -0.02 0.01 0.01 (0) 0.01 0.02))
1659    ))
1660
1661(defvar czech-int-contour-tree
1662  ;; Contourtype set: A, B, C, D, FA and FB (for F and F-1 positions)
1663  '((position is I)
1664    ((preelement > 0)
1665     ((B))
1666     ((A)))
1667    ((position is M)
1668     ((p.contourtype is B)
1669      ((A))
1670      ((B)))
1671     ((position is F-1) ((FB))
1672      ((position is F) ((FA))
1673       ((position is FF-KKL-1) ((A))
1674        ((position is FF-KKL) ((C))
1675         ((position is FF-IT-1) ((B))
1676          ((position is FF-IT) ((D))
1677           ((ERROR)))))))))))
1678
1679(define (czech-int-select-contours utt)
1680  (let ((unit (utt.relation utt 'StressUnit))
1681        (last-contour nil))
1682    (while unit
1683      (let ((position (item.feat unit 'position)))
1684        ;; Determine appropriate contour type
1685        (let ((contourtype (wagon_predict unit czech-int-contour-tree)))
1686          (item.set_feat unit "contourtype" contourtype)
1687          ;; Find particular contour
1688          (let ((nsyls (czech-unit-syllable-count unit)))
1689            (let ((contour (czech-random-choice
1690                            (cdr (assoc (list contourtype
1691                                              (if (<= nsyls 6) nsyls 6))
1692                                        czech-int-contours)))))
1693              ;; Adjust the first syllables of final contours
1694              (if (or (string-equal position "F")
1695                      (string-matches position "FF.*[A-Z]"))
1696                  (let ((adjust-contour
1697                         (lambda (c adj)
1698                           (if last-contour
1699                               (cons (+ (car (last last-contour)) adj) (cdr c))
1700                               c))))
1701                    (cond
1702                     ((string-equal position "F")
1703                      (set! contour (adjust-contour contour -0.02)))
1704                     ((string-equal position "FF-KKL")
1705                      (set! contour (adjust-contour contour 0.02)))
1706                     ((string-equal position "FF-IT")
1707                      (set! contour (adjust-contour contour -0.02))))))
1708              ;; Set contour values for preelements
1709              (if (czech-item.feat? unit 'preelement 1)
1710                  (set! contour (cons (- (car contour) 0.02) contour)))
1711              ;; Finalize contours of long units
1712              (let ((n (- nsyls 6)))
1713                (if (>= n 0)
1714                    (let ((prefix '())
1715                          (contour* contour))
1716                      (while (not (consp (car contour*)))
1717                        (set! prefix (cons (car contour*) prefix))
1718                        (set! contour* (cdr contour*)))
1719                      (let ((val (caar contour*)))
1720                        (set! contour* (cdr contour*))
1721                        (while (> n 0)
1722                          (set! contour* (cons val contour*))
1723                          (set! n (- n 1)))
1724                        (set! contour (append (reverse prefix)
1725                                              contour*))))))
1726              (set! last-contour contour)
1727              (item.set_feat unit 'contour contour)))))
1728      (set! unit (item.next unit)))
1729    ;; Spread the contours on sylwords
1730    (set! unit (utt.relation utt 'StressUnit))
1731    (while unit
1732      (let ((contour (item.feat unit 'contour))
1733            (kernels (czech-syllable-kernels
1734                      (czech-stress-unit-phonemes unit))))
1735        (if (eqv? (length kernels) 1)
1736            ;; One-syllabic units have two-number contours
1737            ;; (they can occur only in the final positions)
1738            (let ((k (car kernels))
1739                  (contour-1 (car contour))
1740                  (contour-2 (cadr contour)))
1741              (let ((k* (reverse k))
1742                    (last-k (car (last k)))
1743                    (contour-list (list (list 0.1 contour-1)
1744                                        (list 0.9 contour-2))))
1745                (if (eqv? (length k) 1)
1746                    ;; Single phone in kernel -- put both values on it
1747                    (item.set_feat (car k) 'contourval contour-list)
1748                    ;; Multiple phones -- spread the values over true kernel
1749                    (begin
1750                      (while (czech-item.feat? (cadr k*) 'ph_vc '+)
1751                        (set! k* (cdr k*)))
1752                      (if (eq? (car k*) last-k)
1753                          (item.set_feat last-k 'contourval contour-list)
1754                          (begin
1755                            (item.set_feat (car k*) 'contourval contour-1)
1756                            (item.set_feat last-k 'contourval contour-2)))))
1757                ;; Extend the contour pair to certain neighbors
1758                (set! k* (cdr k*))
1759                (while k*
1760                  (item.set_feat (car k*) 'contourval contour-1)
1761                  (set! k* (cdr k*)))
1762                (let ((next-k (item.next last-k)))
1763                  (while (or (czech-item.feat? next-k 'ph_cvox '+)
1764                             (czech-item.feat? next-k 'ph_cvox 'u))
1765                    (item.set_feat next-k 'contourval contour-2)
1766                    (set! next-k (item.next next-k))))))
1767            ;; Otherwise spread the contour value over all kernels
1768            (while kernels
1769              (let ((contourval (car contour)))
1770                (mapcar (lambda (seg)
1771                          (item.set_feat seg 'contourval contourval))
1772                        (car kernels)))
1773              (set! kernels (cdr kernels))
1774              (set! contour (cdr contour)))))
1775      (set! unit (item.next unit)))))
1776
1777(defvar czech-int-simple-params '((f0_mean 100) (f0_std 10)))
1778
1779(define (czech-int-targets utt syl)
1780  (let ((segments (item.relation.daughters syl 'SylStructure))
1781        (syl-start (item.feat syl 'syllable_start))
1782        (f0-base (cadr (assq 'f0_mean int_general_params)))
1783        (f0-std (/ (cadr (assq 'f0_std int_general_params)) 10))
1784        (times-values '()))
1785    (let ((last-seg-end syl-start)
1786          (f0-value (lambda (contourval)
1787                      (* f0-base (+ 1 (* f0-std contourval))))))
1788      (while segments
1789        (let ((s (car segments)))
1790          (let ((contourval (and (czech-item.has-feat s 'contourval)
1791                                 (item.feat s 'contourval)))
1792                (seg-end (item.feat s 'end)))
1793            (cond
1794             ((consp contourval)
1795              (let ((tlen (- seg-end last-seg-end)))
1796                (set! times-values
1797                      (append
1798                       (mapcar (lambda (v)
1799                                 (list (+ last-seg-end
1800                                          (* (read-from-string (car v)) tlen))
1801                                       (f0-value (cadr v))))
1802                               (reverse contourval))
1803                       times-values))))
1804             (contourval
1805              (let ((time (/ (+ last-seg-end seg-end) 2.0))
1806                    (value (f0-value contourval)))
1807                (set! times-values (cons (list time value) times-values)))))
1808            (set! last-seg-end seg-end)
1809            (set! segments (cdr segments))))))
1810    ;; Festival apparently decreases F0 at the end of the utterance, prevent it
1811    (if (not (null? times-values))
1812        (let ((last-time (car (car times-values)))
1813              (last-value (cadr (car times-values)))
1814              (last-seg (item.relation.daughtern syl 'SylStructure)))
1815          (set! times-values (cons (list (czech-max (- (item.feat last-seg 'end) 0.01)
1816                                                    (+ last-time 0.001))
1817                                         last-value)
1818                                   times-values))))
1819    (reverse times-values)))
1820
1821;;; Duration
1822
1823(defvar czech-phoneme-durations
1824  '((#   0.100)
1825    (_   0.025)
1826    (a   0.098)
1827    (a:  0.142)
1828    (b   0.067)
1829    (c   0.102)
1830    (ch  0.087)
1831    (c~  0.099)
1832    (d   0.062)
1833    (dz  0.108)
1834    (dz~ 0.094)
1835    (d~  0.077)
1836    (e   0.099)
1837    (e:  0.126)
1838    (f   0.089)
1839    (g   0.067)
1840    (h   0.064)
1841    (i   0.077)
1842    (i:  0.120)
1843    (j   0.065)
1844    (k   0.080)
1845    (l   0.057)
1846    (m   0.068)
1847    (n   0.075)
1848    (n*  0.098)
1849    (n~  0.079)
1850    (o   0.089)
1851    (o:  0.137)
1852    (p   0.079)
1853    (r   0.060)
1854    (r~  0.065)
1855    (r~* 0.073)
1856    (s   0.098)
1857    (s~  0.090)
1858    (t   0.082)
1859    (t~  0.090)
1860    (u   0.082)
1861    (u:  0.139)
1862    (v   0.058)
1863    (z   0.077)
1864    (z~  0.074)
1865    ))
1866
1867(defvar czech-silence-durations
1868  '(("BB" 0.206 0.238) ("B" 0.082 0.095) ("SB" 0.008 0.010)))
1869
1870(defvar czech-stress-duration-factors
1871  '((1  1.03)
1872    (2  1.02)
1873    (3  1.01)
1874    (4  1.00)
1875    (5  1.00)
1876    (6  0.99)
1877    (7  0.98)
1878    (8  0.96)
1879    (9  0.94)
1880    (10 0.93)
1881    (11 0.91)
1882    (12 0.90)))
1883
1884(defvar czech-duration-random-factor 0.2)
1885
1886(define (czech-duration-pauses utt)
1887  (let ((word (utt.relation.first utt 'Word)))
1888    (while word
1889
1890      (let ((durspec (assoc_string (item.feat word "pbreak")
1891                                   czech-silence-durations)))
1892        (if durspec
1893            (let ((min (nth 1 durspec))
1894                  (max (nth 2 durspec))
1895                  (seg (find_last_seg word)))
1896              (if seg
1897                  (item.set_feat
1898                   (item.next (item.relation seg 'Segment))
1899                   'dur_factor
1900                   (* 10 (+ min (* (- max min) (czech-rand)))))))))
1901      (set! word (item.next word)))))
1902
1903(define (czech-duration-factors utt)
1904  (let ((sunit (utt.relation.first utt 'StressUnit)))
1905    (while sunit
1906      (let ((nphones (length (czech-stress-unit-phonemes sunit))))
1907        (cond
1908         ((> nphones 12)
1909          (set! nphones 12))
1910         ((< nphones 1)
1911          (set! nphones 1)))
1912        (let ((factor (cadr (assoc nphones czech-stress-duration-factors))))
1913          (mapcar (lambda (syl)
1914                    (mapcar (lambda (seg)
1915                              (item.set_feat seg "dur_factor" factor))
1916                            (item.relation.daughters syl 'SylStructure)))
1917                  (item.relation.leafs sunit 'StressUnit))))
1918      (set! sunit (item.next sunit))))
1919  ;; Adjust duration factors for initial single-syllabic word
1920  ;; (Take the initial word from Word, not just SylStructure, which may contain
1921  ;; prepunctuation.)
1922  (let ((1st-word (utt.relation.first utt 'Word)))
1923    (while (and 1st-word
1924                (item.daughter1 1st-word)
1925                (item.daughter1 (item.daughter1 1st-word)))
1926      (set! 1st-word (item.next 1st-word)))
1927    (let ((phonemes (and 1st-word
1928                         (apply append
1929                                (mapcar item.daughters
1930                                        (item.daughters
1931                                         (item.relation 1st-word
1932                                                        'SylStructure)))))))
1933      (if (eqv? (czech-syllable-count phonemes) 1)
1934          (let ((durfact (cadr (assoc (czech-min (length phonemes) 12)
1935                                      czech-stress-duration-factors))))
1936            (mapcar (lambda (ph) (item.set_feat ph 'dur_factor durfact))
1937                    phonemes))))))
1938
1939(define (czech-duration-compute utt)
1940  (mapcar
1941   (lambda (seg)
1942     (let ((factor (* (item.feat seg "dur_factor")
1943                      (Param.get 'Duration_Stretch))))
1944       (item.set_feat seg "end"
1945                      (+ (item.feat seg "start")
1946                         (* (if (<= factor 0) 1 factor)
1947                            (cadr (assoc_string (item.name seg)
1948                                                czech-phoneme-durations*)))))))
1949   (utt.relation.items utt 'Segment)))
1950
1951(define (czech-duration utt)
1952  (czech-duration-pauses utt)
1953  (czech-duration-factors utt)
1954  (czech-duration-compute utt)
1955  utt)
1956
1957;;; Volume
1958
1959(defvar czech-volume-scale 1.8)
1960(defvar czech-volume-scale* nil)
1961
1962(define (czech-adjust-volume utt)
1963  (utt.wave.rescale utt czech-volume-scale*))
1964
1965;;; Final phoneme translation
1966
1967(define (czech-translate-add-vowels utt)
1968  (if (and (string-equal (Param.get 'Language) 'czech)
1969           czech-insert-filling-vowels)
1970      (let ((i (utt.relation.first utt 'Segment))
1971            (insert-item (lambda (name orig-ph end pos)
1972                           (let ((feats (item.features orig-ph))
1973                                 (new-feats `((name ,name) (end ,end))))
1974                             (while feats
1975                               (if (not (member (caar feats) '(id name end)))
1976                                   (set! new-feats (cons (car feats)
1977                                                         new-feats)))
1978                               (set! feats (cdr feats)))
1979                             (item.insert orig-ph (cons name (list new-feats))
1980                                          pos)
1981                             (let ((new ((if (eq? pos 'after)
1982                                             item.next item.prev)
1983                                         orig-ph)))
1984                               (if (member 'SylStructure
1985                                           (item.relations orig-ph))
1986                                   (item.relation.insert orig-ph 'SylStructure
1987                                                         new pos))))))
1988            (vowel? (lambda (ph) (czech-item.feat? ph 'ph_vc '+)))
1989            (last-end 0.0))
1990        (while i
1991          (let ((end (item.feat i 'end)))
1992            (cond
1993             ;; Duplicate vowels
1994             ((vowel? i)
1995              (insert-item (item.name i) i (/ (+ last-end end) 2) 'before)))
1996            (set! last-end end))
1997          (set! i (item.next i)))))
1998  utt)
1999
2000(define (czech-translate-phonemes utt)
2001  (if (and (string-equal (Param.get 'Language) 'czech)
2002           czech-phoneset-translation*)
2003      (mapcar
2004       (lambda (item)
2005         (let ((tr (assoc (item.name item) czech-phoneset-translation*)))
2006           (if tr (item.set_name item (cadr tr)))))
2007       (utt.relation.items utt 'Segment)))
2008  utt)
2009
2010(defvar czech-after-analysis-hooks
2011  (list czech-translate-add-vowels czech-translate-phonemes))
2012
2013;;; Finally, the language definition itself
2014
2015(define (czech-reset-parameters)
2016  (set! czech-lts-extra-rules* czech-lts-extra-rules)
2017  (set! czech-int-simple-params* czech-int-simple-params)
2018  (set! czech-phoneme-durations* czech-phoneme-durations)
2019  (set! czech-volume-scale* czech-volume-scale)
2020  (set! czech-phoneset-translation* czech-phoneset-translation)
2021  (set! czech-after-analysis-hooks* czech-after-analysis-hooks)
2022  (Param.set 'Synth_Method 'UniSyn))
2023
2024(define (voice-czech-common)
2025  (voice_reset)
2026  (Param.set 'Language 'czech)
2027  ;; Phone set
2028  (Param.set 'PhoneSet 'czech)
2029  (PhoneSet.select 'czech)
2030  (set! pos_lex_name nil)
2031  ;; Tokenization
2032  (set! token.unknown_word_name czech-token.unknown-word-name)
2033  (set! token.whitespace czech-token.whitespace)
2034  (set! token.punctuation czech-token.punctuation)
2035  (set! token.prepunctuation czech-token.prepunctuation)
2036  (set! token_to_words czech-token-to-words)
2037  (Param.set 'Token_Method 'Token_Any)
2038  ;; Lexicon selection
2039  (lex.select "czech")
2040  ;; Segmentation
2041  (Param.set 'Word_Method 'czech-word)
2042  ;; Part of speech
2043  (set! guess_pos czech-guess-pos)      ; not actually used
2044  (Param.set 'POS_Method czech-pos)
2045  ;; Simple phrase break prediction by punctuation
2046  (set! pos_supported nil)
2047  (set! phrase_cart_tree czech-phrase-cart-tree)
2048  (Param.set 'Phrase_Method 'cart_tree)
2049  (Param.set 'Phrasify_Method Classic_Phrasify)
2050  ;; Pauses
2051  (Param.set 'Pause_Method czech-pause)
2052  ;; Accent prediction and intonation
2053  (set! int_accent_cart_tree czech-accent-cart-tree)
2054  (Param.set 'Int_Method czech-int-select-contours)
2055  (set! int_general_params (cons (list 'targ_func czech-int-targets)
2056                                 czech-int-simple-params*))
2057  (Param.set 'Int_Target_Method Int_Targets_General)
2058  ;; Duration prediction
2059  (Param.set 'Duration_Method czech-duration)
2060  ;; Postlex rules
2061  (set! postlex_rules_hooks '())
2062  (set! after_analysis_hooks czech-after-analysis-hooks*)
2063  ;; Final voice adjustment
2064  (set! after_synth_hooks (list czech-adjust-volume))
2065  ;; Set current voice
2066  (set! current_voice_reset nil)
2067  (set! current-voice 'czech))
2068
2069(defmac (czech-proclaim-voice form)
2070  (let ((name (nth 1 form))
2071        (description (nth 2 form))
2072        (body (nth_cdr 3 form))
2073        (options ()))
2074    (if (consp name)
2075        (begin
2076          (set! options (cdr name))
2077          (set! name (car name))))
2078    (set! name (intern (string-append 'czech_ name)))
2079    (let ((parameters `((language czech)
2080                        (dialect ,(cdr (assoc 'dialect options)))
2081                        (gender ,(cadr (assoc 'gender options)))
2082                        (coding ISO-8859-2)
2083                        (description ,description))))
2084      `(begin
2085         (define (,(intern (string-append 'voice_ name)))
2086           (czech-reset-parameters)
2087           ,@body
2088           (voice-czech-common)
2089           (set! current-voice (quote ,name)))
2090         (proclaim_voice
2091          (quote ,name)
2092          (quote ,parameters))))))
2093
2094(provide 'czech)
2095