1#!/bin/sh
2###########################################################################
3##                                                                       ##
4##                  Language Technologies Institute                      ##
5##                     Carnegie Mellon University                        ##
6##                        Copyright (c) 2004                             ##
7##                        All Rights Reserved.                           ##
8##                                                                       ##
9##  Permission is hereby granted, free of charge, to use and distribute  ##
10##  this software and its documentation without restriction, including   ##
11##  without limitation the rights to use, copy, modify, merge, publish,  ##
12##  distribute, sublicense, and/or sell copies of this work, and to      ##
13##  permit persons to whom this work is furnished to do so, subject to   ##
14##  the following conditions:                                            ##
15##   1. The code must retain the above copyright notice, this list of    ##
16##      conditions and the following disclaimer.                         ##
17##   2. Any modifications must be clearly marked as such.                ##
18##   3. Original authors' names are not deleted.                         ##
19##   4. The authors' names are not used to endorse or promote products   ##
20##      derived from this software without specific prior written        ##
21##      permission.                                                      ##
22##                                                                       ##
23##  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         ##
24##  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ##
25##  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   ##
26##  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      ##
27##  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    ##
28##  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   ##
29##  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          ##
30##  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       ##
31##  THIS SOFTWARE.                                                       ##
32##                                                                       ##
33###########################################################################
34##                                                                       ##
35##  Make the files from the Festival Released version                    ##
36##                                                                       ##
37###########################################################################
38
# Default FLITEDIR to two directories above the current working directory
# when the caller has left it unset or empty.
: "${FLITEDIR:=$(pwd)/../..}"
43
# With no arguments, run the complete build by re-invoking this script once
# per stage, in order.  $0 is quoted so that a script path containing
# whitespace still works, and the run stops at the first stage that reports
# failure, since every later stage consumes the output of the earlier ones.
if [ "$#" -eq 0 ]
then
   for stage in setup lts lex compresslex install
   do
      "$0" "$stage" || exit $?
   done
   exit 0
fi
53
# setup: unpack festlex_CMU.tar.gz, copy the build_lts scripts from the
# festvox tree into the lexicon directory, and create the output
# directories (c, wfst, lts_scratch) that the later stages write into.
if [ "$1" = "setup" ]
then
   # build_lts and build_lts_rules come from $FESTVOXDIR; with the variable
   # unset the copies would be attempted from /src/lts, so refuse up front.
   if [ -z "$FESTVOXDIR" ]
   then
      echo "$0: FESTVOXDIR must be set for the setup stage" >&2
      exit 1
   fi
   tar zxvf festlex_CMU.tar.gz || exit 1
   cp -p "$FESTVOXDIR/src/lts/build_lts" festival/lib/dicts/cmu || exit 1
   cp -p "$FESTVOXDIR/src/lts/build_lts_rules" festival/lib/dicts/cmu || exit 1
   # -p: re-running setup over an already prepared tree is not an error
   mkdir -p festival/lib/dicts/cmu/c \
            festival/lib/dicts/cmu/wfst \
            festival/lib/dicts/cmu/lts_scratch || exit 1
fi
63
# lts: build the letter-to-sound rules inside festival/lib/dicts/cmu and
# convert them with the flite tools.  Steps, in order:
#   1. festival dumps cmudict-0.4.out to lts_scratch/lex_entries.out
#   2. ./build_lts is run with cummulate, align, build, merge, test
#   3. $FLITEDIR/tools/make_lts_wfst.scm and make_lts.scm are applied to
#      lts_scratch/lex_lts_rules.scm, targeting the wfst/ and c directories
# The work happens in a subshell so the caller's directory is unchanged.
# The stage fails (exit 1) if the lexicon directory cannot be entered or
# the final conversion reports failure.
if [ "$1" = "lts" ]
then
   (
      # Never run the build in the wrong directory if the tree is missing.
      cd festival/lib/dicts/cmu || exit 1
      festival --heap 10000000 -b allowables.scm '(dump-flat-entries-all "cmudict-0.4.out" "lts_scratch/lex_entries.out")'
      # "cummulate" is spelled as in the original invocation; presumably it
      # is the sub-command name build_lts expects -- do not "correct" it.
      ./build_lts cummulate
      ./build_lts align
      ./build_lts build
      ./build_lts merge
      ./build_lts test
      festival --heap 10000000 -b "$FLITEDIR/tools/make_lts_wfst.scm" lts_scratch/lex_lts_rules.scm '(lts_to_rg_to_wfst lex_lts_rules "wfst/")'
      festival --heap 10000000 -b "$FLITEDIR/tools/make_lts.scm" lts_scratch/lex_lts_rules.scm '(ltsregextoC "cmu" lex_lts_rules "wfst/" "c")'
   ) || exit 1
fi
77
# lex: prune the lexicon and convert it to C, inside festival/lib/dicts/cmu.
# The work happens in a subshell so the caller's directory is unchanged.
# The stage fails (exit 1) if the lexicon directory cannot be entered or
# the final conversion reports failure.
if [ "$1" = "lex" ]
then
   (
      # Never write pruned_lex.* into the wrong directory.
      cd festival/lib/dicts/cmu || exit 1
      # Find the words to prune from the built lexicon.
      # We will prune words that aren't homographs and the LTS gets correct.
      # make_lex.scm is taken from $FLITEDIR, like the lextoC call below,
      # rather than from one developer's home directory.
      festival -b cmulex.scm "$FLITEDIR/tools/make_lex.scm" '(remove_predictable_entries "cmudict-0.4.out" "pruned_lex.scm" "lex_lts_rules.scm")'
      festival --heap 10000000 -b '(lex.compile "pruned_lex.scm" "pruned_lex.out")'
      festival --heap 10000000 -b "$FLITEDIR/tools/make_lex.scm" '(lextoC "cmu" "pruned_lex.out" "c")'
   ) || exit 1
fi
90
# lex2_letter_phone_pairs: filter from stdin to stdout.
# Each input line is expected to look like
#     failed (l1 l2 ...) POS (# p1 p2 ... #)
# and is rewritten as
#     POS_start l1_p1 l2_p2 ...
# where POS "nil" is written as 0.  Steps:
#   sed   - put spaces inside every parenthesis so they become fields
#   awk 1 - join fields 3.. up to the first ")" into one word, take the
#           next field as POS, then list the second group without "#"
#   tr    - drop the parentheses and quotes again
#   awk 2 - pair the i-th character of the word with field i+2
# Both scanning loops are bounded by NF so that a truncated line, with no
# closing ")", cannot make awk loop forever.
lex2_letter_phone_pairs() {
   sed 's/(/( /g;s/)/ )/g' |
   awk '{printf("( \"");
         for (i=3; i <= NF && $i != ")"; i++)
             printf("%s",$i);
         i++;
         printf("\" %s ( ",$i);
         i += 2;
         for ( ; i <= NF && $i != ")"; i++)
            if ($i != "#")
               printf("%s ",$i);
         printf("))\n");
       }' |
   tr -d '()"' |
   awk '{if ($2 == "nil")
            printf("0_start ");
         else
            printf("%s_start ",$2);
         for (i=1; i<=length($1); i++)
            printf("%s_%s ",substr($1,i,1),$(i+2));
         printf("\n");}'
}

# lex2: experiment, not fully implemented.  Puts Letter_Phone(s) together
# as the things to compress; this gives a much smaller result, though it
# needs fewer than 256 symbols.
if [ "$1" = "lex2" ]
then
   (
      # Never write pruned_lex2.data into the wrong directory.
      cd festival/lib/dicts/cmu || exit 1
      # Use the lts_test.log to find the failed entries and only include
      # them in the list.  This needs festival-1.96 or later to get the
      # pos from the lts_test.log.
      grep "^failed " lts_test.log |
         lex2_letter_phone_pairs >pruned_lex2.data
#     festival --heap 10000000 -b $FLITEDIR/tools/make_lex.scm '(lextoC "cmu" "pruned_lex.out" "c")' ;
   ) || exit 1
fi
124
# cmu_lex_compressed_index: filter from stdin to stdout.
# Input: one record per line with three whitespace-separated columns after
# all blanks have been removed: compressed entry, compressed phones, and
# the corpus text.  Columns 1 and 2 are read as 4-character groups whose
# last three characters are octal digits (the first character is skipped;
# presumably a backslash -- confirm against the huff_table output).
# Output per record, as C initialiser text:
#     <phone bytes in reverse order>, 255, /* COUNT TEXT */ <entry bytes>,0,
# preceded by a header comment and followed by "/* num_bytes = N */", where
# the count starts at 1 and advances by one for every value emitted.
cmu_lex_compressed_index() {
   tr -d " " |
   awk 'BEGIN {pcount = 1;
               printf("/* index to compressed data */\n");
              }
        function unenoctal(x)
        {
           return ((substr(x,1,1)+0)*64) + ((substr(x,2,1)+0)*8) + (substr(x,3,1)+0);
        }
        {printf("   ");
         for (i=length($2)-3; i>0; i-=4)
         {
            printf("%d,",unenoctal(substr($2,i+1,3)));
            pcount++;
         }
         pcount++;
         printf(" 255, /* %d %s */ ",pcount,$3);
         for (i=1; i<length($1); i+=4)
         {
            printf("%d,",unenoctal(substr($1,i+1,3)));
            pcount++;
         }
         printf("0,\n");
         pcount++;
        }
      END { printf("/* num_bytes = %d */\n",pcount);}'
}

# compresslex: compress the entries and phone strings by finding best
# ngrams (huff_table), then emit the compressed data and its byte count as
# C source in festival/lib/dicts/cmu/c.
if [ "$1" = "compresslex" ]
then
   (
      # Never create the output files in the wrong directory.
      cd festival/lib/dicts/cmu/c || exit 1
      "$FLITEDIR/tools/huff_table" phones cmu_lex_data cmu_lex_phones_huff_table.c
      "$FLITEDIR/tools/huff_table" entries cmu_lex_data cmu_lex_entries_huff_table.c
      paste huff.entries.compressed huff.phones.compressed huff.tmp.corpus |
         cmu_lex_compressed_index >cmu_lex_data_compressed.c
      # The byte count is the 4th word of the trailing num_bytes comment.
      grep "num_bytes = " cmu_lex_data_compressed.c |
         awk '{print $4}' >cmu_lex_num_bytes_compressed.c
   ) || exit 1
fi
162
# install: copy the generated C sources from festival/lib/dicts/cmu/c into
# the current directory.  Each table entry is "source:destination"; a
# destination of "." keeps the file name, the other two are renamed.
if [ "$1" = "install" ]
then
   gen=festival/lib/dicts/cmu/c
   for spec in \
      cmu_lex_data.c:. \
      cmu_lex_data_compressed.c:cmu_lex_data_raw.c \
      cmu_lex_phones_huff_table.c:. \
      cmu_lex_entries_huff_table.c:. \
      cmu_lex_entries.c:. \
      cmu_lex_num_bytes_compressed.c:cmu_lex_num_bytes.c \
      cmu_lts_model.c:. \
      cmu_lts_model.h:. \
      cmu_lts_rules.c:.
   do
      cp -p "$gen/${spec%%:*}" "${spec#*:}"
   done
fi
177
178
179