#!/bin/sh
###########################################################################
##                                                                       ##
##                    Language Technologies Institute                    ##
##                      Carnegie Mellon University                       ##
##                          Copyright (c) 2004                           ##
##                         All Rights Reserved.                          ##
##                                                                       ##
##  Permission is hereby granted, free of charge, to use and distribute  ##
##  this software and its documentation without restriction, including   ##
##  without limitation the rights to use, copy, modify, merge, publish,  ##
##  distribute, sublicense, and/or sell copies of this work, and to      ##
##  permit persons to whom this work is furnished to do so, subject to   ##
##  the following conditions:                                            ##
##   1. The code must retain the above copyright notice, this list of    ##
##      conditions and the following disclaimer.                         ##
##   2. Any modifications must be clearly marked as such.                ##
##   3. Original authors' names are not deleted.                         ##
##   4. The authors' names are not used to endorse or promote products   ##
##      derived from this software without specific prior written        ##
##      permission.                                                      ##
##                                                                       ##
##  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         ##
##  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ##
##  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   ##
##  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      ##
##  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    ##
##  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   ##
##  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          ##
##  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       ##
##  THIS SOFTWARE.                                                       ##
##                                                                       ##
###########################################################################
##                                                                       ##
##  Make the files from the Festival Released version                    ##
##                                                                       ##
###########################################################################
#
# Usage: $0 [setup|lts|lex|lex2|compresslex|install]
#
# With no argument the script re-invokes itself once per stage, in the
# order: setup, lts, lex, compresslex, install.  (lex2 is experimental and
# is only run when asked for explicitly.)
#
# Environment:
#   FLITEDIR    root of the flite tree; defaults to "$(pwd)/../..".  It is
#               used after changing directory, so it should be absolute.
#   FESTVOXDIR  festvox tree supplying src/lts/build_lts and
#               src/lts/build_lts_rules (required by the setup stage).
#
# The lts, lex and lex2 stages run the `festival` binary found on PATH.

if [ "x$FLITEDIR" = "x" ]; then
    FLITEDIR="$(pwd)/../.."
fi

if [ "$#" -eq 0 ]; then
    "$0" setup
    "$0" lts
    "$0" lex
    "$0" compresslex
    "$0" install
    exit
fi

if [ "$1" = "setup" ]; then
    # Without FESTVOXDIR the cp commands below would silently look in
    # /src/lts, so report the real problem up front.
    if [ "x$FESTVOXDIR" = "x" ]; then
        echo "$0: FESTVOXDIR must be set for the setup stage" >&2
        exit 1
    fi
    tar zxvf festlex_CMU.tar.gz
    cp -p "$FESTVOXDIR/src/lts/build_lts" festival/lib/dicts/cmu
    cp -p "$FESTVOXDIR/src/lts/build_lts_rules" festival/lib/dicts/cmu
    # -p so that re-running setup over an existing tree is not an error
    mkdir -p festival/lib/dicts/cmu/c
    mkdir -p festival/lib/dicts/cmu/wfst
    mkdir -p festival/lib/dicts/cmu/lts_scratch
fi

if [ "$1" = "lts" ]; then
    (
        # Abort the stage rather than run the build in the wrong directory
        cd festival/lib/dicts/cmu || exit 1
        festival --heap 10000000 -b allowables.scm '(dump-flat-entries-all "cmudict-0.4.out" "lts_scratch/lex_entries.out")'
        ./build_lts cummulate
        ./build_lts align
        ./build_lts build
        ./build_lts merge
        ./build_lts test
        festival --heap 10000000 -b "$FLITEDIR/tools/make_lts_wfst.scm" lts_scratch/lex_lts_rules.scm '(lts_to_rg_to_wfst lex_lts_rules "wfst/")'
        festival --heap 10000000 -b "$FLITEDIR/tools/make_lts.scm" lts_scratch/lex_lts_rules.scm '(ltsregextoC "cmu" lex_lts_rules "wfst/" "c")'
    )
fi

if [ "$1" = "lex" ]; then
    (
        cd festival/lib/dicts/cmu || exit 1
        # Find the words to prune from the built lexicon
        # We will prune words that aren't homographs and the LTS gets correct
        # Use the lts_test.log to find the failed entries and only
        # include them in the list
        # make_lex.scm is taken from $FLITEDIR, like every other tool script
        # used here, rather than from a fixed path in a home directory.
        festival -b cmulex.scm "$FLITEDIR/tools/make_lex.scm" '(remove_predictable_entries "cmudict-0.4.out" "pruned_lex.scm" "lex_lts_rules.scm")'
        festival --heap 10000000 -b '(lex.compile "pruned_lex.scm" "pruned_lex.out")'
        festival --heap 10000000 -b "$FLITEDIR/tools/make_lex.scm" '(lextoC "cmu" "pruned_lex.out" "c")'
    )
fi

if [ "$1" = "lex2" ]; then
    # experiment: put Letter_Phone(s) together as things to compress
    # does give much smaller result, though needs to be less than 256 symbols
    # not fully implemented
    (
        cd festival/lib/dicts/cmu || exit 1
        # Use the lts_test.log to find the failed entries and only
        # include them in the list
        # this needs festival-1.96 or later to get the pos from the lts_test.log
        #
        # Pipeline: pick the "failed" lines, put spaces around parentheses so
        # awk sees them as separate fields, rebuild each entry as
        # ( "word" pos ( phones... )) dropping "#" tokens, strip the
        # parentheses and quotes, then emit "<pos>_start" followed by one
        # "<letter>_<field>" token per letter of the word.
        grep "^failed " lts_test.log |
            sed 's/(/( /g;s/)/ )/g' |
            awk '{printf("( \"");
                  for (i=3; $i != ")"; i++)
                      printf("%s",$i);
                  i++;
                  printf("\" %s ( ",$i);
                  i++; i++;
                  for ( ; $i != ")"; i++)
                      if ($i != "#")
                          printf("%s ",$i);
                  printf("))\n");
                 }' |
            tr -d '()"' |
            awk '{if ($2 == "nil")
                      printf("0_start ");
                  else
                      printf("%s_start ",$2);
                  for (i=1; i<=length($1); i++)
                      printf("%s_%s ",substr($1,i,1),$(i+2));
                  printf("\n");}' >pruned_lex2.data
#       festival --heap 10000000 -b $FLITEDIR/tools/make_lex.scm '(lextoC "cmu" "pruned_lex.out" "c")' ;
    )
fi

if [ "$1" = "compresslex" ]; then
    # Compress the entries and phone strings by finding best ngrams
    (
        cd festival/lib/dicts/cmu/c || exit 1
        "$FLITEDIR/tools/huff_table" phones cmu_lex_data cmu_lex_phones_huff_table.c
        "$FLITEDIR/tools/huff_table" entries cmu_lex_data cmu_lex_entries_huff_table.c
        # Join the compressed entries ($1), compressed phones ($2) and the
        # corpus text ($3) line by line.  Each compressed string is read as
        # 4-character groups whose last three characters are octal digits:
        # phones are emitted back to front, then 255, then the entry front
        # to back, then 0.  pcount is the running byte count and is printed
        # in the trailing "num_bytes" comment.
        paste huff.entries.compressed huff.phones.compressed huff.tmp.corpus |
            tr -d " " |
            awk 'BEGIN {pcount = 1;
                        printf("/* index to compressed data */\n");
                       }
                 function unenoctal(x)
                 {
                     y = ((substr(x,1,1)+0)*64) + ((substr(x,2,1)+0)*8) + (substr(x,3,1)+0);
                     return y;
                 }
                 {printf(" ");
                  for (i=length($2)-3; i>0; i-=4)
                  {
                      printf("%d,",unenoctal(substr($2,i+1,3)));
                      pcount++;
                  }
                  pcount++;
                  printf(" 255, /* %d %s */ ",pcount,$3);
                  for (i=1; i<length($1); i+=4)
                  {
                      printf("%d,",unenoctal(substr($1,i+1,3)));
                      pcount++;
                  }
                  printf("0,\n");
                  pcount++;
                 }
                 END { printf("/* num_bytes = %d */\n",pcount);}' >cmu_lex_data_compressed.c
        # Pull the byte count back out of the trailing comment (4th field)
        grep "num_bytes = " cmu_lex_data_compressed.c |
            awk '{print $4}' >cmu_lex_num_bytes_compressed.c
    )
fi

if [ "$1" = "install" ]; then
    # Copy the generated C sources into the current directory; the
    # compressed data and byte-count files are renamed on the way.
    cp -p festival/lib/dicts/cmu/c/cmu_lex_data.c .
    cp -p festival/lib/dicts/cmu/c/cmu_lex_data_compressed.c cmu_lex_data_raw.c
    cp -p festival/lib/dicts/cmu/c/cmu_lex_phones_huff_table.c .
    cp -p festival/lib/dicts/cmu/c/cmu_lex_entries_huff_table.c .
    cp -p festival/lib/dicts/cmu/c/cmu_lex_entries.c .
    cp -p festival/lib/dicts/cmu/c/cmu_lex_num_bytes_compressed.c cmu_lex_num_bytes.c

    cp -p festival/lib/dicts/cmu/c/cmu_lts_model.c .
    cp -p festival/lib/dicts/cmu/c/cmu_lts_model.h .
    cp -p festival/lib/dicts/cmu/c/cmu_lts_rules.c .

fi