#!@PERL@
# Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
# Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
# Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
#
# @configure_input@
#
# mkdfa.pl --- DFA compiler driver.
#
# usage: mkdfa.pl [-n] prefix
#
# Reads "<prefix>.grammar" and "<prefix>.voca" and writes:
#   <prefix>.dfa   produced by running the external "mkfa" program (and
#                  "dfa_minimize" when that executable is found next to
#                  this script),
#   <prefix>.term  category id / category name table taken from the voca,
#   <prefix>.dict  word dictionary taken from the voca (skipped with -n).
#
# Output files use CRLF line ends once a CRLF line end has been seen in
# the input files, and LF otherwise.

use strict;
use warnings;

## setup
# tmpdir
my $usrtmpdir = "";             # specify if any

# mkfa executable location: the directory part of $0 including its trailing
# separator, or the empty string when $0 has no directory part
my ($thisdir) = ($0 =~ /(.*(\/|\\))[^\/\\]*$/o);
$thisdir = "" unless defined $thisdir;
my $mkfabin = "${thisdir}mkfa";

# dfa_minimize executable location
my $minimizebin = "${thisdir}dfa_minimize";

# find tmpdir: the first candidate that is an existing, writable directory.
# The candidates are copied into an array first so that iterating over them
# cannot create missing %ENV entries.
my @tmpdirs = ($usrtmpdir, $ENV{"TMP"}, $ENV{"TEMP"},
               "/tmp", "/var/tmp", "/WINDOWS/Temp", "/WINNT/Temp");
my $tmpdir = "";
foreach my $candidate (@tmpdirs) {
    next if !defined($candidate) || $candidate eq "";
    if (-d $candidate && -w $candidate) {
        $tmpdir = $candidate;
        last;
    }
}
if ($tmpdir eq "") {
    die "Please set working directory in \$usrtmpdir at $0\n";
}

#############################################################

if (@ARGV == 0 || $ARGV[0] eq "-h") {
    usage();
}

my $make_dict = 1;
my $make_term = 1;

# becomes 1 (and stays 1) once an input line ending in CR has been seen
my $CRLF = 0;

# "-t" is accepted but has no effect because the term file is always made;
# the last non-option argument is the grammar prefix
my $gramprefix = "";
foreach my $arg (@ARGV) {
    if ($arg eq "-t") {
        $make_term = 1;
    }
    elsif ($arg eq "-n") {
        $make_dict = 0;
    }
    else {
        $gramprefix = $arg;
    }
}
if ($gramprefix eq "") {
    usage();
}

# All file names derive from the parsed prefix (not from the last command
# line word), so options may also be given after the prefix.
my $gramfile    = "$gramprefix.grammar";
my $vocafile    = "$gramprefix.voca";
my $dfafile     = "$gramprefix.dfa";
my $dictfile    = "$gramprefix.dict";
my $termfile    = "$gramprefix.term";
my $tmpprefix   = "$tmpdir/g$$";
my $tmpvocafile = "${tmpprefix}.voca";
my $rgramfile   = "${tmpprefix}.grammar";
my $headerfile  = "${tmpprefix}.h";

# generate reverse grammar file: same rules, with the space-separated
# symbols of each right-hand side written in reverse order
open(my $gram_fh, '<', $gramfile)
    || die "cannot open \"$gramfile\": $!";
open(my $rgram_fh, '>', $rgramfile)
    || die "cannot open \"$rgramfile\": $!";
my $n = 0;
while (my $line = <$gram_fh>) {
    chomp $line;
    $CRLF = 1 if $line =~ /\r$/;
    $line =~ s/\r+$//;
    $line =~ s/#.*//;                       # strip comments
    next if $line =~ /^[ \t]*$/;
    my ($left, $right) = split(/:/, $line);
    $right = "" unless defined $right;      # line without ':'
    my $eol = ($CRLF == 1) ? "\r\n" : "\n";
    print {$rgram_fh} $left, ': ', join(' ', reverse(split(/ /, $right))), $eol;
    $n++;
}
close($gram_fh);
close($rgram_fh) || die "error writing \"$rgramfile\": $!";
print "$gramfile has $n rules\n";

# make temporary voca for mkfa (include only category info)
if (! -r $vocafile) {
    die "cannot open voca file $vocafile";
}
open(my $voca_fh, '<', $vocafile)
    || die "cannot open vocabulary file: $!";
open(my $tmpvoca_fh, '>', $tmpvocafile)
    || die "cannot open temporary file $tmpvocafile: $!";
my $term_fh;
if ($make_term == 1) {
    open($term_fh, '>', $termfile)
        || die "cannot open $termfile for writing: $!";
}
my $n1     = 0;     # number of category ("%...") lines
my $n2     = 0;     # number of word lines
my $termid = 0;
while (my $line = <$voca_fh>) {
    chomp $line;
    $CRLF = 1 if $line =~ /\r$/;
    $line =~ s/\r+$//;
    $line =~ s/#.*//;
    next if $line =~ /^[ \t]*$/;
    if ($line =~ /^%[ \t]*([A-Za-z0-9_]*)/) {
        my $category = $1;
        my $eol = ($CRLF == 1) ? "\r\n" : "\n";
        printf {$tmpvoca_fh} "#%s%s", $category, $eol;
        if ($make_term == 1) {
            printf {$term_fh} "%d\t%s%s", $termid, $category, $eol;
            $termid++;
        }
        $n1++;
    }
    else {
        $n2++;
    }
}
close($voca_fh);
close($tmpvoca_fh) || die "error writing $tmpvocafile: $!";
if ($make_term == 1) {
    close($term_fh) || die "error writing $termfile: $!";
}
print "$vocafile has $n1 categories and $n2 words\n";

print "---\n";

# call mkfa and make .dfa
# When the temporary path contains "cygdrive", every file argument is
# passed through `cygpath -w` by the shell.
my $use_cygpath = ($tmpprefix =~ /cygdrive/) ? 1 : 0;
my $do_minimize = (-x $minimizebin) ? 1 : 0;
if (!$do_minimize) {
    # no minimization
    print "Warning: dfa_minimize not found in the same place as mkdfa.pl\n";
    print "Warning: no minimization performed\n";
}

# mkfa writes straight to the .dfa file, or to "<dfa>.tmp" when the result
# is going to be minimized afterwards
my $rawdfafile = $do_minimize ? "${dfafile}.tmp" : $dfafile;
my $mkfa_cmd = join(' ',
    $mkfabin, '-e1',
    '-fg', shell_path($rgramfile,   $use_cygpath),
    '-fv', shell_path($tmpvocafile, $use_cygpath),
    '-fo', shell_path($rawdfafile,  $use_cygpath),
    '-fh', shell_path($headerfile,  $use_cygpath));
my $status = system($mkfa_cmd);

if ($do_minimize) {
    # minimize DFA after generation; only when mkfa succeeded, and a failing
    # minimizer counts as a failure of the whole run
    if ($status == 0) {
        my $minimize_cmd = join(' ',
            $minimizebin,
            shell_path($rawdfafile, $use_cygpath),
            '-o', shell_path($dfafile, $use_cygpath));
        $status = system($minimize_cmd);
    }
    unlink($rawdfafile);
}
unlink($rgramfile);
unlink($tmpvocafile);
unlink($headerfile);
print "---\n";
if ($status != 0) {
    # error: report failure through the exit status as well
    print "no .dfa or .dict file generated\n";
    exit 1;
}

# convert .voca -> .dict
# terminal number should be ordered by voca at mkfa output
if ($make_dict == 1) {
    my $nowid = -1;     # id of the current category; incremented per "%" line
    open(my $dictvoca_fh, '<', $vocafile)
        || die "No vocafile \"$vocafile\" found.\n";
    open(my $dict_fh, '>', $dictfile)
        || die "cannot open $dictfile for writing.\n";
    my $eol = ($CRLF == 1) ? "\r\n" : "\n";
    while (my $line = <$dictvoca_fh>) {
        chomp $line;
        $line =~ s/\r//g;
        $line =~ s/#.*//;
        next if $line =~ /^[ \t]*$/;
        if ($line =~ /^%/) {
            $nowid++;
            next;
        }
        # first field is the word name, the remaining fields are written
        # after it joined by single spaces
        my ($name, @rest) = split(' ', $line);
        $name = "" unless defined $name;
        printf {$dict_fh} "%d\t[%s]\t%s%s", $nowid, $name, join(' ', @rest), $eol;
    }
    close($dictvoca_fh);
    close($dict_fh) || die "error writing $dictfile: $!";
}

my $gene = "$dfafile";
if ($make_term == 1) {
    $gene .= " $termfile";
}
if ($make_dict == 1) {
    $gene .= " $dictfile";
}
print "generated: $gene\n";

# Print the usage message and exit.
sub usage {
    print "mkdfa.pl --- DFA compiler\n";
    print "usage: $0 [-n] prefix\n";
    print "\t-n ... keep current dict, not generate\n";
    exit;
}

# Return $path as it should appear inside a shell command string: wrapped in
# a `cygpath -w ...` command substitution when $use_cygpath is true,
# unchanged otherwise.
sub shell_path {
    my ($path, $use_cygpath) = @_;
    return $use_cygpath ? "`cygpath -w $path`" : $path;
}