1#!@PERL@
2# Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
3# Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
4# Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
5#
6# @configure_input@
7#
8
## setup
# Working directory override: set this to force a particular temporary
# directory; leave it empty to let the search below pick one.
$usrtmpdir = "";

# Directory part of the path this script was started with, including the
# trailing separator (undefined when $0 carries no directory component).
($thisdir) = ($0 =~ /(.*(\/|\\))[^\/\\]*$/o);

# mkfa and dfa_minimize are looked up next to this script.
$mkfabin     = "${thisdir}mkfa";
$minimizebin = "${thisdir}dfa_minimize";

# Candidate temporary directories, in order of preference.
@tmpdirs = (
    $usrtmpdir,
    $ENV{"TMP"},
    $ENV{"TEMP"},
    "/tmp",
    "/var/tmp",
    "/WINDOWS/Temp",
    "/WINNT/Temp",
);

# Take candidates off the front of the list until one of them turns out to
# be an existing, writable directory.  Empty or unset entries are skipped.
$tmpdir = "";
while ($tmpdir eq "" && @tmpdirs) {
    $t = shift(@tmpdirs);
    $tmpdir = $t if ($t ne "" && -d $t && -w $t);
}
die "Please set working directory in \$usrtmpdir at $0\n" if ($tmpdir eq "");
34
35#############################################################
36
# Show the usage text when called without arguments or with -h as the
# first argument.
if ($#ARGV < 0 || $ARGV[0] eq "-h") {
    usage();
}

# Defaults: generate both the .dict and the .term file.
$make_dict = 1;
$make_term = 1;

# Becomes 1 as soon as an input line ending in CR is read; from then on the
# generated files are written with CRLF line endings.
$CRLF = 0;

# Options may be given before or after the grammar prefix.  When several
# non-option words are given, the last one is taken as the prefix.
$gramprefix = "";
foreach my $arg (@ARGV) {
    if ($arg eq "-t") {
        # $make_term is already 1 by default, so -t changes nothing here.
        $make_term = 1;
    } elsif ($arg eq "-n") {
        $make_dict = 0;
    } else {
        $gramprefix = $arg;
    }
}
usage() if ($gramprefix eq "");

# Derive every file name from the parsed prefix and not from the last word
# on the command line, so that "mkdfa.pl prefix -n" does not go looking for
# a file called "-n.grammar".
$gramfile = "${gramprefix}.grammar";
$vocafile = "${gramprefix}.voca";
$dfafile  = "${gramprefix}.dfa";
$dictfile = "${gramprefix}.dict";
$termfile = "${gramprefix}.term";

# Temporary files are named after the process id inside $tmpdir.
$tmpprefix   = "$tmpdir/g$$";
$tmpvocafile = "${tmpprefix}.voca";
$rgramfile   = "${tmpprefix}.grammar";
67
# generate reverse grammar file
#
# Every rule "LEFT: SYM1 SYM2 ..." read from $gramfile is written to the
# temporary file $rgramfile with the symbols of its right-hand side in
# reverse order.  Text from "#" to the end of a line and blank lines are
# dropped.  $n counts the rules that were written.
#
# The files are opened with three-argument open so that unusual characters
# in the file names cannot be mistaken for an open mode or a pipe.
open(my $gram_in, "<", $gramfile)
    || die "cannot open \"$gramfile\": $!";
open(my $rgram_out, ">", $rgramfile)
    || die "cannot open \"$rgramfile\": $!";
$n = 0;
while (<$gram_in>) {
    chomp;
    # Remember that the input uses CRLF so the output can follow suit.
    $CRLF = 1 if /\r$/;
    s/\r+$//g;
    s/#.*//g;
    next if /^[ \t]*$/;

    my ($left, $right) = split(/\:/);
    my $eol = ($CRLF == 1) ? "\r\n" : "\n";

    # The split is on single blanks, so the blank that follows the colon
    # yields an empty first field, which shows up as a trailing blank in
    # the reversed rule.
    my @reversed = reverse(split(/ /, $right));
    print {$rgram_out} $left, ': ', join(' ', @reversed), $eol;
    $n++;
}
close($gram_in);
# Buffered write errors only surface at close time.
close($rgram_out) || die "cannot write \"$rgramfile\": $!";
print "$gramfile has $n rules\n";
89
# make temporary voca for mkfa (include only category info)
#
# Scans $vocafile.  Each category line ("%NAME") is written to the
# temporary voca file as "#NAME"; when $make_term is set, the category is
# also written to $termfile as "<id><TAB>NAME" with ids counting up from 0.
# Every other non-blank line is only counted as a word.  $n1 and $n2 hold
# the number of categories and words.
if (! -r $vocafile) {
    die "cannot open voca file $vocafile";
}
open(my $voca_in, "<", $vocafile)
    || die "cannot open vocabulary file \"$vocafile\": $!";
open(my $tmpvoca_out, ">", $tmpvocafile)
    || die "cannot open temporary file $tmpvocafile: $!";
my $term_out;
if ($make_term == 1) {
    # Fail loudly here; otherwise a missing .term file would go unnoticed
    # and still be listed as generated at the end of the run.
    open($term_out, ">", $termfile)
        || die "cannot open $termfile for writing: $!";
}
$n1 = 0;
$n2 = 0;
$termid = 0;
while (<$voca_in>) {
    chomp;
    # Remember that the input uses CRLF so the output can follow suit.
    $CRLF = 1 if /\r$/;
    s/\r+$//g;
    s/#.*//g;
    next if /^[ \t]*$/;

    if (/^%[ \t]*([A-Za-z0-9_]*)/) {
        my $category = $1;
        my $eol = ($CRLF == 1) ? "\r\n" : "\n";
        print {$tmpvoca_out} "#", $category, $eol;
        if ($make_term == 1) {
            printf {$term_out} "%d\t%s%s", $termid, $category, $eol;
            $termid++;
        }
        $n1++;
    } else {
        $n2++;
    }
}
close($voca_in);
# Buffered write errors only surface at close time.
close($tmpvoca_out) || die "cannot write temporary file $tmpvocafile: $!";
if ($make_term == 1) {
    close($term_out) || die "cannot write $termfile: $!";
}
print "$vocafile    has $n1 categories and $n2 words\n";
133
print "---\n";

# call mkfa and make .dfa
#
# mkfa compiles the reversed grammar and the temporary voca file into a
# DFA.  When dfa_minimize is executable, mkfa writes to "$dfafile.tmp" and
# dfa_minimize turns that into $dfafile; otherwise mkfa writes $dfafile
# directly.  The tools are started with list-form system(), so no shell is
# involved and paths containing blanks or shell metacharacters are passed
# through intact.
#
# $status ends up non-zero when either tool fails.  In that case the script
# reports the failure and exits with status 1.

# Return the path to hand to the external tools.  When the temporary
# directory is below /cygdrive, the path is converted with "cygpath -w"
# (presumably because the tools may be native Windows programs); otherwise
# it is returned unchanged.  If cygpath cannot be run or prints nothing,
# a warning is issued and the path is returned unchanged.
sub _native_path {
    my ($path) = @_;
    return $path unless ($tmpprefix =~ /cygdrive/);

    my $converted;
    if (open(my $pipe, "-|", "cygpath", "-w", $path)) {
        $converted = <$pipe>;
        close($pipe);
    }
    $converted =~ s/[\r\n]+$// if defined($converted);
    if (!defined($converted) || $converted eq "") {
        warn "Warning: cygpath failed for \"$path\", using it unchanged\n";
        return $path;
    }
    return $converted;
}

my $have_minimize = (-x $minimizebin) ? 1 : 0;
if (!$have_minimize) {
    # no minimization
    print "Warning: dfa_minimize not found in the same place as mkdfa.pl\n";
    print "Warning: no minimization performed\n";
}

# mkfa writes to an intermediate file only when it will be minimized.
my $mkfa_output = $have_minimize ? "${dfafile}.tmp" : $dfafile;

$status = system($mkfabin, "-e1",
                 "-fg", _native_path($rgramfile),
                 "-fv", _native_path($tmpvocafile),
                 "-fo", _native_path($mkfa_output),
                 "-fh", _native_path("${tmpprefix}.h"));

if ($have_minimize) {
    # minimize DFA after generation; skip it when mkfa already failed and
    # keep its exit status so that a failed minimization is not reported
    # as success.
    if ($status == 0) {
        $status = system($minimizebin,
                         _native_path($mkfa_output),
                         "-o", _native_path($dfafile));
    }
    unlink($mkfa_output);
}

# Remove the temporary files whether or not the tools succeeded.
unlink($rgramfile);
unlink($tmpvocafile);
unlink("${tmpprefix}.h");
print "---\n";
if ($status != 0) {
    # error
    print "no .dfa or .dict file generated\n";
    exit 1;
}
166
# convert .voca -> .dict
# terminal number should be ordered by voca at mkfa output
#
# Each word line of $vocafile becomes "<id><TAB>[word]<TAB>rest" in
# $dictfile, where <id> is the zero-based index of the most recent "%"
# category line and "rest" is the remaining fields joined by single blanks.
# Word lines that appear before the first category line get the id -1.
if ($make_dict == 1) {
    $nowid = -1;
    # Three-argument open keeps unusual characters in the file names from
    # being read as an open mode or a pipe.
    open(my $voca_src, "<", $vocafile)
        || die "No vocafile \"$vocafile\" found. ($!)\n";
    open(my $dict_out, ">", $dictfile)
        || die "cannot open $dictfile for writing. ($!)\n";
    while (<$voca_src>) {
        chomp;
        s/\r//g;
        s/#.*//g;
        next if /^[ \t]*$/;

        if (/^%/) {
            # A new category starts; the words that follow belong to it.
            $nowid++;
            next;
        }

        my ($name, @rest) = split;
        my $eol = ($CRLF == 1) ? "\r\n" : "\n";
        printf {$dict_out} "%d\t[%s]\t%s%s", $nowid, $name, join(' ', @rest), $eol;
    }
    close($voca_src);
    # Buffered write errors only surface at close time.
    close($dict_out) || die "cannot write $dictfile. ($!)\n";
}
194
# Report which output files this run produced: always the .dfa file, plus
# the .term and .dict files when their generation was enabled.
my @outputs = ($dfafile);
push(@outputs, $termfile) if ($make_term == 1);
push(@outputs, $dictfile) if ($make_dict == 1);
$gene = join(" ", @outputs);
print "generated: $gene\n";
203
# Print a short help text for the command line and terminate the script.
sub usage {
    my @help = (
        "mkdfa.pl --- DFA compiler\n",
        "usage: $0 [-n] prefix\n",
        "\t-n ... keep current dict, not generate\n",
    );
    print @help;
    exit;
}
210