1#!/usr/bin/env perl -w
2
3# This script generates sbcsdat.c (the data for all the SBCSes) from its
4# source form sbcs.dat.
5
6$infile = "sbcs.dat";
7$outfile = undef;
8$outheader = undef;
9
10my $doing_opts = 1;
11my $nargs = 0;
12while (@ARGV) {
13    if ($doing_opts && $ARGV[0] =~ m/^-/) {
14        if ($ARGV[0] =~ m/^--source=(.*)$/) {
15            $outfile = $1;
16            shift;
17        } elsif ($ARGV[0] =~ m/^--header=(.*)$/) {
18            $outheader = $1;
19            shift;
20        } elsif ($ARGV[0] eq "--") {
21            $doing_opts = 0;
22            shift;
23        } else {
24            die "unrecognised option '$ARGV[0]'\n";
25        }
26        next;
27    }
28    if ($nargs++ == 0) {
29        $infile = $ARGV[0];
30        shift;
31    } else {
32        die "spurious extra argument '$ARGV[0]'\n";
33    }
34}
35
36die "usage: sbcsgen.pl ( --source=SRCFILE | --header=HDRFILE ) [INFILE]\n"
37    unless defined $outfile or defined $outheader;
38
39open INFH, $infile;
40
41my $charsetname = undef;
42my @vals = ();
43
44my @charsetnames = ();
45my @sortpriority = ();
46
47if (defined $outfile) {
48    open SOURCEFH, ">", $outfile;
49    select SOURCEFH;
50
51    print "/*\n";
52    print " * sbcsdat.c - data definitions for single-byte character sets.\n";
53    print " *\n";
54    print " * Generated by sbcsgen.pl from sbcs.dat.\n";
55    print " * You should edit those files rather than editing this one.\n";
56    print " */\n";
57    print "\n";
58    print "#ifndef ENUM_CHARSETS\n";
59    print "\n";
60    print "#include \"charset.h\"\n";
61    print "#include \"internal.h\"\n";
62    print "\n";
63}
64
65while (<INFH>) {
66    chomp;
67    y/\r\n//; # robustness in the face of strange line endings
68    if (/^charset (.*)$/) {
69	$charsetname = $1;
70	@vals = ();
71	@sortpriority = map { 0 } 0..255;
72    } elsif (/^sortpriority ([^-]*)-([^-]*) (.*)$/) {
73	for ($i = hex $1; $i <= hex $2; $i++) {
74	    $sortpriority[$i] += $3;
75	}
76    } elsif (/^[0-9a-fA-FX]/) {
77	push @vals, map { $_ eq "XXXX" ? -1 : hex $_ } split / +/, $_;
78	if (scalar @vals > 256) {
79	    die "$infile:$.: charset $charsetname has more than 256 values\n";
80	} elsif (scalar @vals == 256) {
81	    &outcharset($charsetname, \@vals, \@sortpriority)
82                if defined $outfile;
83	    push @charsetnames, $charsetname;
84	    $charsetname = undef;
85	    @vals = ();
86	    @sortpriority = map { 0 } 0..255;
87	}
88    }
89}
90
91if (defined $outfile) {
92    print "#else /* ENUM_CHARSETS */\n";
93    print "\n";
94
95    foreach $i (@charsetnames) {
96        print "ENUM_CHARSET($i)\n";
97    }
98
99    print "\n";
100    print "#endif /* ENUM_CHARSETS */\n";
101
102    close SOURCEFH;
103}
104
105if (defined $outheader) {
106    open HEADERFH, ">", $outheader;
107    select HEADERFH;
108
109    print "/*\n";
110    print " * sbcsdat.h - header file for SBCS data structures.\n";
111    print " *\n";
112    print " * Generated by sbcsgen.pl from sbcs.dat.\n";
113    print " * You should edit those files rather than editing this one.\n";
114    print " */\n";
115    print "\n";
116    print "#ifndef charset_sbcsdat_h\n";
117    print "#define charset_sbcsdat_h\n";
118    print "\n";
119    print "#include \"charset.h\"\n";
120    print "#include \"internal.h\"\n";
121    print "\n";
122    foreach $i (@charsetnames) {
123        print "extern const sbcs_data sbcsdata_$i;\n";
124    }
125    print "\n";
126    print "#endif /* charset_sbcsdat_h */\n";
127
128    close HEADERFH;
129}
130
131sub outcharset($$$) {
132    my ($name, $vals, $sortpriority) = @_;
133    my ($prefix, $i, @sorted);
134
135    print "const sbcs_data sbcsdata_$name = {\n";
136    print "    {\n";
137    $prefix = "    ";
138    @sorted = ();
139    for ($i = 0; $i < 256; $i++) {
140	if ($vals->[$i] < 0) {
141	    printf "%sERROR ", $prefix;
142	} else {
143	    printf "%s0x%04x", $prefix, $vals->[$i];
144	    die "ooh? $i\n" unless defined $sortpriority->[$i];
145	    push @sorted, [$i, $vals->[$i], 0+$sortpriority->[$i]];
146	}
147	if ($i % 8 == 7) {
148	    $prefix = ",\n    ";
149	} else {
150	    $prefix = ", ";
151	}
152    }
153    print "\n    },\n    {\n";
154    @sorted = sort { ($a->[1] == $b->[1] ?
155	              $b->[2] <=> $a->[2] :
156	              $a->[1] <=> $b->[1]) ||
157                     $a->[0] <=> $b->[0] } @sorted;
158    $prefix = "    ";
159    $uval = -1;
160    for ($i = $j = 0; $i < scalar @sorted; $i++) {
161	next if ($uval == $sorted[$i]->[1]); # low-priority alternative
162	$uval = $sorted[$i]->[1];
163	printf "%s0x%02x", $prefix, $sorted[$i]->[0];
164	if ($j % 8 == 7) {
165	    $prefix = ",\n    ";
166	} else {
167	    $prefix = ", ";
168	}
169	$j++;
170    }
171    printf "\n    },\n    %d\n", $j;
172    print "};\n";
173    print "const charset_spec charset_$name = {\n" .
174          "    $name, read_sbcs, write_sbcs, &sbcsdata_$name\n};\n\n";
175}
176