1#!/usr/bin/perl -w
2
3open(RFC, "rfc1345.txt") || die;
4
5my %mnemonics;
6my %fullname;
7my %aliases;
8
9my $charset;
10my @aliases;
11my $code;
12my @map;
13
14unless (-d "maps") {
15    print STDERR "Making directory 'maps'\n";
16    mkdir('maps', 0777) || die "Can't create directory: $!";
17}
18
19if (open(ALIASES, "maps/aliases")) {
20    while (<ALIASES>) {
21	next if /^\s*\#/;
22	next if /^\s*$/;
23	chomp;
24	my($charset, @aliases) = split(' ', $_);
25	$aliases{$charset} = { map {$_ => 1} @aliases };
26    }
27}
28
29while (<RFC>) {
30    if (/^3\.\s/ .. /^4\.\s/) {  # only want chapter 3
31	if (/^ (\S+)\s+([0-9a-f]{4})\s+(.+)/) {
32	    $mnemonics{$1} = $2;
33	    $fullname{$3}  = $2;
34	}
35    }
36    if (/^5\.\s/ .. /^ACKNOWLEDGEMENTS/) {
37	if (/^  &charset\s+(\S+)/) {
38	    #print "$1\n";
39	    map_out();
40	    $charset = $1;
41	    @aliases = ();
42	    undef($code);
43	    @map = ();
44	} elsif (/^  &alias\s+(\S+)/) {
45	    #print " $1\n";
46	    push(@aliases, $1);
47	} elsif (/^  &bits\s+(\d+)/) {
48	    #print " BITS = $1\n";
49	    if ($1 ne '8') {
50		undef($charset);  # don't care about this one
51	    }
52	} elsif (/^  &code\s+(\d+)/) {
53	    #print " CODE=$1\n";
54	    $code = $1;
55	} elsif (/^  &duplicate\s+(\d+)\s+(\S+)/) {
56	    #print "DUP $1 $2\n";
57	    push(@map, [$1, $2]);
58	} elsif (/^  &([a-z][a-z0-9]+)/) {
59	    #print "$1\n";
60	} elsif (/^  (\S+ +.*)/ && $charset && defined($code)) {
61	    my $mne;
62	    for $mne (split(' ', $1)) {
63		if ($mne eq "??") {
64		    $code++;
65		    next;
66		} else {
67		    if ($code > 255) {
68			print STDERR "$charset: bad code $code\n";
69			undef($charset);  # ignore it
70			last;
71		    }
72		    push(@map, [$code++, $mne]);
73		}
74	    }
75	}
76    }
77}
78map_out();
79
80open(ALIASES, ">maps/aliases") || die "Can't write aliases: $!";
81for (sort keys %aliases) {
82    delete $aliases{$_}{$_};  # if we managed to get an alias to ourself
83    print ALIASES "$_ ", join(" ", sort keys %{$aliases{$_}}), "\n";
84}
85close(ALIASES);
86
87
88
89sub map_out
90{
91    return unless $charset;
92
93    while ($charset =~ /[^\w\-\.]/) {
94	my $orig = $charset;
95	if ($charset =~ s/:\d+$// ||
96	    $charset =~ s/_\(\d+\)$//)
97	{
98	    push(@aliases, $orig);
99	} else {
100            die "Can't wash $charset\n";
101        }
102    }
103    if ($charset =~ /^ISO_8859-(\d+)$/) {
104	push(@aliases, "iso8859-$1", "8859-$1");
105	# Fix the MACRON vs. OVERLINE bug in these encodings
106	if ($1 == 1 || $1 == 4 || $1 == 9) {
107	    splice(@map, 0xAF, 0, [0xAF, "'m"]);
108	}
109    }
110
111    print STDERR "$charset @aliases\n";
112    for (@aliases) {
113	$aliases{$charset}{$_}++;
114    }
115
116    open(BINMAP, "| perl ./map8_txt2bin >maps/$charset.bin") or die;
117    binmode BINMAP;
118    for (@map) {
119	my($code, $mne) = @$_;
120	my $x4 = $mnemonics{$mne};
121	unless ($x4) {
122	    print STDERR "$charset: no code for $mne\n";
123	    next;
124	}
125	if ($code < 0 || $code > 255) {
126	    print STDERR "$charset: bad code $code\n";
127	    next;
128	}
129	printf BINMAP "0x%02x 0x%s\n", $code, $x4;
130    }
131    close(BINMAP);
132
133}
134