#!/usr/bin/perl -w open(RFC, "rfc1345.txt") || die; my %mnemonics; my %fullname; my %aliases; my $charset; my @aliases; my $code; my @map; unless (-d "maps") { print STDERR "Making directory 'maps'\n"; mkdir('maps', 0777) || die "Can't create directory: $!"; } if (open(ALIASES, "maps/aliases")) { while () { next if /^\s*\#/; next if /^\s*$/; chomp; my($charset, @aliases) = split(' ', $_); $aliases{$charset} = { map {$_ => 1} @aliases }; } } while () { if (/^3\.\s/ .. /^4\.\s/) { # only want chapter 3 if (/^ (\S+)\s+([0-9a-f]{4})\s+(.+)/) { $mnemonics{$1} = $2; $fullname{$3} = $2; } } if (/^5\.\s/ .. /^ACKNOWLEDGEMENTS/) { if (/^ &charset\s+(\S+)/) { #print "$1\n"; map_out(); $charset = $1; @aliases = (); undef($code); @map = (); } elsif (/^ &alias\s+(\S+)/) { #print " $1\n"; push(@aliases, $1); } elsif (/^ &bits\s+(\d+)/) { #print " BITS = $1\n"; if ($1 ne '8') { undef($charset); # don't care about this one } } elsif (/^ &code\s+(\d+)/) { #print " CODE=$1\n"; $code = $1; } elsif (/^ &duplicate\s+(\d+)\s+(\S+)/) { #print "DUP $1 $2\n"; push(@map, [$1, $2]); } elsif (/^ &([a-z][a-z0-9]+)/) { #print "$1\n"; } elsif (/^ (\S+ +.*)/ && $charset && defined($code)) { my $mne; for $mne (split(' ', $1)) { if ($mne eq "??") { $code++; next; } else { if ($code > 255) { print STDERR "$charset: bad code $code\n"; undef($charset); # ignore it last; } push(@map, [$code++, $mne]); } } } } } map_out(); open(ALIASES, ">maps/aliases") || die "Can't write aliases: $!"; for (sort keys %aliases) { delete $aliases{$_}{$_}; # if we managed to get an alias to ourself print ALIASES "$_ ", join(" ", sort keys %{$aliases{$_}}), "\n"; } close(ALIASES); sub map_out { return unless $charset; while ($charset =~ /[^\w\-\.]/) { my $orig = $charset; if ($charset =~ s/:\d+$// || $charset =~ s/_\(\d+\)$//) { push(@aliases, $orig); } else { die "Can't wash $charset\n"; } } if ($charset =~ /^ISO_8859-(\d+)$/) { push(@aliases, "iso8859-$1", "8859-$1"); # Fix the MACRON vs. OVERLINE bug in these encodings if ($1 == 1 || $1 == 4 || $1 == 9) { splice(@map, 0xAF, 0, [0xAF, "'m"]); } } print STDERR "$charset @aliases\n"; for (@aliases) { $aliases{$charset}{$_}++; } open(BINMAP, "| perl ./map8_txt2bin >maps/$charset.bin") or die; binmode BINMAP; for (@map) { my($code, $mne) = @$_; my $x4 = $mnemonics{$mne}; unless ($x4) { print STDERR "$charset: no code for $mne\n"; next; } if ($code < 0 || $code > 255) { print STDERR "$charset: bad code $code\n"; next; } printf BINMAP "0x%02x 0x%s\n", $code, $x4; } close(BINMAP); }