1#!/usr/bin/perl -w 2 3open(RFC, "rfc1345.txt") || die; 4 5my %mnemonics; 6my %fullname; 7my %aliases; 8 9my $charset; 10my @aliases; 11my $code; 12my @map; 13 14unless (-d "maps") { 15 print STDERR "Making directory 'maps'\n"; 16 mkdir('maps', 0777) || die "Can't create directory: $!"; 17} 18 19if (open(ALIASES, "maps/aliases")) { 20 while (<ALIASES>) { 21 next if /^\s*\#/; 22 next if /^\s*$/; 23 chomp; 24 my($charset, @aliases) = split(' ', $_); 25 $aliases{$charset} = { map {$_ => 1} @aliases }; 26 } 27} 28 29while (<RFC>) { 30 if (/^3\.\s/ .. /^4\.\s/) { # only want chapter 3 31 if (/^ (\S+)\s+([0-9a-f]{4})\s+(.+)/) { 32 $mnemonics{$1} = $2; 33 $fullname{$3} = $2; 34 } 35 } 36 if (/^5\.\s/ .. /^ACKNOWLEDGEMENTS/) { 37 if (/^ &charset\s+(\S+)/) { 38 #print "$1\n"; 39 map_out(); 40 $charset = $1; 41 @aliases = (); 42 undef($code); 43 @map = (); 44 } elsif (/^ &alias\s+(\S+)/) { 45 #print " $1\n"; 46 push(@aliases, $1); 47 } elsif (/^ &bits\s+(\d+)/) { 48 #print " BITS = $1\n"; 49 if ($1 ne '8') { 50 undef($charset); # don't care about this one 51 } 52 } elsif (/^ &code\s+(\d+)/) { 53 #print " CODE=$1\n"; 54 $code = $1; 55 } elsif (/^ &duplicate\s+(\d+)\s+(\S+)/) { 56 #print "DUP $1 $2\n"; 57 push(@map, [$1, $2]); 58 } elsif (/^ &([a-z][a-z0-9]+)/) { 59 #print "$1\n"; 60 } elsif (/^ (\S+ +.*)/ && $charset && defined($code)) { 61 my $mne; 62 for $mne (split(' ', $1)) { 63 if ($mne eq "??") { 64 $code++; 65 next; 66 } else { 67 if ($code > 255) { 68 print STDERR "$charset: bad code $code\n"; 69 undef($charset); # ignore it 70 last; 71 } 72 push(@map, [$code++, $mne]); 73 } 74 } 75 } 76 } 77} 78map_out(); 79 80open(ALIASES, ">maps/aliases") || die "Can't write aliases: $!"; 81for (sort keys %aliases) { 82 delete $aliases{$_}{$_}; # if we managed to get an alias to ourself 83 print ALIASES "$_ ", join(" ", sort keys %{$aliases{$_}}), "\n"; 84} 85close(ALIASES); 86 87 88 89sub map_out 90{ 91 return unless $charset; 92 93 while ($charset =~ /[^\w\-\.]/) { 94 my $orig = $charset; 95 if ($charset =~ s/:\d+$// || 96 $charset =~ s/_\(\d+\)$//) 97 { 98 push(@aliases, $orig); 99 } else { 100 die "Can't wash $charset\n"; 101 } 102 } 103 if ($charset =~ /^ISO_8859-(\d+)$/) { 104 push(@aliases, "iso8859-$1", "8859-$1"); 105 # Fix the MACRON vs. OVERLINE bug in these encodings 106 if ($1 == 1 || $1 == 4 || $1 == 9) { 107 splice(@map, 0xAF, 0, [0xAF, "'m"]); 108 } 109 } 110 111 print STDERR "$charset @aliases\n"; 112 for (@aliases) { 113 $aliases{$charset}{$_}++; 114 } 115 116 open(BINMAP, "| perl ./map8_txt2bin >maps/$charset.bin") or die; 117 binmode BINMAP; 118 for (@map) { 119 my($code, $mne) = @$_; 120 my $x4 = $mnemonics{$mne}; 121 unless ($x4) { 122 print STDERR "$charset: no code for $mne\n"; 123 next; 124 } 125 if ($code < 0 || $code > 255) { 126 print STDERR "$charset: bad code $code\n"; 127 next; 128 } 129 printf BINMAP "0x%02x 0x%s\n", $code, $x4; 130 } 131 close(BINMAP); 132 133} 134