1#!/usr/bin/perl 2 3use Text::Iconv; 4use Encode; 5use strict; 6use utf8; 7 8# command line parsing 9die "Usage: $0 filename.kbd charset [EURO|YEN]\n" 10 unless ($ARGV[1]); 11 12my $inputfile = shift; # first command argument 13my $converter = Text::Iconv->new(shift, "UTF-8"); # second argument 14my $use_euro; 15my $use_yen; 16my $current_char; 17my $current_scancode; 18 19while (my $arg = shift) { 20 $use_euro = 1, next 21 if $arg eq "EURO"; 22 $use_yen = 1, next 23 if $arg eq "YEN"; 24 die "Unknown encoding option '$arg'\n"; 25} 26 27# converter functions 28sub local_to_UCS_string 29{ 30 my ($string) = @_; 31 32 return $converter->convert($string); 33} 34 35sub prettyprint_token 36{ 37 my ($ucs_char) = @_; 38 39 return "'" . chr($ucs_char) . "'" 40 if 32 <= $ucs_char and $ucs_char <= 126; # print as ASCII if possible 41# return sprintf "%d", $ucs_char; # <---- temporary decimal 42 return sprintf "0x%02x", $ucs_char 43 if $ucs_char <= 255; # print as hex number, else 44 return sprintf "0x%04x", $ucs_char; 45} 46 47sub local_to_UCS_code 48{ 49 my ($char) = @_; 50 51 my $ucs_char = ord(Encode::decode("UTF-8", local_to_UCS_string($char))); 52 53 $current_char = lc(chr($ucs_char)) 54 if $current_char eq ""; 55 56 $ucs_char = 0x20ac # replace with Euro character 57 if $ucs_char == 0xa4 and $use_euro and $current_char eq "e"; 58 59 $ucs_char = 0xa5 # replace with Jap. Yen character on PC kbd 60 if $ucs_char == ord('\\') and $use_yen and $current_scancode == 125; 61 62 return prettyprint_token($ucs_char); 63} 64 65sub malformed_to_UCS_code 66{ 67 my ($char) = @_; 68 69 return prettyprint_token(ord(Encode::decode("UTF-8", $char))); 70} 71 72sub convert_token 73{ 74 my ($C) = @_; 75 76 return $1 77 if $C =~ m/^([a-z][a-z0-9]*)$/; # key token 78 return local_to_UCS_code(chr($1)) 79 if $C =~ m/^(\d+)$/; # decimal number 80 return local_to_UCS_code(chr(hex($1))) 81 if $C =~ m/^0x([0-9a-f]+)$/i; # hex number 82 return local_to_UCS_code(chr(ord($1))) 83 if $C =~ m/^'(.)'$/; # character 84 return malformed_to_UCS_code($1) 85 if $C =~ m/^'(.+)'$/; # character 86 return "<?$C?>"; # uncovered case 87} 88 89sub tokenize { # split on white space and parentheses (but not within token) 90 my ($line) = @_; 91 92 $line =~ s/'\('/ _lpar_ /g; # prevent splitting of '(' 93 $line =~ s/'\)'/ _rpar_ /g; # prevent splitting of ')' 94 $line =~ s/'''/'_squote_'/g; # remove quoted single quotes from matches below 95 $line =~ s/([()])/ $1 /g; # insert blanks around remaining parentheses 96 my $matches; 97 do { 98 $matches = ($line =~ s/^([^']*)'([^']+)'/$1_squoteL_$2_squoteR_/g); 99 } while $matches; 100 $line =~ s/_squoteL_ _squoteR_/ _spc_ /g; # prevent splitting of ' ' 101 my @KEYTOKEN = split (" ", $line); 102 grep(s/_squote[LR]?_/'/g, @KEYTOKEN); 103 grep(s/_spc_/' '/, @KEYTOKEN); 104 grep(s/_lpar_/'('/, @KEYTOKEN); 105 grep(s/_rpar_/')'/, @KEYTOKEN); 106 return @KEYTOKEN; 107} 108 109# main program 110open FH, "<$inputfile"; 111while (<FH>) { 112 if (m/^#/) { 113 print local_to_UCS_string($_); 114 } elsif (m/^\s*$/) { 115 print "\n"; 116 } else { 117 my @KEYTOKEN = tokenize($_); 118 my $at_bol = 1; 119 my $C; 120 foreach $C (@KEYTOKEN) { 121 if ($at_bol) { 122 $current_char = ""; 123 $current_scancode = -1; 124 if ($C =~ m/^\s*\d/) { # line begins with key code number 125 $current_scancode = $C; 126 printf " %03d ", $C; 127 } elsif ($C =~ m/^[a-z]/) { # line begins with accent name or paren 128 printf " %-4s ", $C; # accent name starts accent definition 129 } elsif ($C eq "(") { 130 printf "%17s", "( "; # paren continues accent definition 131 } else { 132 print "Unknown input line format: $_"; 133 } 134 $at_bol = 0; 135 } else { 136 if ($C =~ m/^([BCNO])$/) { 137 print " $1"; # special case: effect of Caps Lock/Num Lock 138 } elsif ($C eq "(") { 139 $current_char = ""; 140 print " ( "; 141 } elsif ($C eq ")") { 142 print " )"; 143 } else { 144 printf "%-6s ", convert_token($C); 145 } 146 } 147 } 148 print "\n"; 149 } 150} 151close FH; 152