1#!/usr/bin/perl 2# $FreeBSD$ 3 4use Text::Iconv; 5use Encode; 6use strict; 7use utf8; 8 9# command line parsing 10die "Usage: $0 filename.kbd charset [EURO|YEN]\n" 11 unless ($ARGV[1]); 12 13my $inputfile = shift; # first command argument 14my $converter = Text::Iconv->new(shift, "UTF-8"); # second argument 15my $use_euro; 16my $use_yen; 17my $current_char; 18my $current_scancode; 19 20while (my $arg = shift) { 21 $use_euro = 1, next 22 if $arg eq "EURO"; 23 $use_yen = 1, next 24 if $arg eq "YEN"; 25 die "Unknown encoding option '$arg'\n"; 26} 27 28# converter functions 29sub local_to_UCS_string 30{ 31 my ($string) = @_; 32 33 return $converter->convert($string); 34} 35 36sub prettyprint_token 37{ 38 my ($ucs_char) = @_; 39 40 return "'" . chr($ucs_char) . "'" 41 if 32 <= $ucs_char and $ucs_char <= 126; # print as ASCII if possible 42# return sprintf "%d", $ucs_char; # <---- temporary decimal 43 return sprintf "0x%02x", $ucs_char 44 if $ucs_char <= 255; # print as hex number, else 45 return sprintf "0x%04x", $ucs_char; 46} 47 48sub local_to_UCS_code 49{ 50 my ($char) = @_; 51 52 my $ucs_char = ord(Encode::decode("UTF-8", local_to_UCS_string($char))); 53 54 $current_char = lc(chr($ucs_char)) 55 if $current_char eq ""; 56 57 $ucs_char = 0x20ac # replace with Euro character 58 if $ucs_char == 0xa4 and $use_euro and $current_char eq "e"; 59 60 $ucs_char = 0xa5 # replace with Jap. Yen character on PC kbd 61 if $ucs_char == ord('\\') and $use_yen and $current_scancode == 125; 62 63 return prettyprint_token($ucs_char); 64} 65 66sub malformed_to_UCS_code 67{ 68 my ($char) = @_; 69 70 return prettyprint_token(ord(Encode::decode("UTF-8", $char))); 71} 72 73sub convert_token 74{ 75 my ($C) = @_; 76 77 return $1 78 if $C =~ m/^([a-z][a-z0-9]*)$/; # key token 79 return local_to_UCS_code(chr($1)) 80 if $C =~ m/^(\d+)$/; # decimal number 81 return local_to_UCS_code(chr(hex($1))) 82 if $C =~ m/^0x([0-9a-f]+)$/i; # hex number 83 return local_to_UCS_code(chr(ord($1))) 84 if $C =~ m/^'(.)'$/; # character 85 return malformed_to_UCS_code($1) 86 if $C =~ m/^'(.+)'$/; # character 87 return "<?$C?>"; # uncovered case 88} 89 90sub tokenize { # split on white space and parentheses (but not within token) 91 my ($line) = @_; 92 93 $line =~ s/'\('/ _lpar_ /g; # prevent splitting of '(' 94 $line =~ s/'\)'/ _rpar_ /g; # prevent splitting of ')' 95 $line =~ s/'''/'_squote_'/g; # remove quoted single quotes from matches below 96 $line =~ s/([()])/ $1 /g; # insert blanks around remaining parentheses 97 my $matches; 98 do { 99 $matches = ($line =~ s/^([^']*)'([^']+)'/$1_squoteL_$2_squoteR_/g); 100 } while $matches; 101 $line =~ s/_squoteL_ _squoteR_/ _spc_ /g; # prevent splitting of ' ' 102 my @KEYTOKEN = split (" ", $line); 103 grep(s/_squote[LR]?_/'/g, @KEYTOKEN); 104 grep(s/_spc_/' '/, @KEYTOKEN); 105 grep(s/_lpar_/'('/, @KEYTOKEN); 106 grep(s/_rpar_/')'/, @KEYTOKEN); 107 return @KEYTOKEN; 108} 109 110# main program 111open FH, "<$inputfile"; 112while (<FH>) { 113 if (m/^#/) { 114 print local_to_UCS_string($_); 115 } elsif (m/^\s*$/) { 116 print "\n"; 117 } else { 118 my @KEYTOKEN = tokenize($_); 119 my $at_bol = 1; 120 my $C; 121 foreach $C (@KEYTOKEN) { 122 if ($at_bol) { 123 $current_char = ""; 124 $current_scancode = -1; 125 if ($C =~ m/^\s*\d/) { # line begins with key code number 126 $current_scancode = $C; 127 printf " %03d ", $C; 128 } elsif ($C =~ m/^[a-z]/) { # line begins with accent name or paren 129 printf " %-4s ", $C; # accent name starts accent definition 130 } elsif ($C eq "(") { 131 printf "%17s", "( "; # paren continues accent definition 132 } else { 133 print "Unknown input line format: $_"; 134 } 135 $at_bol = 0; 136 } else { 137 if ($C =~ m/^([BCNO])$/) { 138 print " $1"; # special case: effect of Caps Lock/Num Lock 139 } elsif ($C eq "(") { 140 $current_char = ""; 141 print " ( "; 142 } elsif ($C eq ")") { 143 print " )"; 144 } else { 145 printf "%-6s ", convert_token($C); 146 } 147 } 148 } 149 print "\n"; 150 } 151} 152close FH; 153