1#!/usr/bin/perl
2
3use Text::Iconv;
4use Encode;
5use strict;
6use utf8;
7
8# command line parsing
9die "Usage: $0 filename.kbd charset [EURO|YEN]\n"
10    unless ($ARGV[1]);
11
12my $inputfile = shift;					# first command argument
13my $converter = Text::Iconv->new(shift, "UTF-8");	# second argument
14my $use_euro;
15my $use_yen;
16my $current_char;
17my $current_scancode;
18
19while (my $arg = shift) {
20    $use_euro = 1, next
21	if $arg eq "EURO";
22    $use_yen = 1, next
23	if $arg eq "YEN";
24    die "Unknown encoding option '$arg'\n";
25}
26
27# converter functions
28sub local_to_UCS_string
29{
30    my ($string) = @_;
31
32    return $converter->convert($string);
33}
34
35sub prettyprint_token
36{
37    my ($ucs_char) = @_;
38
39    return "'" . chr($ucs_char) . "'"
40        if 32 <= $ucs_char and $ucs_char <= 126; # print as ASCII if possible
41#    return sprintf "%d", $ucs_char; # <---- temporary decimal
42    return sprintf "0x%02x", $ucs_char
43        if $ucs_char <= 255;        # print as hex number, else
44    return sprintf "0x%04x", $ucs_char;
45}
46
47sub local_to_UCS_code
48{
49    my ($char) = @_;
50
51    my $ucs_char = ord(Encode::decode("UTF-8", local_to_UCS_string($char)));
52
53    $current_char = lc(chr($ucs_char))
54	if $current_char eq "";
55
56    $ucs_char = 0x20ac	# replace with Euro character
57	if $ucs_char == 0xa4 and $use_euro and $current_char eq "e";
58
59    $ucs_char = 0xa5	# replace with Jap. Yen character on PC kbd
60	if $ucs_char == ord('\\') and $use_yen and $current_scancode == 125;
61
62    return prettyprint_token($ucs_char);
63}
64
65sub malformed_to_UCS_code
66{
67    my ($char) = @_;
68
69    return prettyprint_token(ord(Encode::decode("UTF-8", $char)));
70}
71
72sub convert_token
73{
74    my ($C) = @_;
75
76    return $1
77        if $C =~ m/^([a-z][a-z0-9]*)$/;		# key token
78    return local_to_UCS_code(chr($1))
79        if $C =~ m/^(\d+)$/;			# decimal number
80    return local_to_UCS_code(chr(hex($1)))
81        if $C =~ m/^0x([0-9a-f]+)$/i;		# hex number
82    return local_to_UCS_code(chr(ord($1)))
83        if $C =~ m/^'(.)'$/;			# character
84    return malformed_to_UCS_code($1)
85        if $C =~ m/^'(.+)'$/;			# character
86    return "<?$C?>";				# uncovered case
87}
88
89sub tokenize { # split on white space and parentheses (but not within token)
90    my ($line) = @_;
91
92    $line =~ s/'\('/ _lpar_ /g; # prevent splitting of '('
93    $line =~ s/'\)'/ _rpar_ /g; # prevent splitting of ')'
94    $line =~ s/'''/'_squote_'/g; # remove quoted single quotes from matches below
95    $line =~ s/([()])/ $1 /g; # insert blanks around remaining parentheses
96    my $matches;
97    do {
98	$matches = ($line =~ s/^([^']*)'([^']+)'/$1_squoteL_$2_squoteR_/g);
99    } while $matches;
100    $line =~ s/_squoteL_ _squoteR_/ _spc_ /g; # prevent splitting of ' '
101    my @KEYTOKEN = split (" ", $line);
102    grep(s/_squote[LR]?_/'/g, @KEYTOKEN);
103    grep(s/_spc_/' '/, @KEYTOKEN);
104    grep(s/_lpar_/'('/, @KEYTOKEN);
105    grep(s/_rpar_/')'/, @KEYTOKEN);
106    return @KEYTOKEN;
107}
108
109# main program
110open FH, "<$inputfile";
111while (<FH>) {
112    if (m/^#/) {
113	print local_to_UCS_string($_);
114    } elsif (m/^\s*$/) {
115	print "\n";
116    } else {
117	my @KEYTOKEN = tokenize($_);
118	my $at_bol = 1;
119	my $C;
120	foreach $C (@KEYTOKEN) {
121	    if ($at_bol) {
122		$current_char = "";
123		$current_scancode = -1;
124		if ($C =~ m/^\s*\d/) { # line begins with key code number
125		    $current_scancode = $C;
126		    printf "  %03d   ", $C;
127		} elsif ($C =~ m/^[a-z]/) { # line begins with accent name or paren
128		    printf "  %-4s ", $C; # accent name starts accent definition
129		} elsif ($C eq "(") {
130		    printf "%17s", "( "; # paren continues accent definition
131		} else {
132		    print "Unknown input line format: $_";
133		}
134		$at_bol = 0;
135	    } else {
136		if ($C =~ m/^([BCNO])$/) {
137		    print " $1"; # special case: effect of Caps Lock/Num Lock
138		} elsif ($C eq "(") {
139		    $current_char = "";
140		    print " ( ";
141		} elsif ($C eq ")") {
142		    print " )";
143		} else {
144		    printf "%-6s ", convert_token($C);
145		}
146	    }
147	}
148	print "\n";
149    }
150}
151close FH;
152