1#!/usr/bin/perl
2# $FreeBSD$
3
4use Text::Iconv;
5use Encode;
6use strict;
7use utf8;
8
9# command line parsing
10die "Usage: $0 filename.kbd charset [EURO|YEN]\n"
11    unless ($ARGV[1]);
12
13my $inputfile = shift;					# first command argument
14my $converter = Text::Iconv->new(shift, "UTF-8");	# second argument
15my $use_euro;
16my $use_yen;
17my $current_char;
18my $current_scancode;
19
20while (my $arg = shift) {
21    $use_euro = 1, next
22	if $arg eq "EURO";
23    $use_yen = 1, next
24	if $arg eq "YEN";
25    die "Unknown encoding option '$arg'\n";
26}
27
28# converter functions
29sub local_to_UCS_string
30{
31    my ($string) = @_;
32
33    return $converter->convert($string);
34}
35
36sub prettyprint_token
37{
38    my ($ucs_char) = @_;
39
40    return "'" . chr($ucs_char) . "'"
41        if 32 <= $ucs_char and $ucs_char <= 126; # print as ASCII if possible
42#    return sprintf "%d", $ucs_char; # <---- temporary decimal
43    return sprintf "0x%02x", $ucs_char
44        if $ucs_char <= 255;        # print as hex number, else
45    return sprintf "0x%04x", $ucs_char;
46}
47
48sub local_to_UCS_code
49{
50    my ($char) = @_;
51
52    my $ucs_char = ord(Encode::decode("UTF-8", local_to_UCS_string($char)));
53
54    $current_char = lc(chr($ucs_char))
55	if $current_char eq "";
56
57    $ucs_char = 0x20ac	# replace with Euro character
58	if $ucs_char == 0xa4 and $use_euro and $current_char eq "e";
59
60    $ucs_char = 0xa5	# replace with Jap. Yen character on PC kbd
61	if $ucs_char == ord('\\') and $use_yen and $current_scancode == 125;
62
63    return prettyprint_token($ucs_char);
64}
65
66sub malformed_to_UCS_code
67{
68    my ($char) = @_;
69
70    return prettyprint_token(ord(Encode::decode("UTF-8", $char)));
71}
72
73sub convert_token
74{
75    my ($C) = @_;
76
77    return $1
78        if $C =~ m/^([a-z][a-z0-9]*)$/;		# key token
79    return local_to_UCS_code(chr($1))
80        if $C =~ m/^(\d+)$/;			# decimal number
81    return local_to_UCS_code(chr(hex($1)))
82        if $C =~ m/^0x([0-9a-f]+)$/i;		# hex number
83    return local_to_UCS_code(chr(ord($1)))
84        if $C =~ m/^'(.)'$/;			# character
85    return malformed_to_UCS_code($1)
86        if $C =~ m/^'(.+)'$/;			# character
87    return "<?$C?>";				# uncovered case
88}
89
90sub tokenize { # split on white space and parentheses (but not within token)
91    my ($line) = @_;
92
93    $line =~ s/'\('/ _lpar_ /g; # prevent splitting of '('
94    $line =~ s/'\)'/ _rpar_ /g; # prevent splitting of ')'
95    $line =~ s/'''/'_squote_'/g; # remove quoted single quotes from matches below
96    $line =~ s/([()])/ $1 /g; # insert blanks around remaining parentheses
97    my $matches;
98    do {
99	$matches = ($line =~ s/^([^']*)'([^']+)'/$1_squoteL_$2_squoteR_/g);
100    } while $matches;
101    $line =~ s/_squoteL_ _squoteR_/ _spc_ /g; # prevent splitting of ' '
102    my @KEYTOKEN = split (" ", $line);
103    grep(s/_squote[LR]?_/'/g, @KEYTOKEN);
104    grep(s/_spc_/' '/, @KEYTOKEN);
105    grep(s/_lpar_/'('/, @KEYTOKEN);
106    grep(s/_rpar_/')'/, @KEYTOKEN);
107    return @KEYTOKEN;
108}
109
110# main program
111open FH, "<$inputfile";
112while (<FH>) {
113    if (m/^#/) {
114	print local_to_UCS_string($_);
115    } elsif (m/^\s*$/) {
116	print "\n";
117    } else {
118	my @KEYTOKEN = tokenize($_);
119	my $at_bol = 1;
120	my $C;
121	foreach $C (@KEYTOKEN) {
122	    if ($at_bol) {
123		$current_char = "";
124		$current_scancode = -1;
125		if ($C =~ m/^\s*\d/) { # line begins with key code number
126		    $current_scancode = $C;
127		    printf "  %03d   ", $C;
128		} elsif ($C =~ m/^[a-z]/) { # line begins with accent name or paren
129		    printf "  %-4s ", $C; # accent name starts accent definition
130		} elsif ($C eq "(") {
131		    printf "%17s", "( "; # paren continues accent definition
132		} else {
133		    print "Unknown input line format: $_";
134		}
135		$at_bol = 0;
136	    } else {
137		if ($C =~ m/^([BCNO])$/) {
138		    print " $1"; # special case: effect of Caps Lock/Num Lock
139		} elsif ($C eq "(") {
140		    $current_char = "";
141		    print " ( ";
142		} elsif ($C eq ")") {
143		    print " )";
144		} else {
145		    printf "%-6s ", convert_token($C);
146		}
147	    }
148	}
149	print "\n";
150    }
151}
152close FH;
153