1#!/usr/bin/perl -w 2# $LynxId: tbl2html.pl,v 1.5 2011/05/21 15:18:16 tom Exp $ 3# 4# Translate one or more ".tbl" files into ".html" files which can be used to 5# test the charset support in lynx. Each of the ".html" files will use the 6# charset that corresponds to the input ".tbl" file. 7 8use strict; 9 10use Getopt::Std; 11use File::Basename; 12use POSIX qw(strtod); 13 14sub field($$) { 15 my $value = $_[0]; 16 my $count = $_[1]; 17 18 while ( $count > 0 ) { 19 $count -= 1; 20 $value =~ s/^\S*\s*//; 21 } 22 $value =~ s/\s.*//; 23 return $value; 24} 25 26sub notes($) { 27 my $value = $_[0]; 28 29 $value =~ s/^[^#]*//; 30 $value =~ s/^#//; 31 $value =~ s/^\s+//; 32 33 return $value; 34} 35 36sub make_header($$$) { 37 my $source = $_[0]; 38 my $charset = $_[1]; 39 my $official = $_[2]; 40 41 printf FP "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"; 42 printf FP "<HTML>\n"; 43 printf FP "<HEAD>\n"; 44 printf FP "<!-- $source -->\n"; 45 printf FP "<TITLE>%s table</TITLE>\n", &escaped($official); 46 printf FP "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=%s\">\n", &escaped($charset); 47 printf FP "</HEAD>\n"; 48 printf FP "\n"; 49 printf FP "<BODY> \n"; 50 printf FP "\n"; 51 printf FP "<H1 ALIGN=center>%s table</H1> \n", &escaped($charset); 52 printf FP "\n"; 53 printf FP "<PRE>\n"; 54 printf FP "Code Char Entity Render Description\n"; 55} 56 57sub make_mark() { 58 printf FP "---- ---- ------ ------ -----------------------------------\n"; 59} 60 61sub escaped($) { 62 my $result = $_[0]; 63 $result =~ s/&/&/g; 64 $result =~ s/</</g; 65 $result =~ s/>/>/g; 66 return $result; 67} 68 69sub make_row($$$) { 70 my $old_code = $_[0]; 71 my $new_code = $_[1]; 72 my $comments = $_[2]; 73 74 # printf "# make_row %d %d %s\n", $old_code, $new_code, $comments; 75 my $visible = sprintf("&#%d; ", $new_code); 76 if ($old_code < 256) { 77 printf FP "%4x %c %.13s &#%d; %s\n", 78 $old_code, $old_code, 79 $visible, $new_code, 80 &escaped($comments); 81 } else { 82 printf FP "%4x . %.13s &#%d; %s\n", 83 $old_code, 84 $visible, $new_code, 85 &escaped($comments); 86 } 87} 88 89sub null_row($$) { 90 my $old_code = $_[0]; 91 my $comments = $_[1]; 92 93 if ($old_code < 256) { 94 printf FP "%4x %c %s\n", 95 $old_code, $old_code, 96 &escaped($comments); 97 } else { 98 printf FP "%4x . %s\n", 99 $old_code, 100 &escaped($comments); 101 } 102} 103 104sub make_footer() { 105 printf FP "</PRE>\n"; 106 printf FP "</BODY>\n"; 107 printf FP "</HTML>\n"; 108} 109 110# return true if the string describes a range 111sub is_range($) { 112 return ($_[0] =~ /.*-.*/); 113} 114 115# convert the U+'s to 0x's so strtod() can convert them. 116sub zeroxes($) { 117 my $result = $_[0]; 118 $result =~ s/^U\+/0x/; 119 $result =~ s/-U\+/-0x/; 120 return $result; 121} 122 123# convert a string to a number (-1's are outside the range of Unicode). 124sub value_of($) { 125 my ($result, $oops) = strtod($_[0]); 126 $result = -1 if ($oops ne 0); 127 return $result; 128} 129 130# return the first number in a range 131sub first_of($) { 132 my $range = &zeroxes($_[0]); 133 $range =~ s/-.*//; 134 return &value_of($range); 135} 136 137# return the last number in a range 138sub last_of($) { 139 my $range = &zeroxes($_[0]); 140 $range =~ s/^.*-//; 141 return &value_of($range); 142} 143 144sub one_many($$$) { 145 my $oldcode = $_[0]; 146 my $newcode = &zeroxes($_[1]); 147 my $comment = $_[2]; 148 149 my $old_code = &value_of($oldcode); 150 if ( $old_code lt 0 ) { 151 printf "? Problem with number \"%s\"\n", $oldcode; 152 } else { 153 &make_mark if (( $old_code % 8 ) == 0 ); 154 155 if ( $newcode =~ /^#.*/ ) { 156 &null_row($old_code, $comment); 157 } elsif ( &is_range($newcode) ) { 158 my $first_item = &first_of($newcode); 159 my $last_item = &last_of($newcode); 160 my $item; 161 162 if ( $first_item lt 0 or $last_item lt 0 ) { 163 printf "? Problem with one:many numbers \"%s\"\n", $newcode; 164 } else { 165 if ( $comment =~ /^$/ ) { 166 $comment = sprintf("mapped: %#x to %#x..%#x", $old_code, $first_item, $last_item); 167 } else { 168 $comment = $comment . " (range)"; 169 } 170 for $item ( $first_item..$last_item) { 171 &make_row($old_code, $item, $comment); 172 } 173 } 174 } else { 175 my $new_code = &value_of($newcode); 176 if ( $new_code lt 0 ) { 177 printf "? Problem with number \"%s\"\n", $newcode; 178 } else { 179 if ( $comment =~ /^$/ ) { 180 $comment = sprintf("mapped: %#x to %#x", $old_code, $new_code); 181 } 182 &make_row($old_code, $new_code, $comment); 183 } 184 } 185 } 186} 187 188sub many_many($$$) { 189 my $oldcode = $_[0]; 190 my $newcode = $_[1]; 191 my $comment = $_[2]; 192 193 my $first_old = &first_of($oldcode); 194 my $last_old = &last_of($oldcode); 195 my $item; 196 197 if (&is_range($newcode)) { 198 my $first_new = &first_of($newcode); 199 my $last_new = &last_of($newcode); 200 for $item ( $first_old..$last_old) { 201 &one_many($item, $first_new, $comment); 202 $first_new += 1; 203 } 204 } else { 205 for $item ( $first_old..$last_old) { 206 &one_many($item, $newcode, $comment); 207 } 208 } 209} 210 211sub approximate($$$) { 212 my $values = $_[0]; 213 my $expect = sprintf("%-8s", $_[1]); 214 my $comment = $_[2]; 215 my $escaped = &escaped($expect); 216 my $left; 217 my $this; 218 my $next; 219 220 $escaped =~ s/\\134/\\/g; 221 $escaped =~ s/\\015/\
\;/g; 222 $escaped =~ s/\\012/\
\;/g; 223 224 while ( $escaped =~ /^.*\\[0-7]{3}.*$/ ) { 225 $left = $escaped; 226 $left =~ s/\\[0-7]{3}.*//; 227 $this = substr $escaped,length($left)+1,3; 228 $next = substr $escaped,length($left)+4; 229 $escaped = sprintf("%s&#%d;%s", $left, oct $this, $next); 230 } 231 232 my $visible = sprintf("&#%d; ", $values); 233 if ($values < 256) { 234 printf FP "%4x %c %.13s &#%d; approx: %s\n", 235 $values, $values, 236 $visible, 237 $values, 238 $escaped; 239 } else { 240 printf FP "%4x . %.13s &#%d; approx: %s\n", 241 $values, 242 $visible, 243 $values, 244 $escaped; 245 } 246} 247 248sub doit($) { 249 my $source = $_[0]; 250 251 printf "** %s\n", $source; 252 253 my $target = basename($source, ".tbl"); 254 255 # Read the file into an array in memory. 256 open(FP,$source) || do { 257 print STDERR "Can't open input $source: $!\n"; 258 return; 259 }; 260 my (@input) = <FP>; 261 chomp @input; 262 close(FP); 263 264 my $n; 265 my $charset = ""; 266 my $official = ""; 267 my $empty = 1; 268 269 for $n (0..$#input) { 270 $input[$n] =~ s/\s*$//; # trim trailing blanks 271 $input[$n] =~ s/^\s*//; # trim leading blanks 272 $input[$n] =~ s/^#0x/0x/; # uncomment redundant stuff 273 274 next if $input[$n] =~ /^$/; 275 next if $input[$n] =~ /^#.*$/; 276 277 if ( $empty 278 and ( $input[$n] =~ /^\d/ 279 or $input[$n] =~ /^U\+/ ) ) { 280 $target = $charset . ".html"; 281 printf "=> %s\n", $target; 282 open(FP,">$target") || do { 283 print STDERR "Can't open output $target: $!\n"; 284 return; 285 }; 286 &make_header($source, $charset, $official); 287 $empty = 0; 288 } 289 290 if ( $input[$n] =~ /^M.*/ ) { 291 $charset = $input[$n]; 292 $charset =~ s/^.//; 293 } elsif ( $input[$n] =~ /^O.*/ ) { 294 $official = $input[$n]; 295 $official =~ s/^.//; 296 } elsif ( $input[$n] =~ /^\d/ ) { 297 298 my $newcode = &field($input[$n], 1); 299 300 next if ( $newcode eq "idem" ); 301 next if ( $newcode eq "" ); 302 303 my $oldcode = &field($input[$n], 0); 304 if ( &is_range($oldcode) ) { 305 &many_many($oldcode, $newcode, ¬es($input[$n])); 306 } else { 307 &one_many($oldcode, $newcode, ¬es($input[$n])); 308 } 309 } elsif ( $input[$n] =~ /^U\+/ ) { 310 if ( $input[$n] =~ /^U\+\w+:/ ) { 311 my $values = $input[$n]; 312 my $expect = $input[$n]; 313 314 $values =~ s/:.*//; 315 $values = &zeroxes($values); 316 $expect =~ s/^[^:]+://; 317 318 if ( &is_range($values) ) { 319 printf "fixme:%s(%s)(%s)\n", $input[$n], $values, $expect; 320 } else { 321 &approximate(&value_of($values), $expect, ¬es($input[$n])); 322 } 323 } else { 324 my $value = $input[$n]; 325 $value =~ s/\s*".*//; 326 $value = &value_of(&zeroxes($value)); 327 if ($value gt 0) { 328 my $quote = $input[$n]; 329 my $comment = ¬es($input[$n]); 330 $quote =~ s/^[^"]*"//; 331 $quote =~ s/".*//; 332 &approximate($value, $quote, $comment); 333 } else { 334 printf "fixme:%d(%s)\n", $n, $input[$n]; 335 } 336 } 337 } else { 338 # printf "skipping line %d:%s\n", $n + 1, $input[$n]; 339 } 340 } 341 if ( ! $empty ) { 342 &make_footer(); 343 } 344 close FP; 345} 346 347sub usage() { 348 print <<USAGE; 349Usage: $0 [tbl-files] 350 351The script writes a new ".html" file for each input, using 352the same name as the input, stripping the ".tbl" suffix. 353USAGE 354 exit(1); 355} 356 357if ( $#ARGV < 0 ) { 358 usage(); 359} else { 360 while ( $#ARGV >= 0 ) { 361 &doit ( shift @ARGV ); 362 } 363} 364exit (0); 365