1#!/usr/bin/perl -w
2# $LynxId: tbl2html.pl,v 1.5 2011/05/21 15:18:16 tom Exp $
3#
4# Translate one or more ".tbl" files into ".html" files which can be used to
5# test the charset support in lynx.  Each of the ".html" files will use the
6# charset that corresponds to the input ".tbl" file.
7
8use strict;
9
10use Getopt::Std;
11use File::Basename;
12use POSIX qw(strtod);
13
14sub field($$) {
15	my $value = $_[0];
16	my $count = $_[1];
17
18	while ( $count > 0 ) {
19		$count -= 1;
20		$value =~ s/^\S*\s*//;
21	}
22	$value =~ s/\s.*//;
23	return $value;
24}
25
26sub notes($) {
27	my $value = $_[0];
28
29	$value =~ s/^[^#]*//;
30	$value =~ s/^#//;
31	$value =~ s/^\s+//;
32
33	return $value;
34}
35
36sub make_header($$$) {
37	my $source   = $_[0];
38	my $charset  = $_[1];
39	my $official = $_[2];
40
41	printf FP "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n";
42	printf FP "<HTML>\n";
43	printf FP "<HEAD>\n";
44	printf FP "<!-- $source -->\n";
45	printf FP "<TITLE>%s table</TITLE>\n", &escaped($official);
46	printf FP "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=%s\">\n", &escaped($charset);
47	printf FP "</HEAD>\n";
48	printf FP "\n";
49	printf FP "<BODY> \n";
50	printf FP "\n";
51	printf FP "<H1 ALIGN=center>%s table</H1> \n", &escaped($charset);
52	printf FP "\n";
53	printf FP "<PRE>\n";
54	printf FP "Code  Char  Entity   Render          Description\n";
55}
56
57sub make_mark() {
58	printf FP "----  ----  ------   ------          -----------------------------------\n";
59}
60
61sub escaped($) {
62	my $result = $_[0];
63	$result =~ s/&/&amp;/g;
64	$result =~ s/</&lt;/g;
65	$result =~ s/>/&gt;/g;
66	return $result;
67}
68
69sub make_row($$$) {
70	my $old_code = $_[0];
71	my $new_code = $_[1];
72	my $comments = $_[2];
73
74	# printf "# make_row %d %d %s\n", $old_code, $new_code, $comments;
75	my $visible = sprintf("&amp;#%d;      ", $new_code);
76	if ($old_code < 256) {
77		printf FP "%4x    %c   %.13s  &#%d;             %s\n",
78			$old_code, $old_code,
79			$visible, $new_code,
80			&escaped($comments);
81	} else {
82		printf FP "%4x    .   %.13s  &#%d;             %s\n",
83			$old_code,
84			$visible, $new_code,
85			&escaped($comments);
86	}
87}
88
89sub null_row($$) {
90	my $old_code = $_[0];
91	my $comments = $_[1];
92
93	if ($old_code < 256) {
94		printf FP "%4x    %c                     %s\n",
95			$old_code, $old_code,
96			&escaped($comments);
97	} else {
98		printf FP "%4x    .                     %s\n",
99			$old_code,
100			&escaped($comments);
101	}
102}
103
104sub make_footer() {
105	printf FP "</PRE>\n";
106	printf FP "</BODY>\n";
107	printf FP "</HTML>\n";
108}
109
110# return true if the string describes a range
111sub is_range($) {
112	return ($_[0] =~ /.*-.*/);
113}
114
115# convert the U+'s to 0x's so strtod() can convert them.
116sub zeroxes($) {
117	my $result = $_[0];
118	$result =~ s/^U\+/0x/;
119	$result =~ s/-U\+/-0x/;
120	return $result;
121}
122
123# convert a string to a number (-1's are outside the range of Unicode).
124sub value_of($) {
125	my ($result, $oops) = strtod($_[0]);
126	$result = -1 if ($oops ne 0);
127	return $result;
128}
129
130# return the first number in a range
131sub first_of($) {
132	my $range = &zeroxes($_[0]);
133	$range =~ s/-.*//;
134	return &value_of($range);
135}
136
137# return the last number in a range
138sub last_of($) {
139	my $range = &zeroxes($_[0]);
140	$range =~ s/^.*-//;
141	return &value_of($range);
142}
143
144sub one_many($$$) {
145	my $oldcode = $_[0];
146	my $newcode = &zeroxes($_[1]);
147	my $comment = $_[2];
148
149	my $old_code = &value_of($oldcode);
150	if ( $old_code lt 0 ) {
151		printf "? Problem with number \"%s\"\n", $oldcode;
152	} else {
153		&make_mark if (( $old_code % 8 ) == 0 );
154
155		if ( $newcode =~ /^#.*/ ) {
156			&null_row($old_code, $comment);
157		} elsif ( &is_range($newcode) ) {
158			my $first_item = &first_of($newcode);
159			my $last_item  = &last_of($newcode);
160			my $item;
161
162			if ( $first_item lt 0 or $last_item lt 0 ) {
163				printf "? Problem with one:many numbers \"%s\"\n", $newcode;
164			} else {
165				if ( $comment =~ /^$/ ) {
166					$comment = sprintf("mapped: %#x to %#x..%#x", $old_code, $first_item, $last_item);
167				} else {
168					$comment = $comment . " (range)";
169				}
170				for $item ( $first_item..$last_item) {
171					&make_row($old_code, $item, $comment);
172				}
173			}
174		} else {
175			my $new_code = &value_of($newcode);
176			if ( $new_code lt 0 ) {
177				printf "? Problem with number \"%s\"\n", $newcode;
178			} else {
179				if ( $comment =~ /^$/ ) {
180					$comment = sprintf("mapped: %#x to %#x", $old_code, $new_code);
181				}
182				&make_row($old_code, $new_code, $comment);
183			}
184		}
185	}
186}
187
188sub many_many($$$) {
189	my $oldcode = $_[0];
190	my $newcode = $_[1];
191	my $comment = $_[2];
192
193	my $first_old = &first_of($oldcode);
194	my $last_old  = &last_of($oldcode);
195	my $item;
196
197	if (&is_range($newcode)) {
198		my $first_new = &first_of($newcode);
199		my $last_new  = &last_of($newcode);
200		for $item ( $first_old..$last_old) {
201			&one_many($item, $first_new, $comment);
202			$first_new += 1;
203		}
204	} else {
205		for $item ( $first_old..$last_old) {
206			&one_many($item, $newcode, $comment);
207		}
208	}
209}
210
211sub approximate($$$) {
212	my $values = $_[0];
213	my $expect = sprintf("%-8s", $_[1]);
214	my $comment = $_[2];
215	my $escaped = &escaped($expect);
216	my $left;
217	my $this;
218	my $next;
219
220	$escaped =~ s/\\134/\\/g;
221	$escaped =~ s/\\015/\&#13\;/g;
222	$escaped =~ s/\\012/\&#10\;/g;
223
224	while ( $escaped =~ /^.*\\[0-7]{3}.*$/ ) {
225		$left = $escaped;
226		$left =~ s/\\[0-7]{3}.*//;
227		$this = substr $escaped,length($left)+1,3;
228		$next = substr $escaped,length($left)+4;
229		$escaped = sprintf("%s&#%d;%s", $left, oct $this, $next);
230	}
231
232	my $visible = sprintf("&amp;#%d;      ", $values);
233	if ($values < 256) {
234		printf FP "%4x    %c   %.13s  &#%d;             approx: %s\n",
235			$values, $values,
236			$visible,
237			$values,
238			$escaped;
239	} else {
240		printf FP "%4x    .   %.13s  &#%d;             approx: %s\n",
241			$values,
242			$visible,
243			$values,
244			$escaped;
245	}
246}
247
248sub doit($) {
249	my $source = $_[0];
250
251	printf "** %s\n", $source;
252
253	my $target = basename($source, ".tbl");
254
255	# Read the file into an array in memory.
256	open(FP,$source) || do {
257		print STDERR "Can't open input $source: $!\n";
258		return;
259	};
260	my (@input) = <FP>;
261	chomp @input;
262	close(FP);
263
264	my $n;
265	my $charset = "";
266	my $official = "";
267	my $empty = 1;
268
269	for $n (0..$#input) {
270		$input[$n] =~ s/\s*$//; # trim trailing blanks
271		$input[$n] =~ s/^\s*//; # trim leading blanks
272		$input[$n] =~ s/^#0x/0x/; # uncomment redundant stuff
273
274		next if $input[$n] =~ /^$/;
275		next if $input[$n] =~ /^#.*$/;
276
277		if ( $empty
278		  and ( $input[$n] =~ /^\d/
279		     or $input[$n] =~ /^U\+/ ) ) {
280			$target = $charset . ".html";
281			printf "=> %s\n", $target;
282			open(FP,">$target") || do {
283				print STDERR "Can't open output $target: $!\n";
284				return;
285			};
286			&make_header($source, $charset, $official);
287			$empty = 0;
288		}
289
290		if ( $input[$n] =~ /^M.*/ ) {
291			$charset = $input[$n];
292			$charset =~ s/^.//;
293		} elsif ( $input[$n] =~ /^O.*/ ) {
294			$official = $input[$n];
295			$official =~ s/^.//;
296		} elsif ( $input[$n] =~ /^\d/ ) {
297
298			my $newcode = &field($input[$n], 1);
299
300			next if ( $newcode eq "idem" );
301			next if ( $newcode eq "" );
302
303			my $oldcode = &field($input[$n], 0);
304			if ( &is_range($oldcode) ) {
305				&many_many($oldcode, $newcode, &notes($input[$n]));
306			} else {
307				&one_many($oldcode, $newcode, &notes($input[$n]));
308			}
309		} elsif ( $input[$n] =~ /^U\+/ ) {
310			if ( $input[$n] =~ /^U\+\w+:/ ) {
311				my $values = $input[$n];
312				my $expect = $input[$n];
313
314				$values =~ s/:.*//;
315				$values = &zeroxes($values);
316				$expect =~ s/^[^:]+://;
317
318				if ( &is_range($values) ) {
319					printf "fixme:%s(%s)(%s)\n", $input[$n], $values, $expect;
320				} else {
321					&approximate(&value_of($values), $expect, &notes($input[$n]));
322				}
323			} else {
324				my $value = $input[$n];
325				$value =~ s/\s*".*//;
326				$value = &value_of(&zeroxes($value));
327				if ($value gt 0) {
328					my $quote = $input[$n];
329					my $comment = &notes($input[$n]);
330					$quote =~ s/^[^"]*"//;
331					$quote =~ s/".*//;
332					&approximate($value, $quote, $comment);
333				} else {
334					printf "fixme:%d(%s)\n", $n, $input[$n];
335				}
336			}
337		} else {
338			# printf "skipping line %d:%s\n", $n + 1, $input[$n];
339		}
340	}
341	if ( ! $empty ) {
342		&make_footer();
343	}
344	close FP;
345}
346
347sub usage() {
348	print <<USAGE;
349Usage: $0 [tbl-files]
350
351The script writes a new ".html" file for each input, using
352the same name as the input, stripping the ".tbl" suffix.
353USAGE
354	exit(1);
355}
356
357if ( $#ARGV < 0 ) {
358	usage();
359} else {
360	while ( $#ARGV >= 0 ) {
361		&doit ( shift @ARGV );
362	}
363}
364exit (0);
365