xref: /dragonfly/tools/tools/locale/tools/cldr2def.pl (revision 00cac4e7)
1#!/usr/local/bin/perl -wC
2
3use strict;
4use File::Copy;
5use XML::Parser;
6use Tie::IxHash;
7use Data::Dumper;
8use Getopt::Long;
9use Digest::SHA qw(sha1_hex);
10require "charmaps.pm";
11
12
# ---------------------------------------------------------------------------
# Command line handling and global state.
#
# --cldr, --unidata, --etc and --type are mandatory; --lc optionally limits
# the run to one locale.  The parse result and the presence of every
# mandatory option are verified before any data is loaded, so a missing or
# misspelled option produces the usage text instead of "uninitialized value"
# warnings further down.
# ---------------------------------------------------------------------------
my $DEFENCODING = "UTF-8";
my @filter = ();

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $doonly = undef;

my $result = GetOptions (
		"cldr=s"	=> \$CLDRDIR,
		"unidata=s"	=> \$UNIDATADIR,
		"etc=s"		=> \$ETCDIR,
		"type=s"	=> \$TYPE,
		"lc=s"		=> \$doonly
	    );

if (!$result || !defined $CLDRDIR || !defined $UNIDATADIR
    || !defined $ETCDIR || !defined $TYPE) {
	print "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";
	exit(1);
}

# Per-encoding tables mapping a Unicode code point to the local code.
my %convertors = ();

my %ucd = ();			# UnicodeData.txt: code2name / name2code
my %values = ();		# field values read from the CLDR sources
my %hashtable = ();		# SHA1 of generated content -> locale names
my %languages = ();
my %translations = ();
my %encodings = ();
my %alternativemonths = ();
get_languages();

my %utf8map = ();		# Unicode name -> UTF-8 byte sequence (hex)
my %utf8aliases = ();		# Unicode name -> previous name, same code
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# Both hashes must keep insertion order: %keys drives the order in which
# fields are written, %hashtable the order of SAME+= lines in the Makefile.
my %keys = ();
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");
55
# Name of the installed locale file for each definition type.
my %FILESNAMES = (
	"colldef"	=> "LC_COLLATE",
	"ctypedef"	=> "LC_CTYPE",
	"monetdef"	=> "LC_MONETARY",
	"msgdef"	=> "LC_MESSAGES",
	"numericdef"	=> "LC_NUMERIC",
	"timedef"	=> "LC_TIME",
);

# Callbacks referenced by name from the "<callback<source<format" entries in
# %keys.  The "data" slot is filled in by print_fields() around each call.
my %callback = (
	data => undef,
	mdorder => \&callback_mdorder,
	altmon => \&callback_altmon,
	cformat => \&callback_cformat,
	cbabmon => \&callback_abmon,
);

# Comment text written above each field in the generated files.
my %DESC = (

	# Fields that are described by their own name
	# (numericdef, monetdef, msgdef and two timedef entries).
	(map { $_ => $_ } qw(
	    decimal_point thousands_sep grouping
	    currency_symbol mon_decimal_point mon_thousands_sep mon_grouping
	    positive_sign negative_sign int_frac_digits frac_digits
	    p_cs_precedes p_sep_by_space n_cs_precedes n_sep_by_space
	    p_sign_posn n_sign_posn
	    yesexpr noexpr yesstr nostr
	    c_fmt md_order
	)),

	# monetdef
	"int_curr_symbol"	=>
	    "int_curr_symbol (last character always SPACE)",

	# timedef
	"abmon"		=> "Short month names",
	"mon"		=> "Long month names (as in a date)",
	"abday"		=> "Short weekday names",
	"day"		=> "Long weekday names",
	"t_fmt"		=> "X_fmt",
	"d_fmt"		=> "x_fmt",
	"am_pm"		=> "AM/PM",
	"d_t_fmt"	=> "date_fmt",
	"altmon"	=> "Long month names (without case ending)",
	"t_fmt_ampm"	=> "ampm_fmt",
);
118
# ---------------------------------------------------------------------------
# Dispatch on the requested definition type.
#
# For the field-based types %keys maps each field name to its format:
#   "s"  string, "as" array of strings, "i" integer, "ai" array of integers,
#   "<callback<source<format" takes the value of field "source", passes it
#   through $callback{callback} and then treats the result as "format".
# %keys is tied to Tie::IxHash, so the order below is the output order.
# ---------------------------------------------------------------------------
if (!defined $TYPE) {
	die "No --type given\n";
}
elsif ($TYPE eq "colldef") {
	transform_collation();
	make_makefile();
}
elsif ($TYPE eq "ctypedef") {
	transform_ctypes();
	make_makefile();
}
elsif ($TYPE eq "numericdef") {
	%keys = (
	    "decimal_point"	=> "s",
	    "thousands_sep"	=> "s",
	    "grouping"		=> "ai",
	);
	get_fields();
	print_fields();
	make_makefile();
}
elsif ($TYPE eq "monetdef") {
	%keys = (
	    "int_curr_symbol"	=> "s",
	    "currency_symbol"	=> "s",
	    "mon_decimal_point"	=> "s",
	    "mon_thousands_sep"	=> "s",
	    "mon_grouping"	=> "ai",
	    "positive_sign"	=> "s",
	    "negative_sign"	=> "s",
	    "int_frac_digits"	=> "i",
	    "frac_digits"	=> "i",
	    "p_cs_precedes"	=> "i",
	    "p_sep_by_space"	=> "i",
	    "n_cs_precedes"	=> "i",
	    "n_sep_by_space"	=> "i",
	    "p_sign_posn"	=> "i",
	    "n_sign_posn"	=> "i"
	);
	get_fields();
	print_fields();
	make_makefile();
}
elsif ($TYPE eq "msgdef") {
	%keys = (
	    "yesexpr"		=> "s",
	    "noexpr"		=> "s",
	    "yesstr"		=> "s",
	    "nostr"		=> "s"
	);
	get_fields();
	print_fields();
	make_makefile();
}
elsif ($TYPE eq "timedef") {
	# "d_fmt" used to be listed twice; the second entry only re-stored
	# the same value at the same position, so it has been dropped.
	%keys = (
	    "abmon"		=> "<cbabmon<abmon<as",
	    "mon"		=> "as",
	    "abday"		=> "as",
	    "day"		=> "as",
	    "t_fmt"		=> "s",
	    "d_fmt"		=> "s",
	    "c_fmt"		=> "<cformat<d_t_fmt<s",
	    "am_pm"		=> "as",
	    "d_t_fmt"		=> "s",
	    "altmon"		=> "<altmon<mon<as",
	    "md_order"		=> "<mdorder<d_fmt<s",
	    "t_fmt_ampm"	=> "s",
	);
	get_fields();
	print_fields();
	make_makefile();
}
else {
	# Previously an unknown type was accepted and silently did nothing.
	die "Unknown type '$TYPE' (expected one of: " .
	    join(", ", sort keys(%FILESNAMES)) . ")\n";
}
195
# Callback for c_fmt: derive it from d_t_fmt by removing the first " %Z"
# and the first " %z" (time zone name / offset) from the format string.
#
# Returns the cleaned format, or undef when the locale has no d_t_fmt
# (matching callback_mdorder, and avoiding warnings on an undefined value).
sub callback_cformat {
	my $s = shift;
	return undef if (!defined $s);
	$s =~ s/ %Z//;
	$s =~ s/ %z//;
	return $s;
}
202
# Callback for md_order: reduce a date format to just its 'd' and 'm'
# characters, in the order they appear.  Returns undef for undefined input.
sub callback_mdorder {
	my $fmt = shift;
	return undef unless (defined $fmt);

	# Delete every character that is not 'd' or 'm'.
	(my $order = $fmt) =~ tr/dm//cd;
	return $order;
}
209
# Callback for altmon.  When %alternativemonths has an entry for the
# language/country currently stored in $callback{data}, return that
# ";"-separated list with surrounding whitespace trimmed from each name;
# otherwise hand back the regular month names unchanged.
sub callback_altmon {
	my $mon = shift;

	my $lang = $callback{data}{l};
	my $country = $callback{data}{c};
	my $alternatives = $alternativemonths{$lang}{$country};
	return $mon if (!defined $alternatives);

	my @trimmed = map {
		my $name = $_;
		$name =~ s/^\s+//;
		$name =~ s/\s+$//;
		$name;
	} split(";", $alternatives);

	return join(";", @trimmed);
}
229
# Callback for abmon.  For the listed CJK locales, month names that start
# with a single digit get a leading <space> so that columns line up
# (style established in FreeBSD in 2001).  All other locales are returned
# untouched.  The locale is taken from $callback{data}.
sub callback_abmon {
	my $names = shift;
	my $locale = $callback{data}{l} . "_" . $callback{data}{c};

	my %wants_padding = map { $_ => 1 }
	    ('ja_JP', 'ko_KR', 'zh_CN', 'zh_HK', 'zh_TW');
	return $names if (!$wants_padding{$locale});

	my @padded;
	foreach my $name (split(";", $names)) {
		# A name needs padding when it begins with 2..9, or with a
		# 1 that is not the first digit of 10, 11 or 12.
		my $single_digit =
		    ($name =~ /^"<(two|three|four|five|six|seven|eight|nine)>/)
		    || ($name =~ /^"<one>/
			&& $name !~ /^"<one>(<zero>|<one>|<two>)/);
		$name =~ s/^"/"<space>/ if ($single_digit);
		push @padded, $name;
	}
	return join(";", @padded);
}
253
254############################
255
# Load "$directory/UnicodeData.txt" into %ucd:
#   $ucd{code2name}{CODE} = NAME and $ucd{name2code}{NAME} = CODE,
# using the first two ";"-separated fields of every line.
# Dies (with the system error) when the file cannot be opened.
sub get_unidata {
	my $directory = shift;
	my $path = "$directory/UnicodeData.txt";

	open(my $fin, '<', $path)
	    or die("Cannot open $path: $!");

	while (my $line = <$fin>) {
		chomp($line);
		my @a = split(/;/, $line);

		# Blank or truncated lines carry no code/name pair.
		next if (@a < 2);

		$ucd{code2name}{"$a[0]"} = $a[1];	# Unicode name
		$ucd{name2code}{"$a[1]"} = $a[0];	# Unicode code
	}
	close($fin);
}
272
# Read the CHARMAP section of a POSIX charmap file into %utf8map
# (symbol name with "_" turned into " " -> hex byte string without "\x").
# When two consecutive entries share the same byte string, the later name is
# recorded in %utf8aliases as an alias of the earlier one.
#
# An unreadable file is reported on STDERR and leaves both maps untouched.
sub get_utf8map {
	my $file = shift;

	my $fin;
	if (!open($fin, '<', $file)) {
		print STDERR "Cannot open $file: $!\n";
		return;
	}
	my @lines = <$fin>;
	close($fin);
	chomp(@lines);

	my $prev_k = undef;
	my $prev_v = "";
	my $incharmap = 0;
	foreach my $l (@lines) {
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		# Skip anything that is not a "<NAME> bytes" entry; using
		# $1/$2 after a failed match would silently reuse the
		# captures of the previous line.
		next if ($l !~ /^<([^\s]+)>\s+(.*)/);
		my $k = $1;
		my $v = $2;
		$k =~ s/_/ /g;		# unicode char string
		$v =~ s/\\x//g;		# UTF-8 char code
		$utf8map{$k} = $v;

		$utf8aliases{$k} = $prev_k if ($prev_v eq $v);

		$prev_v = $v;
		$prev_k = $k;
	}
}
310
# For every encoding named in %encodings, read "$dir/<encoding>.TXT" and
# fill $convertors{<encoding>}{UNICODE_HEX} = LOCAL_HEX (both upper case,
# "0x" prefix removed).  The first two whitespace-separated columns of each
# non-comment line are used as local code and Unicode code respectively.
# Encodings whose table cannot be opened are reported and skipped.
sub get_encodings {
	my $dir = shift;
	foreach my $e (sort(keys(%encodings))) {
		# Lexical handle and three-argument open: the path is never
		# interpreted as an open mode and no global handle is shared
		# with the other readers in this file.
		my $fin;
		if (!open($fin, '<', "$dir/$e.TXT")) {
			print "Cannot open charmap for $e\n";
			next;
		}
		$encodings{$e} = 1;
		my @lines = <$fin>;
		close($fin);
		chomp(@lines);
		foreach my $l (@lines) {
			$l =~ s/\r//;
			next if ($l =~ /^\#/);
			next if ($l eq "");

			my @a = split(" ", $l);
			next if ($#a < 1);
			$a[0] =~ s/^0[xX]//;	# local char code
			$a[1] =~ s/^0[xX]//;	# unicode char code
			$convertors{$e}{uc($a[1])} = uc($a[0]);
		}
	}
}
336
# Load the locale description returned by get_xmldata($ETCDIR) into the
# global tables (%languages, %translations, %alternativemonths, %encodings)
# and, when --lc was given, turn it into the three-element @filter
# (language, family or "x", country).
#
# --lc accepts "la_CC" or "la_Family_CC".  Any other shape cannot be turned
# into a filter; it is now reported on STDERR instead of being ignored
# without a word (the run then proceeds unfiltered, as before).
sub get_languages {
	my %data = get_xmldata($ETCDIR);
	%languages = %{$data{L}};
	%translations = %{$data{T}};
	%alternativemonths = %{$data{AM}};
	%encodings = %{$data{E}};

	return if (!defined $doonly);

	my @a = split(/_/, $doonly);
	if ($#a == 1) {
		$filter[0] = $a[0];
		$filter[1] = "x";
		$filter[2] = $a[1];
	} elsif ($#a == 2) {
		$filter[0] = $a[0];
		$filter[1] = $a[1];
		$filter[2] = $a[2];
	} else {
		print STDERR "Ignoring malformed --lc value '$doonly' " .
		    "(expected la_CC or la_Family_CC)\n";
	}

	print Dumper(@filter);
	return;
}
360
# Generate the LC_CTYPE sources in "$TYPE.draft/".
#
# For every selected locale the common file posix/xx_Comm_US.UTF-8.src is
# copied verbatim as the UTF-8 source.  For each further encoding of the
# locale, the comment_char/escape_char lines and the LC_CTYPE section of the
# locale's own UTF-8 source are extracted and written below a "do not edit"
# header.  The SHA1 of each result is stored in %languages and %hashtable so
# that make_makefile() can link identical files.
#
# Input files that cannot be read are reported and skipped; an output file
# that cannot be written is fatal.
sub transform_ctypes {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file;
		$file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;
		my $actfile = $file;

		my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
		if (! -f $filename) {
			print STDERR "Cannot open $filename\n";
			next;
		}
		my $fin;
		if (!open($fin, '<', $filename)) {
			print STDERR "Cannot open $filename: $!\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		close($fin);

		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(my $fout, '>', $outname)
		    or die("Cannot write $outname: $!");
		print $fout @lines;
		close($fout)
		    or die("Cannot write $outname: $!");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
			if (! -f $filename) {
				print STDERR "Cannot open $filename\n";
				next;
			}
			my $encin;
			if (!open($encin, '<', $filename)) {
				print STDERR "Cannot open $filename: $!\n";
				next;
			}
			@lines = ();
			while (my $line = <$encin>) {
				if (($line =~ /^comment_char\s/)
				    || ($line =~ /^escape_char\s/)) {
					push @lines, $line;
				}
				# Everything from LC_CTYPE up to and
				# including END LC_CTYPE.
				if ($line =~ /^LC_CTYPE/
				    .. $line =~ /^END LC_CTYPE/) {
					push @lines, $line;
				}
			}
			close($encin);

			# The encoding name is part of the hash so that equal
			# text in different encodings is never linked.
			my $uhex = sha1_hex(join("\n", @lines) . $enc);
			$languages{$l}{$f}{data}{$c}{$enc} = $uhex;
			$hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;

			$outname = "$TYPE.draft/$actfile.$enc.src";
			open(my $encout, '>', $outname)
			    or die("Cannot write $outname: $!");
			print $encout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			print $encout @lines;
			close($encout)
			    or die("Cannot write $outname: $!");
		}
	}
	}
	}
}
432
433
# Generate the LC_COLLATE sources in "$TYPE.draft/".
#
# For every selected locale the UTF-8 source is looked up in
# $CLDRDIR/posix, then $ETCDIR, then via the language's "fallback" locale.
# The comment_char/escape_char lines and the LC_COLLATE section (runs of
# spaces squeezed to one) are written below a "do not edit" header, and the
# result is copied for every other encoding of the locale.  The SHA1 of the
# extracted text is stored in %languages and %hashtable so that
# make_makefile() can link identical files.
#
# Input files that cannot be read are reported and skipped; output that
# cannot be written or copied is fatal.
sub transform_collation {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file;
		$file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;
		my $actfile = $file;

		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		my $fin;
		if (!open($fin, '<', $filename)) {
			print STDERR "Cannot open $filename: $!\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines;
		while (my $line = <$fin>) {
			if (($line =~ /^comment_char\s/)
			    || ($line =~ /^escape_char\s/)) {
				push @lines, $line;
			}
			# Everything from LC_COLLATE up to and including
			# END LC_COLLATE.
			if ($line =~ /^LC_COLLATE/
			    .. $line =~ /^END LC_COLLATE/) {
				$line =~ s/[ ]+/ /g;
				push @lines, $line;
			}
		}
		close($fin);

		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(my $fout, '>', $outname)
		    or die("Cannot write $outname: $!");
		print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
		print $fout @lines;
		close($fout)
		    or die("Cannot write $outname: $!");

		# Collation is encoding independent here: every other
		# encoding gets a copy of the UTF-8 file and the same hash.
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			my $copyname = "$TYPE.draft/$actfile.$enc.src";
			copy ($outname, $copyname)
			    or die("Cannot copy $outname to $copyname: $!");
			$languages{$l}{$f}{data}{$c}{$enc} = $shex;
			$hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
		}
	}
	}
	}
}
503
# Read the value of every field named in %keys from each selected locale's
# UTF-8 source into $values{language}{country}{field}.
#
# The source is looked up in $CLDRDIR/posix, then $ETCDIR, then via the
# language's "fallback" locale.  A field starts on the line beginning with
# its name; a trailing "/" continues the value on the next line.  Inside
# <...> symbol names "_" is replaced by " " so that they match the names
# used in the charmap tables.
#
# Dies when an underscore remains outside a symbol name.
#
# NOTE(review): %values is not keyed by family, so two families of the same
# language and country appear to append to the same entry - confirm.
sub get_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);

		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file;
		$file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		my $fin;
		if (!open($fin, '<', $filename)) {
			print STDERR "Cannot open $filename: $!\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		chomp(@lines);
		close($fin);
		my $continue = 0;
		foreach my $k (keys(%keys)) {
			# $line aliases the element of @lines, so the edits
			# below are visible to the scans for later keys.
			foreach my $line (@lines) {
				$line =~ s/\r//;
				next if (!$continue && $line !~ /^$k\s/);
				if ($continue) {
					$line =~ s/^\s+//;
				} else {
					$line =~ s/^$k\s+//;
				}

				$values{$l}{$c}{$k} = ""
					if (!defined $values{$l}{$c}{$k});

				$continue = ($line =~ /\/$/);
				$line =~ s/\/$// if ($continue);

				# Replace underscores inside <...> one at a
				# time until no substitution applies.  Looping
				# on the substitution itself (rather than on
				# "is there an underscore") guarantees
				# termination, so the check below is reached
				# for underscores outside a symbol name.
				1 while ($line =~
				    s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
				die "_ in data - $line" if ($line =~ /_/);
				$values{$l}{$c}{$k} .= $line;

				last if (!$continue);
			}
		}
	}
	}
	}
}
571
# Convert one CLDR symbol name into the character it denotes in an encoding.
#
#   $e  target encoding name
#   $s  symbol name (Unicode character name, "_" already replaced by " ")
#
# Returns the character as a string of raw bytes.  Dies when the name cannot
# be resolved.  If the resolved hex code has an odd or zero length the
# problem is reported on STDERR and the string "length = N" is returned, as
# before.
#
# Lookup order for encodings other than UTF-8:
#   1. Unicode name (or its charmap alias) -> code point -> %convertors
#   2. an explicit {hex} or {ucc} entry in %translations
#   3. a substitute Unicode name from %translations ({unicode})
sub decodecldr {
	my $e = shift;
	my $s = shift;

	my $v = undef;

	if ($e eq "UTF-8") {
		#
		# Conversion to UTF-8 can be done from the Unicode name to
		# the UTF-8 character code.
		#
		$v = $utf8map{$s};
		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	} else {
		#
		# Conversion to these encodings can be done from the Unicode
		# name to Unicode code to the encodings code.
		#
		my $ucc = undef;
		$ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
		$ucc = $ucd{name2code}{$utf8aliases{$s}}
			if (!defined $ucc
			 && $utf8aliases{$s}
			 && defined $ucd{name2code}{$utf8aliases{$s}});

		if (!defined $ucc) {
			if (defined $translations{$e}{$s}{hex}) {
				$v = $translations{$e}{$s}{hex};
				$ucc = 0;
			} elsif (defined $translations{$e}{$s}{ucc}) {
				$ucc = $translations{$e}{$s}{ucc};
			}
		}

		die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
		$v = $convertors{$e}{$ucc} if (!defined $v);

		$v = $translations{$e}{$s}{hex}
			if (!defined $v && defined $translations{$e}{$s}{hex});

		if (!defined $v && defined $translations{$e}{$s}{unicode}) {
			# Resolve the substitute name into its own code
			# point.  $ucc is always defined at this point, so
			# reusing it (as was done before) meant the alias
			# lookup could never run.
			my $ucn = $translations{$e}{$s}{unicode};
			my $subst = $ucd{name2code}{$ucn};
			$subst = $ucd{name2code}{$utf8aliases{$ucn}}
				if (!defined $subst
				 && defined $utf8aliases{$ucn});
			$v = $convertors{$e}{$subst} if (defined $subst);
		}

		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	}

	# $v is a string of hex digit pairs, one pair per byte.  Any number
	# of bytes is accepted, which includes four-byte sequences.
	my $len = length($v);
	if ($len > 0 && $len % 2 == 0) {
		return pack("C*", map { hex($_) } ($v =~ /../g));
	}

	print STDERR "Cannot convert $e $s\n";
	return "length = " . $len;

}
634
# Look up the %translations entry for symbol $v in encoding $enc.
# Returns the entry, or undef when there is none.
sub translate {
	my ($enc, $v) = @_;

	my $entry = $translations{$enc}{$v};
	return (defined $entry) ? $entry : undef;
}
642
# Replace every <SYMBOL NAME> in $v with the character it denotes in $enc
# (after removing one leading and one trailing double quote).
# Returns (decoded text, success flag).
#
# The text is consumed left to right and decoded characters are appended to
# a separate result, so a decoded "<" or ">" is never mistaken for the start
# or end of another symbol.  Should decodecldr() return undef, the symbol is
# kept verbatim, the failure is reported and decoding continues behind it.
sub _decode_symbols {
	my ($enc, $k, $v) = @_;
	my $ok = 1;
	my $decoded = "";

	$v =~ s/^"//;
	$v =~ s/"$//;
	while ($v =~ /^(.*?)<(.*?)>(.*)/) {
		my ($p1, $cm, $p3) = ($1, $2, $3);

		my $rv = decodecldr($enc, $cm);
		if (!defined $rv) {
			print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
			$ok = 0;
			$rv = "<$cm>";
		}

		$decoded .= $p1 . $rv;
		$v = $p3;
	}
	return ($decoded . $v, $ok);
}

# Write "$TYPE.draft/<locale>.<encoding>.src" for every selected locale and
# each of its encodings, using the values collected by get_fields().
#
# Each field in %keys is preceded by its %DESC comment and formatted by its
# type: "i"/"ai" verbatim, "s" one decoded string, "as" one decoded string
# per ";"-separated element.  A ">other" type borrows another field, a
# "<callback<source<type" type runs a callback on another field first.
# The SHA1 of the generated text is stored in %languages and %hashtable.
# When a symbol could not be converted the file is left as ".failed".
#
# Dies when %DESC lacks a field, a field type is unknown, or the output
# cannot be written.
sub print_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
				print "Skipping ${l}_" .
				    ($f eq "x" ? "" : "${f}_") .
				    "${c} - not read\n";
				next;
			}
			my $file = $l;
			$file .= "_" . $f if ($f ne "x");
			$file .= "_" . $c;
			print "Writing to $file in $enc\n";

			if ($enc ne $DEFENCODING &&
			    !defined $convertors{$enc}) {
				print "Failed! Cannot convert to $enc.\n";
				next;
			}

			my $draft = "$TYPE.draft/$file.$enc";
			open(my $fout, '>', "$draft.new")
			    or die("Cannot write $draft.new: $!");
			my $okay = 1;
			my $output = "";
			print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			foreach my $k (keys(%keys)) {
				# Field type; named $fmt so that it does not
				# hide the family loop variable $f.
				my $fmt = $keys{$k};

				die("Unknown $k in \%DESC")
					if (!defined $DESC{$k});

				$output .= "#\n# $DESC{$k}\n";

				# Replace one row with another
				if ($fmt =~ /^>/) {
					$k = substr($fmt, 1);
					$fmt = $keys{$k};
				}

				# Callback function
				if ($fmt =~ /^\</) {
					$callback{data}{c} = $c;
					$callback{data}{k} = $k;
					$callback{data}{l} = $l;
					$callback{data}{e} = $enc;
					my @a = split(/\</, substr($fmt, 1));
					my $rv = $callback{$a[0]}->(
					    $values{$l}{$c}{$a[1]});
					$values{$l}{$c}{$k} = $rv;
					$fmt = $a[2];
					$callback{data} = undef;
				}

				my $v = $values{$l}{$c}{$k};
				$v = "undef" if (!defined $v);

				if ($fmt eq "i" || $fmt eq "ai") {
					$output .= "$v\n";
					next;
				}
				if ($fmt eq "s") {
					my ($text, $ok) =
					    _decode_symbols($enc, $k, $v);
					$okay = 0 if (!$ok);
					$output .= "$text\n";
					next;
				}
				if ($fmt eq "as") {
					foreach my $item (split(/;/, $v)) {
						my ($text, $ok) =
						    _decode_symbols($enc, $k,
							$item);
						$okay = 0 if (!$ok);
						$output .= "$text\n";
					}
					next;
				}

				die("$k is '$fmt'");

			}

			my $digest = sha1_hex($output);
			$languages{$l}{$f}{data}{$c}{$enc} = $digest;
			$hashtable{$digest}{"${l}_${f}_${c}.$enc"} = 1;
			print $fout "$output# EOF\n";
			close($fout)
			    or die("Cannot write $draft.new: $!");

			my $final = $okay ? "$draft.src" : "$draft.failed";
			rename("$draft.new", $final)
			    or die("Cannot rename $draft.new to $final: $!");
		}
	}
	}
	}
}
792
# Write "$TYPE.draft/Makefile" for the files generated in this run.
#
# Nothing is written when a --lc filter is active.  The Makefile contains
#   - the suffix rule that builds an installed file from a .src file,
#   - one SAME+= line per pair of locales with identical content (taken
#     from %hashtable; the duplicate is then removed from %languages so
#     that it is not listed in LOCALES),
#   - one LOCALES+= line per remaining locale/encoding,
#   - SAME+= lines for the legacy links (nc_link, e_link),
#   - the generic tail (FILES, SYMLINKS, FILESDIR_*, bsd.prog.mk).
#
# Dies when the Makefile cannot be written.
sub make_makefile {
	return if ($#filter > -1);
	print "Creating Makefile for $TYPE\n";
	my $SRCOUT;
	my $SRCOUT2;
	# Only ctypedef has an extra section; every other type interpolates
	# an empty string rather than an undefined value.
	my $SRCOUT3 = "";
	my $MAPLOC;
	if ($TYPE eq "colldef") {
		$SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
			"\t-f \${MAPLOC}/map.UTF-8 " .
			"\${.OBJDIR}/\${.IMPSRC:T:R}";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT2 = "LC_COLLATE";
	}
	elsif ($TYPE eq "ctypedef") {
		$SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:C/^.*\\.//} " .
			"\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
			" || true";
		$SRCOUT2 = "LC_CTYPE";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT3 = "## SYMPAIRS\n\n" .
			".for PAIR in \${SYMPAIRS}\n" .
			"\${PAIR:C/^.*://:S/src\$/LC_CTYPE/}: " .
			"\${PAIR:C/:.*//}\n" .
			"\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
			"\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
			" || true\n" .
			".endfor\n\n";
	}
	else {
		$SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
		$SRCOUT2 = "out";
		$MAPLOC = "";
	}
	my $makefile = "$TYPE.draft/Makefile";
	open(my $fout, '>', $makefile)
	    or die("Cannot write $makefile: $!");
	print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR=	\${SHAREDIR}/locale
FILESNAME=	$FILESNAMES{$TYPE}
.SUFFIXES:	.src .${SRCOUT2}
${MAPLOC}
.src.${SRCOUT2}:
	$SRCOUT

## PLACEHOLDER

EOF

	foreach my $hash (keys(%hashtable)) {
		# For colldef, weight LOCALES to UTF-8
		#     Sort as upper-case and reverse to achieve it
		#     Make en_US, ru_RU, and ca_AD preferred
		my @files;
		if ($TYPE eq "colldef") {
			@files = sort {
				if ($a eq 'en_x_US.UTF-8' ||
				    $a eq 'ru_x_RU.UTF-8' ||
				    $a eq 'ca_x_AD.UTF-8') { return -1; }
				elsif ($b eq 'en_x_US.UTF-8' ||
				       $b eq 'ru_x_RU.UTF-8' ||
				       $b eq 'ca_x_AD.UTF-8') { return 1; }
				else { return uc($b) cmp uc($a); }
				} keys(%{$hashtable{$hash}});
		} elsif ($TYPE eq "ctypedef") {
			# Both sides of each test use the same anchored
			# pattern, and the dot before the encoding is literal.
			@files = sort {
				if ($a eq 'en_x_US.UTF-8') { return -1; }
				elsif ($b eq 'en_x_US.UTF-8') { return 1; }
				if ($a =~ /^en_x_US/) { return -1; }
				elsif ($b =~ /^en_x_US/) { return 1; }

				if ($a =~ /^en_x_GB\.ISO8859-15/ ||
				    $a =~ /^ru_x_RU/) { return -1; }
				elsif ($b =~ /^en_x_GB\.ISO8859-15/ ||
				       $b =~ /^ru_x_RU/) { return 1; }
				else { return uc($b) cmp uc($a); }

				} keys(%{$hashtable{$hash}});
		} else {
			@files = sort {
				if ($a =~ /_Comm_/ ||
				    $b eq 'en_x_US.UTF-8') { return 1; }
				elsif ($b =~ /_Comm_/ ||
				       $a eq 'en_x_US.UTF-8') { return -1; }
				else { return uc($b) cmp uc($a); }
				} keys(%{$hashtable{$hash}});
		}
		if ($#files > 0) {
			# The first name is the real file, all others become
			# links to it.
			my $link = shift(@files);
			$link =~ s/_x_/_/;	# strip family if none there
			foreach my $file (@files) {
				# language_family_country.encoding
				my @a = split(/_/, $file);
				my @b = split(/\./, $a[-1]);
				$file =~ s/_x_/_/;
				print $fout "SAME+=\t\t$link:$file\n";
				undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
			}
		}
	}

	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
		 && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
			print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
			    "${c} - not read\n";
			next;
		}

		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			# Entries undefined above are duplicates (SAME).
			next if (!defined $languages{$l}{$f}{data}{$c}{$e});
			print $fout "LOCALES+=\t$file.$e\n";
		}

		if (defined $languages{$l}{$f}{nc_link}) {
			foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
				print $fout "SAME+=\t\t$file.$e:$languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
			}
		}

		if (defined $languages{$l}{$f}{e_link}) {
			foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
				my @a = split(/:/, $el);
				print $fout "SAME+=\t\t$file.$a[0]:$file.$a[1]\t# legacy (same charset)\n";
			}
		}

	}
	}
	}

	print $fout <<EOF;

FILES=		\${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES=	\${FILES}

.for f in \${SAME}
SYMLINKS+=	../\${f:C/:.*\$//}/\${FILESNAME} \\
		\${LOCALEDIR}/\${f:C/^.*://}/\${FILESNAME}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

${SRCOUT3}.include <bsd.prog.mk>
EOF

	close($fout)
	    or die("Cannot write $makefile: $!");
}
961