xref: /dragonfly/tools/tools/locale/tools/cldr2def.pl (revision f223f854)
1#!/usr/local/bin/perl -wC
2
3use strict;
4use File::Copy;
5use XML::Parser;
6use Tie::IxHash;
7use Data::Dumper;
8use Getopt::Long;
9use Digest::SHA qw(sha1_hex);
10require "charmaps.pm";
11
12
# Command-line handling.  --cldr, --unidata, --etc and --type are required;
# --lc optionally restricts the run to a single locale.
my $USAGE = "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";

if ($#ARGV < 2) {
	print $USAGE;
	exit(1);
}

my $DEFENCODING = "UTF-8";	# encoding of the CLDR posix source files
my @filter = ();		# (language, family, country) when --lc is given

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $doonly = undef;

my $result = GetOptions (
		"cldr=s"	=> \$CLDRDIR,
		"unidata=s"	=> \$UNIDATADIR,
		"etc=s"		=> \$ETCDIR,
		"type=s"	=> \$TYPE,
		"lc=s"		=> \$doonly
	    );

# Stop on an unknown option or a missing required option instead of
# carrying on with undefined directories or an undefined type.
if (!$result
    || grep { !defined $_ } ($CLDRDIR, $UNIDATADIR, $ETCDIR, $TYPE)) {
	print $USAGE;
	exit(1);
}
34
# $convertors{<encoding>}{<unicode hex>} = <local hex>; see get_encodings().
my %convertors;

# File-wide state shared by the subs below:
#   %ucd               code2name/name2code tables from UnicodeData.txt
#   %values            field values collected by get_fields()
#   %hashtable         sha1 of generated content => set of locale names
#   %languages, %translations, %encodings, %alternativemonths
#                      tables returned by get_xmldata(), see get_languages()
my (%ucd, %values, %hashtable);
my (%languages, %translations, %encodings, %alternativemonths);
get_languages();

# Character name => UTF-8 hex, and name => previous name with the same code.
my (%utf8map, %utf8aliases);
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# Both hashes must keep insertion order, hence Tie::IxHash.
my %keys;
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");
55
# --type value => name of the locale category file written to the Makefile.
my %FILESNAMES = (
	monetdef	=> "LC_MONETARY",
	timedef		=> "LC_TIME",
	msgdef		=> "LC_MESSAGES",
	numericdef	=> "LC_NUMERIC",
	colldef		=> "LC_COLLATE",
	ctypedef	=> "LC_CTYPE",
);

# Callbacks named in the "<callback<key<format" entries of %keys.  The
# "data" slot carries per-call context set up by print_fields().
my %callback = (
	data	=> undef,
	mdorder	=> \&callback_mdorder,
	altmon	=> \&callback_altmon,
	cformat	=> \&callback_cformat,
);

# Comment text emitted above each field in the generated files.
my %DESC = (
	# Fields that are described by their own name
	# (numericdef, monetdef, msgdef and a few timedef ones).
	(map { $_ => $_ } qw(
		decimal_point thousands_sep grouping
		currency_symbol mon_decimal_point mon_thousands_sep
		mon_grouping positive_sign negative_sign
		int_frac_digits frac_digits
		p_cs_precedes p_sep_by_space
		n_cs_precedes n_sep_by_space
		p_sign_posn n_sign_posn
		yesexpr noexpr yesstr nostr
		c_fmt md_order
	)),

	# monetdef
	int_curr_symbol	=> "int_curr_symbol (last character always SPACE)",

	# timedef
	abmon		=> "Short month names",
	mon		=> "Long month names (as in a date)",
	abday		=> "Short weekday names",
	day		=> "Long weekday names",
	t_fmt		=> "X_fmt",
	d_fmt		=> "x_fmt",
	am_pm		=> "AM/PM",
	d_t_fmt		=> "date_fmt",
	altmon		=> "Long month names (without case ending)",
	t_fmt_ampm	=> "ampm_fmt",
);
117
# Ordered field layout for each category handled by get_fields() and
# print_fields().  The value of each field is its output format:
#   "s"  string          "as" ";"-separated list of strings
#   "i"  integer         "ai" list of integers
#   "<callback<key<fmt"  value computed by $callback{callback} from the
#                        value of another key, then printed as fmt
my %FIELDSPECS = (
	"numericdef" => [
	    "decimal_point"	=> "s",
	    "thousands_sep"	=> "s",
	    "grouping"		=> "ai",
	],
	"monetdef" => [
	    "int_curr_symbol"	=> "s",
	    "currency_symbol"	=> "s",
	    "mon_decimal_point"	=> "s",
	    "mon_thousands_sep"	=> "s",
	    "mon_grouping"	=> "ai",
	    "positive_sign"	=> "s",
	    "negative_sign"	=> "s",
	    "int_frac_digits"	=> "i",
	    "frac_digits"	=> "i",
	    "p_cs_precedes"	=> "i",
	    "p_sep_by_space"	=> "i",
	    "n_cs_precedes"	=> "i",
	    "n_sep_by_space"	=> "i",
	    "p_sign_posn"	=> "i",
	    "n_sign_posn"	=> "i",
	],
	"msgdef" => [
	    "yesexpr"		=> "s",
	    "noexpr"		=> "s",
	    "yesstr"		=> "s",
	    "nostr"		=> "s",
	],
	# "d_fmt" used to be listed twice here; with the ordered %keys the
	# second entry only re-stored the same value, so one entry suffices.
	"timedef" => [
	    "abmon"		=> "as",
	    "mon"		=> "as",
	    "abday"		=> "as",
	    "day"		=> "as",
	    "t_fmt"		=> "s",
	    "d_fmt"		=> "s",
	    "c_fmt"		=> "<cformat<d_t_fmt<s",
	    "am_pm"		=> "as",
	    "d_t_fmt"		=> "s",
	    "altmon"		=> "<altmon<mon<as",
	    "md_order"		=> "<mdorder<d_fmt<s",
	    "t_fmt_ampm"	=> "s",
	],
);

if (!defined $TYPE) {
	die("No --type given\n");
} elsif ($TYPE eq "colldef") {
	transform_collation();
	make_makefile();
} elsif ($TYPE eq "ctypedef") {
	transform_ctypes();
	make_makefile();
} elsif (defined $FIELDSPECS{$TYPE}) {
	# List assignment keeps the field order in the tied %keys.
	%keys = @{$FIELDSPECS{$TYPE}};
	get_fields();
	print_fields();
	make_makefile();
} else {
	# Previously an unknown type silently did nothing.
	die("Unknown type '$TYPE' (expected one of: " .
	    join(", ", sort keys(%FILESNAMES)) . ")\n");
}
194
# Callback for "c_fmt": derive it from the d_t_fmt value by dropping the
# time zone conversions.
#   $s - the source format string; may be undef when the key was not found
# Returns the string with the first " %Z" and the first " %z" removed, or
# undef when $s is undef (same convention as callback_mdorder).
sub callback_cformat {
	my $s = shift;
	return undef if (!defined $s);
	$s =~ s/ %Z//;
	$s =~ s/ %z//;
	return $s;
}
201
# Callback for "md_order": reduce a date format to the order in which the
# letters "d" and "m" occur in it.
#   undef input yields undef; otherwise every character other than "d"
#   and "m" is removed and the remainder is returned.
sub callback_mdorder {
	my ($fmt) = @_;

	if (defined $fmt) {
		# Delete everything that is not a "d" or an "m".
		$fmt =~ tr/dm//cd;
		return $fmt;
	}
	return undef;
}
208
# Callback for "altmon".
#   $mon - the value of "mon" for the current locale
# When %alternativemonths has an entry for the language/country stored in
# $callback{data}, return that entry with the whitespace around each
# ";"-separated name removed; otherwise return $mon unchanged.
sub callback_altmon {
	my $mon = shift;

	my $lang = $callback{data}{l};
	my $country = $callback{data}{c};
	my $alt = $alternativemonths{$lang}{$country};

	return $mon if (!defined $alt);

	my @cleaned = map {
		my $name = $_;
		$name =~ s/^\s+//;
		$name =~ s/\s+$//;
		$name;
	} split(";", $alt);

	return join(";", @cleaned);
}
228
229############################
230
# Load $directory/UnicodeData.txt into %ucd.
#   $ucd{code2name}{<field 0>} = <field 1>
#   $ucd{name2code}{<field 1>} = <field 0>
# where the fields are the first two ";"-separated columns of each line.
# Dies, including the system error, when the file cannot be opened.
sub get_unidata {
	my $directory = shift;
	my $file = "$directory/UnicodeData.txt";

	open(my $fin, "<", $file)
	    or die("Cannot open $file: $!");

	while (my $l = <$fin>) {
		chomp($l);
		my @a = split(/;/, $l);

		# A blank or truncated line has no name column; storing it
		# would create undef keys/values.
		next if ($#a < 1);

		$ucd{code2name}{"$a[0]"} = $a[1];	# Unicode name
		$ucd{name2code}{"$a[1]"} = $a[0];	# Unicode code
	}
	close($fin);
}
247
# Read the CHARMAP section of a charmap file into %utf8map and %utf8aliases.
#   $file - path of the charmap file
# For each "<NAME> \xHH..." line between "CHARMAP" and "END CHARMAP":
#   $utf8map{name}     = hex digits with the "\x" markers removed
#   $utf8aliases{name} = name on the preceding entry, when both entries
#                        have the same code
# Underscores in names are replaced by spaces.  An unreadable file is
# reported on STDERR and leaves both tables untouched.
sub get_utf8map {
	my $file = shift;

	my $fin;
	if (!open($fin, "<", $file)) {
		print STDERR "Cannot open $file: $!\n";
		return;
	}

	my $prev_k = undef;
	my $prev_v = "";
	my $incharmap = 0;
	while (my $l = <$fin>) {
		chomp($l);
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		# Skip anything that is not a "<NAME> code" entry; using $1
		# and $2 after a failed match would pick up stale captures.
		next if ($l !~ /^<([^\s]+)>\s+(.*)/);
		my ($k, $v) = ($1, $2);
		$k =~ s/_/ /g;		# unicode char string
		$v =~ s/\\x//g;		# UTF-8 char code
		$utf8map{$k} = $v;

		$utf8aliases{$k} = $prev_k if ($prev_v eq $v);

		$prev_v = $v;
		$prev_k = $k;
	}
	close($fin);
}
285
# Load the conversion table of every encoding named in %encodings.
#   $dir - directory holding one <encoding>.TXT file per encoding
# Each data line holds a local character code followed by a Unicode code,
# separated by whitespace; a leading "0x" is dropped from both and the
# result is stored upper-cased as
#   $convertors{encoding}{unicode code} = local code
# Encodings whose file cannot be opened are reported and skipped;
# $encodings{encoding} is set to 1 for those that were read.
sub get_encodings {
	my $dir = shift;
	foreach my $e (sort(keys(%encodings))) {
		my $fin;
		# Three-argument open: the name is never parsed for a mode.
		if (!open($fin, "<", "$dir/$e.TXT")) {
			print "Cannot open charmap for $e\n";
			next;
		}
		$encodings{$e} = 1;
		while (my $l = <$fin>) {
			chomp($l);
			$l =~ s/\r//;
			next if ($l =~ /^\#/);
			next if ($l eq "");

			my @a = split(" ", $l);
			next if ($#a < 1);
			$a[0] =~ s/^0[xX]//;	# local char code
			$a[1] =~ s/^0[xX]//;	# unicode char code
			$convertors{$e}{uc($a[1])} = uc($a[0]);
		}
		close($fin);
	}
}
311
# Fill %languages, %translations, %alternativemonths and %encodings from
# the L, T, AM and E entries returned by get_xmldata($ETCDIR), then turn
# the --lc value (if any) into @filter = (language, family, country).
# A two-part value "la_CC" gets the family "x"; a three-part value is used
# as is.  Any other shape cannot be used as a filter and is reported on
# STDERR (it used to be ignored without notice).
sub get_languages {
	my %data = get_xmldata($ETCDIR);
	%languages = %{$data{L}};
	%translations = %{$data{T}};
	%alternativemonths = %{$data{AM}};
	%encodings = %{$data{E}};

	return if (!defined $doonly);

	my @a = split(/_/, $doonly);
	if ($#a == 1) {
		@filter = ($a[0], "x", $a[1]);
	} elsif ($#a == 2) {
		@filter = @a;
	} else {
		print STDERR "Ignoring --lc=$doonly: expected " .
		    "<la>_<CC> or <la>_<family>_<CC>\n";
	}

	print Dumper(@filter);
	return;
}
335
# For every selected locale, copy the comment_char/escape_char lines and
# the LC_CTYPE section of its source file to
# $TYPE.draft/<locale>.$DEFENCODING.src, and duplicate that file for every
# other encoding of the locale.
#
# The source is looked up as $CLDRDIR/posix/<locale>.$DEFENCODING.src, then
# in $ETCDIR, then under the locale's "fallback" name in $CLDRDIR/posix.
# $languages{l}{f}{data}{c}{encoding} ends up holding a sha1 of the
# extracted lines (with the encoding name appended for the non-default
# encodings) and the same digest is recorded in %hashtable.  A locale
# whose source cannot be found or opened keeps the value 0 ("unread").
# Dies when an output file cannot be written.
sub transform_ctypes {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;
		# The output keeps the locale's own name even when the
		# input comes from the fallback.
		my $actfile = ($c eq "COMMON") ? "common" : $file;

		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		if (! -f $filename) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		my $fin;
		if (!open($fin, "<", $filename)) {
			print STDERR "Cannot open $filename: $!\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines;
		while (<$fin>) {
			if ((/^comment_char\s/) || (/^escape_char\s/)){
				push @lines, $_;
			}
			# Range operator: true from the LC_CTYPE line up to
			# and including the END LC_CTYPE line.
			if (/^LC_CTYPE/../^END LC_CTYPE/) {
				push @lines, $_;
			}
		}
		close($fin);
		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(my $fout, ">", $outname)
		    or die("Cannot create $outname: $!");
		print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
		print {$fout} @lines;
		close($fout)
		    or die("Cannot write $outname: $!");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			my $encname = "$TYPE.draft/$actfile.$enc.src";
			copy($outname, $encname)
			    or print STDERR
				"Cannot copy $outname to $encname: $!\n";
			# The encoding name is part of the digest, so it
			# differs from the digest of every other encoding.
			my $uhex = sha1_hex(join("\n", @lines) . $enc);
			$languages{$l}{$f}{data}{$c}{$enc} = $uhex;
			$hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
		}
	}
	}
	}
}
407
408
# For every selected locale, copy the comment_char/escape_char lines and
# the LC_COLLATE section (with runs of spaces squeezed to one space) of its
# source file to $TYPE.draft/<locale>.$DEFENCODING.src, and duplicate that
# file for every other encoding of the locale.
#
# The source is looked up as $CLDRDIR/posix/<locale>.$DEFENCODING.src, then
# in $ETCDIR, then under the locale's "fallback" name in $CLDRDIR/posix.
# $languages{l}{f}{data}{c}{encoding} ends up holding the sha1 of the
# extracted lines for every encoding, and the digest is recorded in
# %hashtable.  A locale whose source cannot be found or opened keeps the
# value 0 ("unread").  Dies when an output file cannot be written.
sub transform_collation {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;
		# The output keeps the locale's own name even when the
		# input comes from the fallback.
		my $actfile = ($c eq "COMMON") ? "common" : $file;

		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		if (! -f $filename) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		my $fin;
		if (!open($fin, "<", $filename)) {
			print STDERR "Cannot open $filename: $!\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines;
		while (<$fin>) {
			if ((/^comment_char\s/) || (/^escape_char\s/)){
				push @lines, $_;
			}
			# Range operator: true from the LC_COLLATE line up
			# to and including the END LC_COLLATE line.
			if (/^LC_COLLATE/../^END LC_COLLATE/) {
				$_ =~ s/[ ]+/ /g;
				push @lines, $_;
			}
		}
		close($fin);
		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(my $fout, ">", $outname)
		    or die("Cannot create $outname: $!");
		print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
		print {$fout} @lines;
		close($fout)
		    or die("Cannot write $outname: $!");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			my $encname = "$TYPE.draft/$actfile.$enc.src";
			copy($outname, $encname)
			    or print STDERR
				"Cannot copy $outname to $encname: $!\n";
			# Same digest as the default encoding: the content
			# is identical for every encoding.
			$languages{$l}{$f}{data}{$c}{$enc} = $shex;
			$hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
		}
	}
	}
	}
}
479
# Collect the raw value of every field named in %keys for every selected
# locale into $values{language}{family}{country}{key}.
#
# The family is part of the index: two locales that share language and
# country but differ in family are read from different source files and
# must not append to each other's values.
#
# The source is looked up as $CLDRDIR/posix/<locale>.$DEFENCODING.src, then
# in $ETCDIR, then under the locale's "fallback" name in $CLDRDIR/posix.
# $languages{l}{f}{data}{c}{$DEFENCODING} is set to 1 when the source was
# read and stays 0 otherwise.  A value may span several lines joined by a
# trailing "/".  Underscores inside <...> names are replaced by spaces;
# an underscore anywhere else is fatal.
sub get_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);

		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		if (! -f $filename) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		my $fin;
		if (!open($fin, "<", $filename)) {
			print STDERR "Cannot open $filename: $!\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		chomp(@lines);
		close($fin);

		foreach my $k (keys(%keys)) {
			# Reset per key so that a value cut off at the end of
			# the file cannot make the next key swallow a line.
			my $continue = 0;
			foreach my $line (@lines) {
				$line =~ s/\r//;
				next if (!$continue && $line !~ /^$k\s/);
				if ($continue) {
					$line =~ s/^\s+//;
				} else {
					$line =~ s/^$k\s+//;
				}

				$values{$l}{$f}{$c}{$k} = ""
					if (!defined $values{$l}{$f}{$c}{$k});

				$continue = ($line =~ /\/$/);
				$line =~ s/\/$// if ($continue);

				# Replace one "_" per pass inside <...>.  The
				# loop is driven by the substitution itself:
				# looping on "contains _" never ends when the
				# underscore is outside <...>.
				1 while ($line =~
				    s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
				die "_ in data - $line" if ($line =~ /_/);
				$values{$l}{$f}{$c}{$k} .= $line;

				last if (!$continue);
			}
		}
	}
	}
	}
}

# Convert one character name to its byte sequence in an encoding.
#   $e - encoding name
#   $s - character name as found between "<" and ">" in the sources
# For UTF-8 the code comes straight from %utf8map.  For any other encoding
# the name is mapped to a Unicode code (%ucd, via %utf8aliases if needed,
# or a "ucc" entry in %translations) and from there to the local code in
# %convertors; %translations may also supply the local code directly
# ("hex") or name a substitute character ("unicode").
# Returns the bytes for codes of 1 to 4 bytes (2 to 8 hex digits).  For
# any other length a message goes to STDERR and the text
# "length = <n>" is returned.  Dies when the name cannot be converted.
sub decodecldr {
	my $e = shift;
	my $s = shift;

	my $v = undef;

	if ($e eq "UTF-8") {
		#
		# Conversion to UTF-8 can be done from the Unicode name to
		# the UTF-8 character code.
		#
		$v = $utf8map{$s};
		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	} else {
		#
		# Conversion to these encodings can be done from the Unicode
		# name to Unicode code to the encodings code.
		#
		my $ucc = undef;
		$ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
		$ucc = $ucd{name2code}{$utf8aliases{$s}}
			if (!defined $ucc
			 && $utf8aliases{$s}
			 && defined $ucd{name2code}{$utf8aliases{$s}});

		if (!defined $ucc) {
			if (defined $translations{$e}{$s}{hex}) {
				$v = $translations{$e}{$s}{hex};
				$ucc = 0;	# placeholder, $v is already set
			} elsif (defined $translations{$e}{$s}{ucc}) {
				$ucc = $translations{$e}{$s}{ucc};
			}
		}

		die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
		$v = $convertors{$e}{$ucc} if (!defined $v);

		$v = $translations{$e}{$s}{hex}
			if (!defined $v && defined $translations{$e}{$s}{hex});

		if (!defined $v && defined $translations{$e}{$s}{unicode}) {
			# Look the substitute up in a variable of its own:
			# $ucc is always defined here, so reusing it kept the
			# alias lookup from ever running.
			my $ucn = $translations{$e}{$s}{unicode};
			my $subcc = $ucd{name2code}{$ucn};
			$subcc = $ucd{name2code}{$utf8aliases{$ucn}}
				if (!defined $subcc
				 && defined $utf8aliases{$ucn});
			$v = $convertors{$e}{$subcc} if (defined $subcc);
		}

		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	}

	# Two hex digits per byte; up to four bytes are supported.
	my $len = length($v);
	if ($len == 2 || $len == 4 || $len == 6 || $len == 8) {
		return pack("C*", map { hex($_) } unpack("(a2)*", $v));
	}
	print STDERR "Cannot convert $e $s\n";
	return "length = " . length($v);

}

# Return the %translations entry for character name $v in encoding $enc,
# or undef when there is none.
sub translate {
	my $enc = shift;
	my $v = shift;

	return $translations{$enc}{$v} if (defined $translations{$enc}{$v});
	return undef;
}

# Helper for print_fields(): strip one leading and one trailing double
# quote from $v and replace each <NAME> by decodecldr($enc, NAME).
#   $k is the field name, used in the error message only.
# Returns (converted string, 1) or (partly converted string, 0) when a
# name could not be converted.
sub _expand_names {
	my ($enc, $k, $v) = @_;

	$v =~ s/^"//;
	$v =~ s/"$//;
	while ($v =~ /^(.*?)<(.*?)>(.*)/) {
		my ($p1, $cm, $p3) = ($1, $2, $3);

		my $rv = decodecldr($enc, $cm);
		if (!defined $rv) {
			print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
			# Leave the loop: $v is unchanged, so trying
			# again would match the same name forever.
			return ($v, 0);
		}

		$v = $p1 . $rv . $p3;
	}
	return ($v, 1);
}

# Write $TYPE.draft/<locale>.<encoding>.src for every selected locale and
# every one of its encodings, one value per line in the order of %keys,
# each preceded by a comment taken from %DESC.
#
# Values come from $values{language}{family}{country}{key} as collected by
# get_fields(); "<callback<key<format" entries are computed through
# %callback first.  The sha1 of the generated text is stored in
# $languages{l}{f}{data}{c}{encoding} and recorded in %hashtable.  A file
# with a failed conversion is named .failed instead of .src.  Dies when
# the output file cannot be written.
sub print_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			my $state = $languages{$l}{$f}{data}{$c}{$DEFENCODING};
			if (defined $state && $state eq "0") {
				print "Skipping ${l}_" .
				    ($f eq "x" ? "" : "${f}_") .
				    "${c} - not read\n";
				next;
			}
			my $file = $l;
			$file .= "_" . $f if ($f ne "x");
			$file .= "_" . $c;
			if ($c eq "COMMON") { $file = "common"; }
			print "Writing to $file in $enc\n";

			if ($enc ne $DEFENCODING &&
			    !defined $convertors{$enc}) {
				print "Failed! Cannot convert to $enc.\n";
				next;
			}

			my $newname = "$TYPE.draft/$file.$enc.new";
			open(my $fout, ">", $newname)
			    or die("Cannot create $newname: $!");
			my $okay = 1;
			my $output = "";
			print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			foreach my $k (keys(%keys)) {
				# $fmt is the output format of the field;
				# $f stays the locale's family.
				my $fmt = $keys{$k};

				die("Unknown $k in \%DESC")
					if (!defined $DESC{$k});

				$output .= "#\n# $DESC{$k}\n";

				# Replace one row with another
				if ($fmt =~ /^>/) {
					$k = substr($fmt, 1);
					$fmt = $keys{$k};
				}

				# Callback function
				if ($fmt =~ /^\</) {
					$callback{data}{c} = $c;
					$callback{data}{k} = $k;
					$callback{data}{l} = $l;
					$callback{data}{e} = $enc;
					my @a = split(/\</, substr($fmt, 1));
					my $rv = $callback{$a[0]}->(
					    $values{$l}{$f}{$c}{$a[1]});
					$values{$l}{$f}{$c}{$k} = $rv;
					$fmt = $a[2];
					$callback{data} = undef;
				}

				my $v = $values{$l}{$f}{$c}{$k};
				$v = "undef" if (!defined $v);

				if ($fmt eq "i" || $fmt eq "ai") {
					$output .= "$v\n";
					next;
				}
				if ($fmt eq "s") {
					my ($text, $ok) =
					    _expand_names($enc, $k, $v);
					$okay = 0 if (!$ok);
					$output .= "$text\n";
					next;
				}
				if ($fmt eq "as") {
					foreach my $item (split(/;/, $v)) {
						my ($text, $ok) =
						    _expand_names($enc, $k,
							$item);
						$okay = 0 if (!$ok);
						$output .= "$text\n";
					}
					next;
				}

				die("$k is '$fmt'");

			}

			my $digest = sha1_hex($output);
			$languages{$l}{$f}{data}{$c}{$enc} = $digest;
			$hashtable{$digest}{"${l}_${f}_${c}.$enc"} = 1;
			print {$fout} "$output# EOF\n";
			close($fout)
			    or die("Cannot write $newname: $!");

			my $final = "$TYPE.draft/$file.$enc." .
			    ($okay ? "src" : "failed");
			rename($newname, $final)
			    or print STDERR
				"Cannot rename $newname to $final: $!\n";
		}
	}
	}
	}
}
769
# Write $TYPE.draft/Makefile for the files generated in this run.
#
# Nothing is written when a --lc filter is active.  The Makefile contains:
#   - the suffix rule that builds a locale file from a .src file,
#   - one "SAME+=" line per locale whose generated content has the same
#     digest in %hashtable as a preferred locale (that locale's entry in
#     %languages is cleared so it is not listed under LOCALES),
#   - one "LOCALES+=" line per remaining locale/encoding,
#   - "SAME+=" lines for the nc_link and e_link entries of %languages,
#   - the install rules.
# Dies when the Makefile cannot be written.
sub make_makefile {
	return if ($#filter > -1);
	print "Creating Makefile for $TYPE\n";
	my $SRCOUT;
	my $SRCOUT2;
	my $MAPLOC;
	if ($TYPE eq "colldef") {
		$SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
			"\t-f \${MAPLOC}/map.UTF-8 " .
			"\${.OBJDIR}/\${.IMPSRC:T:R}";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT2 = "LC_COLLATE";
	}
	elsif ($TYPE eq "ctypedef") {
		$SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:C/^.*\\.//} " .
			"\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
			" || true";
		$SRCOUT2 = "LC_CTYPE";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
	}
	else {
		$SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
		$SRCOUT2 = "out";
		$MAPLOC = "";
	}
	my $makefile = "$TYPE.draft/Makefile";
	open(my $fout, ">", $makefile)
	    or die("Cannot create $makefile: $!");
	print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR=	/usr/share/locale
FILESNAME=	$FILESNAMES{$TYPE}
.SUFFIXES:	.src .${SRCOUT2}
${MAPLOC}
.src.${SRCOUT2}:
	$SRCOUT

## PLACEHOLDER

EOF

	foreach my $hash (keys(%hashtable)) {
		# For colldef, weight LOCALES to UTF-8
		#     Sort as upper-case and reverse to achieve it
		#     Make en_US, ru_RU, and ca_AD preferred
		my @files;
		if ($TYPE eq "colldef") {
			@files = sort {
				if ($a eq 'en_x_US.UTF-8' ||
				    $a eq 'ru_x_RU.UTF-8' ||
				    $a eq 'ca_x_AD.UTF-8') { return -1; }
				elsif ($b eq 'en_x_US.UTF-8' ||
				       $b eq 'ru_x_RU.UTF-8' ||
				       $b eq 'ca_x_AD.UTF-8') { return 1; }
				else { return uc($b) cmp uc($a); }
				} keys(%{$hashtable{$hash}});
		} elsif ($TYPE eq "ctypedef") {
			# en_US, en_GB.ISO8859-15 and ru_RU are preferred.
			# The test on $b is anchored like the one on $a,
			# and a second comparison that could never be
			# reached has been removed.
			@files = sort {
				if ($a =~ /^en_x_US/ ||
				    $a =~ /^en_x_GB.ISO8859-15/ ||
				    $a =~ /^ru_x_RU/) { return -1; }
				elsif ($b =~ /^en_x_US/ ||
				       $b =~ /^en_x_GB.ISO8859-15/ ||
				       $b =~ /^ru_x_RU/) { return 1; }
				else { return uc($b) cmp uc($a); }
				} keys(%{$hashtable{$hash}});
		} else {
			@files = sort {
				if ($a =~ /COMMON/ ||
				    $b =~ /^en_x_US.UT/) { return 1; }
				elsif ($b =~ /COMMON/ ||
				       $a =~ /^en_x_US.UT/) { return -1; }
				else { return uc($b) cmp uc($a); }
				} keys(%{$hashtable{$hash}});
		}
		if ($#files > 0) {
			# The first name is the real file, the others
			# become links to it.
			my $link = shift(@files);
			$link =~ s/_x_/_/;	# strip family if none there
			$link =~ s/en_COMMON/common/;
			foreach my $file (@files) {
				# "la_family_CC.encoding" => its four parts
				my @a = split(/_/, $file);
				my @b = split(/\./, $a[-1]);
				$file =~ s/_x_/_/;
				$file =~ s/en_COMMON/common/;
				print {$fout} "SAME+=\t\t$link:$file\n";
				undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
			}
		}
	}

	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
		 && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
			print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
			    "${c} - not read\n";
			next;
		}
		foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			my $file = $l . "_";
			$file .= $f . "_" if ($f ne "x");
			$file .= $c;
			if ($c eq "COMMON") { $file = "common"; }
			# Entries cleared above are links, not files.
			next if (!defined $languages{$l}{$f}{data}{$c}{$e});
			print {$fout} "LOCALES+=\t$file.$e\n";
		}

		if (defined $languages{$l}{$f}{nc_link}) {
			foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
				my $file = $l . "_";
				$file .= $f . "_" if ($f ne "x");
				$file .= $c;
				print {$fout} "SAME+=\t\t$file.$e:$languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
			}
		}

		if (defined $languages{$l}{$f}{e_link}) {
			foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
				my @a = split(/:/, $el);
				my $file = $l . "_";
				$file .= $f . "_" if ($f ne "x");
				$file .= $c;
				print {$fout} "SAME+=\t\t$file.$a[0]:$file.$a[1]\t# legacy (same charset)\n";
			}
		}

	}
	}
	}

	print {$fout} <<EOF;

FILES=		\${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES=	\${FILES}

.for f in \${SAME}
SYMLINKS+=	../\${f:C/:.*\$//}/\${FILESNAME} \${LOCALEDIR}/\${f:C/^.*://}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

.include <bsd.prog.mk>
EOF

	close($fout)
	    or die("Cannot write $makefile: $!");
}
930