xref: /dragonfly/tools/tools/locale/tools/cldr2def.pl (revision 279dd846)
1#!/usr/local/bin/perl -wC
2
3use strict;
4use File::Copy;
5use XML::Parser;
6use Tie::IxHash;
7use Data::Dumper;
8use Getopt::Long;
9use Digest::SHA qw(sha1_hex);
10require "charmaps.pm";
11
12
# Encoding of the CLDR source files; every other encoding is derived from it.
my $DEFENCODING = "UTF-8";
# (language, family, country) triple selected with --lc; empty means "all".
my @filter = ();

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $doonly = undef;

# Parse the command line.  --cldr, --unidata, --etc and --type are all
# required; --lc optionally restricts the run to a single locale.
my $result = GetOptions (
		"cldr=s"	=> \$CLDRDIR,
		"unidata=s"	=> \$UNIDATADIR,
		"etc=s"		=> \$ETCDIR,
		"type=s"	=> \$TYPE,
		"lc=s"		=> \$doonly
	    );

# Refuse to continue on a parse error or when a required option is missing,
# instead of merely counting the raw arguments.
if (!$result || !defined $CLDRDIR || !defined $UNIDATADIR
    || !defined $ETCDIR || !defined $TYPE) {
	print "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";
	exit(1);
}
34
# Lookup tables shared by the subroutines below.
my (%convertors, %ucd, %values, %hashtable);
my (%languages, %translations, %encodings, %alternativemonths);
get_languages();

# Unicode name -> UTF-8 hex string, and names sharing one hex string.
my (%utf8map, %utf8aliases);
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# Both hashes are tied so that they iterate in insertion order.
my %keys;
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");

# Value written as FILESNAME into the generated Makefile, per --type.
my %FILESNAMES = (
	monetdef	=> "LC_MONETARY",
	timedef		=> "LC_TIME",
	msgdef		=> "LC_MESSAGES",
	numericdef	=> "LC_NUMERIC",
	colldef		=> "LC_COLLATE",
	ctypedef	=> "LC_CTYPE",
);

# Callbacks named in the "<callback<source<format" field specifications;
# "data" carries the current locale while a callback runs.
my %callback = (
	mdorder	=> \&callback_mdorder,
	altmon	=> \&callback_altmon,
	cformat	=> \&callback_cformat,
	data	=> undef,
);

# Comment text emitted above each field in the generated files.
my %DESC = (
	# numericdef, monetdef, msgdef and a few timedef fields are simply
	# described by their own name
	(map { $_ => $_ } qw(
		decimal_point thousands_sep grouping
		currency_symbol mon_decimal_point mon_thousands_sep
		mon_grouping positive_sign negative_sign
		int_frac_digits frac_digits
		p_cs_precedes p_sep_by_space n_cs_precedes n_sep_by_space
		p_sign_posn n_sign_posn
		yesexpr noexpr yesstr nostr
		c_fmt md_order
	)),

	# monetdef
	int_curr_symbol	=> "int_curr_symbol (last character always SPACE)",

	# timedef
	abmon		=> "Short month names",
	mon		=> "Long month names (as in a date)",
	abday		=> "Short weekday names",
	day		=> "Long weekday names",
	t_fmt		=> "X_fmt",
	d_fmt		=> "x_fmt",
	am_pm		=> "AM/PM",
	d_t_fmt		=> "date_fmt",
	altmon		=> "Long month names (without case ending)",
	t_fmt_ampm	=> "ampm_fmt",
);
117
# Field layout of the categories handled by get_fields()/print_fields().
# Each list pairs a field name with its format, in output order:
#   "s"		string whose <Unicode name> tokens are decoded
#   "as"	semicolon-separated list of such strings
#   "i", "ai"	value copied to the output unchanged
#   "<callback<source<format"
#		value computed by $callback{callback} from field "source",
#		then written as "format"
my %fields_for = (
	numericdef => [
	    "decimal_point"	=> "s",
	    "thousands_sep"	=> "s",
	    "grouping"		=> "ai",
	],
	monetdef => [
	    "int_curr_symbol"	=> "s",
	    "currency_symbol"	=> "s",
	    "mon_decimal_point"	=> "s",
	    "mon_thousands_sep"	=> "s",
	    "mon_grouping"	=> "ai",
	    "positive_sign"	=> "s",
	    "negative_sign"	=> "s",
	    "int_frac_digits"	=> "i",
	    "frac_digits"	=> "i",
	    "p_cs_precedes"	=> "i",
	    "p_sep_by_space"	=> "i",
	    "n_cs_precedes"	=> "i",
	    "n_sep_by_space"	=> "i",
	    "p_sign_posn"	=> "i",
	    "n_sign_posn"	=> "i",
	],
	msgdef => [
	    "yesexpr"		=> "s",
	    "noexpr"		=> "s",
	    "yesstr"		=> "s",
	    "nostr"		=> "s",
	],
	timedef => [
	    "abmon"		=> "as",
	    "mon"		=> "as",
	    "abday"		=> "as",
	    "day"		=> "as",
	    "t_fmt"		=> "s",
	    "d_fmt"		=> "s",
	    "c_fmt"		=> "<cformat<d_t_fmt<s",
	    "am_pm"		=> "as",
	    "d_t_fmt"		=> "s",
	    "altmon"		=> "<altmon<mon<as",
	    "md_order"		=> "<mdorder<d_fmt<s",
	    "t_fmt_ampm"	=> "s",
	],
);

# Run the generator selected with --type.  An unrecognised type is reported
# rather than silently producing nothing.
if (defined $TYPE && $TYPE eq "colldef") {
	transform_collation();
	make_makefile();
} elsif (defined $TYPE && $TYPE eq "ctypedef") {
	transform_ctypes();
	make_makefile();
} elsif (defined $TYPE && exists $fields_for{$TYPE}) {
	# %keys is tied to Tie::IxHash, so the list order above is kept.
	%keys = @{$fields_for{$TYPE}};
	get_fields();
	print_fields();
	make_makefile();
} else {
	die "Unknown --type '" . (defined $TYPE ? $TYPE : "") .
	    "'; expected one of: " . join(", ", sort keys(%FILESNAMES)) . "\n";
}
194
# Callback for c_fmt: derive it from the date/time format by dropping the
# first " %Z" and the first " %z" (time zone name / offset).
# Returns undef when no source format is available, like callback_mdorder.
sub callback_cformat {
	my $s = shift;
	return undef if (!defined $s);
	$s =~ s/ %Z//;
	$s =~ s/ %z//;
	return $s;
}
201
# Callback for md_order: keep only the "d" and "m" characters of the given
# date format, in the order they appear.  An undefined format yields undef.
sub callback_mdorder {
	my ($fmt) = @_;
	return undef unless (defined $fmt);
	$fmt =~ tr/dm//cd;	# delete everything that is not 'd' or 'm'
	return $fmt;
}
208
# Callback for altmon: when %alternativemonths has an entry for the current
# language/country (taken from $callback{data}), return that list with the
# whitespace around each name removed; otherwise return the month list that
# was passed in.
sub callback_altmon {
	my $months = shift;

	my $lang = $callback{data}{l};
	my $country = $callback{data}{c};
	my $alternative = $alternativemonths{$lang}{$country};
	return $months if (!defined $alternative);

	my @trimmed = map {
		my $name = $_;
		$name =~ s/^\s+//;
		$name =~ s/\s+$//;
		$name;
	} split(";", $alternative);
	return join(";", @trimmed);
}
228
229############################
230
# Load UnicodeData.txt from $directory and fill %ucd with two maps built from
# the first two ';'-separated fields of every line:
#   $ucd{code2name}{code} = name
#   $ucd{name2code}{name} = code
# Dies when the file cannot be opened.
sub get_unidata {
	my $directory = shift;
	my $path = "$directory/UnicodeData.txt";

	open(my $fh, '<', $path)
	    or die("Cannot open $path: $!");

	while (my $line = <$fh>) {
		chomp($line);
		my ($code, $name) = split(/;/, $line);
		# Skip blank or truncated lines instead of creating undef keys.
		next if (!defined $code || !defined $name);

		$ucd{code2name}{$code} = $name;	# Unicode name
		$ucd{name2code}{$name} = $code;	# Unicode code
	}
	close($fh);
}
247
# Read the CHARMAP section of the charmap file $file.
# For every "<name> \xHH..." entry:
#   $utf8map{name} = hex digits of the byte sequence ('_' in the name is
#                    turned into a space)
#   $utf8aliases{name} = previous name, when both entries carry the same
#                        byte sequence
# Dies when the file cannot be opened.
sub get_utf8map {
	my $file = shift;

	open(my $fh, '<', $file)
	    or die("Cannot open $file: $!");
	my @lines = <$fh>;
	close($fh);
	chomp(@lines);

	my $prev_k = undef;
	my $prev_v = "";
	my $incharmap = 0;
	foreach my $l (@lines) {
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		# Ignore lines that are not "<name> value"; without this check
		# the captures of the previous line would be reused.
		next if ($l !~ /^<([^\s]+)>\s+(.*)/);
		my ($k, $v) = ($1, $2);
		$k =~ s/_/ /g;		# unicode char string
		$v =~ s/\\x//g;		# UTF-8 char code
		$utf8map{$k} = $v;

		$utf8aliases{$k} = $prev_k if ($prev_v eq $v);

		$prev_v = $v;
		$prev_k = $k;
	}
}
285
# For every encoding named in %encodings, read "$dir/<encoding>.TXT" and
# record in %convertors the mapping from Unicode code (second column) to the
# encoding's own code (first column), both upper-cased and without a leading
# "0x".  Encodings whose table cannot be opened are reported and skipped.
sub get_encodings {
	my $dir = shift;

	foreach my $e (sort(keys(%encodings))) {
		unless (open(FIN, "$dir/$e.TXT")) {
			print "Cannot open charmap for $e\n";
			next;
		}
		$encodings{$e} = 1;

		while (my $row = <FIN>) {
			chomp($row);
			$row =~ s/\r//;
			next if ($row eq "" || $row =~ /^\#/);

			# Only rows with at least two columns are of interest.
			my ($local, $unicode) = split(" ", $row);
			next if (!defined $unicode);

			$local =~ s/^0[xX]//;	# local char code
			$unicode =~ s/^0[xX]//;	# unicode char code
			$convertors{$e}{uc($unicode)} = uc($local);
		}
		close(FIN);
	}
}
311
# Load the locale description returned by get_xmldata($ETCDIR) into
# %languages, %translations, %alternativemonths and %encodings, then turn
# the optional --lc value into @filter = (language, family, country).
# "la_CC" selects family "x" (none); "la_Family_CC" names the family.
# Dies when --lc has any other shape, since an empty filter would silently
# process every locale.
sub get_languages {
	my %data = get_xmldata($ETCDIR);
	%languages = %{$data{L}};
	%translations = %{$data{T}};
	%alternativemonths = %{$data{AM}};
	%encodings = %{$data{E}};

	return if (!defined $doonly);

	my @parts = split(/_/, $doonly);
	if (@parts == 2) {
		@filter = ($parts[0], "x", $parts[1]);
	} elsif (@parts == 3) {
		@filter = @parts;
	} else {
		die "Invalid --lc value '$doonly' " .
		    "(expected la_CC or la_Family_CC)\n";
	}

	print Dumper(@filter);
	return;
}
335
# Generate the LC_CTYPE sources in "$TYPE.draft".
# For every selected locale:
#   - the $DEFENCODING file is a verbatim copy of
#     $CLDRDIR/posix/xx_Comm_US.UTF-8.src;
#   - every other encoding gets the comment_char/escape_char lines and the
#     LC_CTYPE section of the locale's own $DEFENCODING source, below a
#     "do not edit" header.
# The SHA-1 of each result is stored in %languages and %hashtable.
# Dies when an output file cannot be written.
sub transform_ctypes {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
		my $fin;
		if (! -f $filename || !open($fin, '<', $filename)) {
			print STDERR "Cannot open $filename\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		close($fin);

		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$file.$DEFENCODING.src";
		open(my $fout, '>', $outname)
		    or die("Cannot write $outname: $!");
		print $fout @lines;
		close($fout)
		    or die("Cannot write $outname: $!");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
			if (! -f $filename || !open($fin, '<', $filename)) {
				print STDERR "Cannot open $filename\n";
				next;
			}
			@lines = ();
			# Explicit per-file state: an unterminated section in
			# one file must not spill into the next one.
			my $in_section = 0;
			while (my $line = <$fin>) {
				if (($line =~ /^comment_char\s/)
				    || ($line =~ /^escape_char\s/)) {
					push @lines, $line;
				}
				$in_section = 1
				    if (!$in_section && $line =~ /^LC_CTYPE/);
				if ($in_section) {
					push @lines, $line;
					$in_section = 0
					    if ($line =~ /^END LC_CTYPE/);
				}
			}
			close($fin);

			# The encoding name is part of the hash so that equal
			# sections in different encodings are not merged.
			my $uhex = sha1_hex(join("\n", @lines) . $enc);
			$languages{$l}{$f}{data}{$c}{$enc} = $uhex;
			$hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;

			$outname = "$TYPE.draft/$file.$enc.src";
			open($fout, '>', $outname)
			    or die("Cannot write $outname: $!");
			print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			print $fout @lines;
			close($fout)
			    or die("Cannot write $outname: $!");
		}
	}
	}
	}
}
407
408
# Generate the LC_COLLATE sources in "$TYPE.draft".
# For every selected locale the $DEFENCODING source is looked up in
# $CLDRDIR/posix, then $ETCDIR, then under the language's fallback name in
# $CLDRDIR/posix.  Its comment_char/escape_char lines and its LC_COLLATE
# section (runs of spaces squeezed to one) are written below a "do not edit"
# header, and the result is copied for every other encoding of the locale.
# The SHA-1 of the extracted text is stored in %languages and %hashtable.
# Dies when an output file cannot be written or copied.
sub transform_collation {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;
		# Output keeps the locale's own name even if a fallback is read.
		my $actfile = $file;

		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		my $fin;
		if (!open($fin, '<', $filename)) {
			print STDERR "Cannot open $filename: $!\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read

		my @lines;
		# Explicit per-file state: an unterminated section in one
		# file must not spill into the next one.
		my $in_section = 0;
		while (my $line = <$fin>) {
			if (($line =~ /^comment_char\s/)
			    || ($line =~ /^escape_char\s/)) {
				push @lines, $line;
			}
			$in_section = 1
			    if (!$in_section && $line =~ /^LC_COLLATE/);
			if ($in_section) {
				my $ends = ($line =~ /^END LC_COLLATE/);
				$line =~ s/[ ]+/ /g;
				push @lines, $line;
				$in_section = 0 if ($ends);
			}
		}
		close($fin);

		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(my $fout, '>', $outname)
		    or die("Cannot write $outname: $!");
		print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
		print $fout @lines;
		close($fout)
		    or die("Cannot write $outname: $!");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			copy($outname, "$TYPE.draft/$actfile.$enc.src")
			    or die("Cannot copy $outname to " .
				"$TYPE.draft/$actfile.$enc.src: $!");
			$languages{$l}{$f}{data}{$c}{$enc} = $shex;
			$hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
		}
	}
	}
	}
}
478
# Read, for every selected locale, the value of each field named in %keys
# from the locale's $DEFENCODING source and store it in
# $values{language}{family}{country}{field}.  The family is part of the key
# so that locales differing only in family do not share or append to each
# other's values.
#
# The source is looked up in $CLDRDIR/posix, then $ETCDIR, then under the
# language's fallback name in $CLDRDIR/posix.  A field starts on the line
# beginning with its name and continues on following lines while a line
# ends in '/'.  Underscores inside <...> tokens are turned into spaces.
# Dies when an underscore remains outside such a token.
sub get_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);

		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		my $fin;
		if (!open($fin, '<', $filename)) {
			print STDERR "Cannot open $filename: $!\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		chomp(@lines);
		close($fin);

		foreach my $k (keys(%keys)) {
			# Reset per field: a continuation left open at the end
			# of the file must not swallow the next field's lines.
			my $continue = 0;
			foreach my $raw (@lines) {
				my $line = $raw;
				$line =~ s/\r//;
				next if (!$continue && $line !~ /^\Q$k\E\s/);
				if ($continue) {
					$line =~ s/^\s+//;
				} else {
					$line =~ s/^\Q$k\E\s+//;
				}

				$values{$l}{$f}{$c}{$k} = ""
					if (!defined $values{$l}{$f}{$c}{$k});

				$continue = ($line =~ /\/$/);
				$line =~ s/\/$// if ($continue);

				# Repeat until no token contains '_' any more;
				# stops by itself when nothing matches.
				1 while ($line =~
				    s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
				die "_ in data - $line" if ($line =~ /_/);
				$values{$l}{$f}{$c}{$k} .= $line;

				last if (!$continue);
			}
		}
	}
	}
	}
}

# Convert the Unicode character name $s into the byte sequence used for it
# in encoding $e.
#
# For UTF-8 the hex string comes from %utf8map.  For any other encoding the
# name is resolved to a Unicode code through %ucd (directly or through
# %utf8aliases), or through the per-encoding entries of %translations
# ("hex", "ucc" or "unicode"), and the code is looked up in %convertors.
#
# Returns the bytes described by the hex string.  Dies when no mapping
# exists; returns undef (after a message on STDERR) when the mapping found
# is not a sequence of two-digit hex values.
sub decodecldr {
	my $e = shift;
	my $s = shift;

	my $v = undef;

	if ($e eq "UTF-8") {
		#
		# Conversion to UTF-8 can be done from the Unicode name to
		# the UTF-8 character code.
		#
		$v = $utf8map{$s};
		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	} else {
		#
		# Conversion to these encodings can be done from the Unicode
		# name to Unicode code to the encodings code.
		#
		my $ucc = undef;
		$ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
		$ucc = $ucd{name2code}{$utf8aliases{$s}}
			if (!defined $ucc
			 && $utf8aliases{$s}
			 && defined $ucd{name2code}{$utf8aliases{$s}});

		if (!defined $ucc) {
			if (defined $translations{$e}{$s}{hex}) {
				$v = $translations{$e}{$s}{hex};
				$ucc = 0;
			} elsif (defined $translations{$e}{$s}{ucc}) {
				$ucc = $translations{$e}{$s}{ucc};
			}
		}

		die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
		$v = $convertors{$e}{$ucc} if (!defined $v);

		$v = $translations{$e}{$s}{hex}
			if (!defined $v && defined $translations{$e}{$s}{hex});

		if (!defined $v && defined $translations{$e}{$s}{unicode}) {
			# Resolve the substitute name on its own; reusing $ucc
			# here would make the alias lookup unreachable.
			my $ucn = $translations{$e}{$s}{unicode};
			my $altcode = $ucd{name2code}{$ucn};
			$altcode = $ucd{name2code}{$utf8aliases{$ucn}}
				if (!defined $altcode
				 && defined $utf8aliases{$ucn});
			$v = $convertors{$e}{$altcode} if (defined $altcode);
		}

		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	}

	# Any whole number of hex byte pairs is accepted.
	return pack("H*", $v) if ($v =~ /\A(?:[0-9A-Fa-f]{2})+\z/);

	print STDERR "Cannot convert $e $s\n";
	return undef;
}

# Return the %translations entry for name $v in encoding $enc, or undef when
# there is none.
sub translate {
	my $enc = shift;
	my $v = shift;

	return $translations{$enc}{$v} if (defined $translations{$enc}{$v});
	return undef;
}

# Helper for print_fields: strip one leading and one trailing double quote
# from $text and replace each <Unicode name> token by its bytes in $enc.
# Tokens are handled in a single left-to-right pass, so a decoded '<' or '>'
# is never mistaken for the start of a token.  A token that cannot be
# converted is reported on STDERR (with field name $k) and left in place.
# Returns (converted text, 1 if every token was converted else 0).
sub _decode_field {
	my ($enc, $k, $text) = @_;
	my $okay = 1;

	$text =~ s/^"//;
	$text =~ s/"$//;
	$text =~ s{<(.*?)>}{
		my $cm = $1;
		my $rv = decodecldr($enc, $cm);
		if (!defined $rv) {
			print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
			$okay = 0;
			$rv = "<$cm>";
		}
		$rv;
	}ge;

	return ($text, $okay);
}

# Write "$TYPE.draft/<locale>.<encoding>.src" for every selected locale and
# each of its encodings, using the values gathered by get_fields() and the
# field formats in %keys:
#   "i", "ai"	value written unchanged
#   "s"		string decoded with decodecldr()
#   "as"	';'-separated strings, one decoded string per output line
#   ">field"	use the value and format of another field
#   "<callback<source<format"
#		value computed by $callback{callback} from field "source"
# Each field is preceded by a comment taken from %DESC.  The SHA-1 of the
# generated text is stored in %languages and %hashtable.  A file in which a
# token could not be converted is named ".failed" instead of ".src".
# Dies on an unknown field or format and when the output cannot be written.
sub print_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
				print "Skipping ${l}_" .
				    ($f eq "x" ? "" : "${f}_") .
				    "${c} - not read\n";
				next;
			}
			my $file = $l;
			$file .= "_" . $f if ($f ne "x");
			$file .= "_" . $c;
			print "Writing to $file in $enc\n";

			if ($enc ne $DEFENCODING &&
			    !defined $convertors{$enc}) {
				print "Failed! Cannot convert to $enc.\n";
				next;
			}

			my $okay = 1;
			my $output = "";
			foreach my $k (keys(%keys)) {
				my $field = $k;
				my $fmt = $keys{$k};

				die("Unknown $k in \%DESC")
					if (!defined $DESC{$k});

				$output .= "#\n# $DESC{$k}\n";

				# Replace one row with another
				if ($fmt =~ /^>/) {
					$field = substr($fmt, 1);
					$fmt = $keys{$field};
				}

				# Callback function
				if ($fmt =~ /^\</) {
					$callback{data} = {
						c => $c,
						k => $field,
						l => $l,
						e => $enc,
					};
					my @spec = split(/\</, substr($fmt, 1));
					my $rv = $callback{$spec[0]}->(
					    $values{$l}{$f}{$c}{$spec[1]});
					$values{$l}{$f}{$c}{$field} = $rv;
					$fmt = $spec[2];
					$callback{data} = undef;
				}

				my $v = $values{$l}{$f}{$c}{$field};
				$v = "undef" if (!defined $v);

				if ($fmt eq "i" || $fmt eq "ai") {
					$output .= "$v\n";
				} elsif ($fmt eq "s") {
					my ($text, $good) =
					    _decode_field($enc, $field, $v);
					$okay = 0 if (!$good);
					$output .= "$text\n";
				} elsif ($fmt eq "as") {
					foreach my $item (split(/;/, $v)) {
						my ($text, $good) =
						    _decode_field($enc, $field,
							$item);
						$okay = 0 if (!$good);
						$output .= "$text\n";
					}
				} else {
					die("$field is '$fmt'");
				}
			}

			my $digest = sha1_hex($output);
			$languages{$l}{$f}{data}{$c}{$enc} = $digest;
			$hashtable{$digest}{"${l}_${f}_${c}.$enc"} = 1;

			my $draft = "$TYPE.draft/$file.$enc.new";
			open(my $fout, '>', $draft)
			    or die("Cannot write $draft: $!");
			print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			print $fout "$output# EOF\n";
			close($fout)
			    or die("Cannot write $draft: $!");

			my $final = "$TYPE.draft/$file.$enc." .
			    ($okay ? "src" : "failed");
			rename($draft, $final)
			    or die("Cannot rename $draft to $final: $!");
		}
	}
	}
	}
}
767
# Write "$TYPE.draft/Makefile" for the files generated in this run.
# Nothing is written when a --lc filter is active.
#
# The Makefile contains:
#   - the suffix rule that builds an output file from a .src file;
#   - one "SAME+=" line for every locale whose content hash equals that of
#     another locale (the first name in the type-specific preference order
#     is kept, and the others are dropped from LOCALES by undefining their
#     %languages entry);
#   - one "LOCALES+=" line for every remaining locale/encoding;
#   - "SAME+=" lines for the nc_link and e_link legacy names;
#   - the trailing rules that turn SAME/LOCALES into SYMLINKS and FILES.
# Dies when the Makefile cannot be written.
sub make_makefile {
	return if ($#filter > -1);
	print "Creating Makefile for $TYPE\n";
	my $SRCOUT;
	my $SRCOUT2;
	my $MAPLOC;
	# Preference order among locales with identical content; the first
	# name after sorting becomes the target the others link to.
	my $preferred;
	if ($TYPE eq "colldef") {
		$SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
			"\t-f \${MAPLOC}/map.UTF-8 " .
			"\${.OBJDIR}/\${.IMPSRC:T:R}";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT2 = "LC_COLLATE";
		# For colldef, weight LOCALES to UTF-8
		#     Sort as upper-case and reverse to achieve it
		#     Make en_US, ru_RU, and ca_AD preferred
		$preferred = sub {
			return -1 if ($a eq 'en_x_US.UTF-8' ||
				      $a eq 'ru_x_RU.UTF-8' ||
				      $a eq 'ca_x_AD.UTF-8');
			return 1 if ($b eq 'en_x_US.UTF-8' ||
				     $b eq 'ru_x_RU.UTF-8' ||
				     $b eq 'ca_x_AD.UTF-8');
			return uc($b) cmp uc($a);
		};
	}
	elsif ($TYPE eq "ctypedef") {
		$SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:C/^.*\\.//} " .
			"\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
			" || true";
		$SRCOUT2 = "LC_CTYPE";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$preferred = sub {
			return -1 if ($a eq 'en_x_US.UTF-8');
			return 1 if ($b eq 'en_x_US.UTF-8');
			return -1 if ($a =~ /^en_x_US/);
			return 1 if ($b =~ /^en_x_US/);
			return -1 if ($a =~ /^en_x_GB\.ISO8859-15/ ||
				      $a =~ /^ru_x_RU/);
			return 1 if ($b =~ /^en_x_GB\.ISO8859-15/ ||
				     $b =~ /^ru_x_RU/);
			return uc($b) cmp uc($a);
		};
	}
	else {
		$SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
		$SRCOUT2 = "out";
		$MAPLOC = "";
		$preferred = sub {
			return 1 if ($a =~ /_Comm_/ ||
				     $b eq 'en_x_US.UTF-8');
			return -1 if ($b =~ /_Comm_/ ||
				      $a eq 'en_x_US.UTF-8');
			return uc($b) cmp uc($a);
		};
	}

	my $makefile = "$TYPE.draft/Makefile";
	open(my $fout, '>', $makefile)
	    or die("Cannot write $makefile: $!");
	print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR=	/usr/share/locale
FILESNAME=	$FILESNAMES{$TYPE}
.SUFFIXES:	.src .${SRCOUT2}
${MAPLOC}
.src.${SRCOUT2}:
	$SRCOUT

## PLACEHOLDER

EOF

	foreach my $hash (keys(%hashtable)) {
		my @files = sort $preferred keys(%{$hashtable{$hash}});
		next if ($#files < 1);

		my $link = shift(@files);
		$link =~ s/_x_/_/;	# strip family if none there
		foreach my $file (@files) {
			# "la_fam_CC.enc" -> (la, fam, "CC.enc") -> (CC, enc)
			my @name = split(/_/, $file);
			my @tail = split(/\./, $name[-1]);
			$file =~ s/_x_/_/;
			print $fout "SAME+=\t\t$link:$file\n";
			# Linked locales must not show up in LOCALES below.
			undef($languages{$name[0]}{$name[1]}{data}{$tail[0]}{$tail[1]});
		}
	}

	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
		 && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
			print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
			    "${c} - not read\n";
			next;
		}

		# Locale name without the "x" placeholder family.
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if (!defined $languages{$l}{$f}{data}{$c}{$e});
			print $fout "LOCALES+=\t$file.$e\n";
		}

		if (defined $languages{$l}{$f}{nc_link}) {
			foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
				print $fout "SAME+=\t\t$file.$e:$languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
			}
		}

		if (defined $languages{$l}{$f}{e_link}) {
			foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
				my @pair = split(/:/, $el);
				print $fout "SAME+=\t\t$file.$pair[0]:$file.$pair[1]\t# legacy (same charset)\n";
			}
		}

	}
	}
	}

	print $fout <<EOF;

FILES=		\${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES=	\${FILES}

.for f in \${SAME}
SYMLINKS+=	../\${f:C/:.*\$//}/\${FILESNAME} \${LOCALEDIR}/\${f:C/^.*://}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

.include <bsd.prog.mk>
EOF

	close($fout)
	    or die("Cannot write $makefile: $!");
}
925