xref: /freebsd/tools/tools/locale/tools/cldr2def.pl (revision 148a8da8)
1#!/usr/local/bin/perl -wC
2# $FreeBSD$
3
4use strict;
5use File::Copy;
6use XML::Parser;
7use Tie::IxHash;
8use Text::Iconv;
9#use Data::Dumper;
10use Getopt::Long;
11use Digest::SHA qw(sha1_hex);
12require "charmaps.pm";
13
14
# Encoding of the CLDR source files; every other encoding is derived from it.
my $DEFENCODING = "UTF-8";

# Command-line settings, all mandatory:
#   --unidir  directory holding UnicodeData.txt and the posix/*.src files
#   --etc     directory holding the charmaps and the XML configuration
#   --type    definition type to generate (colldef, ctypedef, monetdef,
#             msgdef, numericdef or timedef)
my $UNIDIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;

my $result = GetOptions (
		"unidir=s"	=> \$UNIDIR,
		"etc=s"		=> \$ETCDIR,
		"type=s"	=> \$TYPE,
	    );

# Validate after parsing: counting @ARGV elements cannot tell whether the
# three required options were really supplied (or were spelled correctly).
if (!$result || !defined $UNIDIR || !defined $ETCDIR || !defined $TYPE) {
	print "Usage: $0 --unidir=<unidir> --etc=<etcdir> --type=<type>\n";
	exit(1);
}
31
# File-wide state shared by the subroutines below.
my (%convertors, %ucd, %values);
my (%languages, %translations, %encodings, %alternativemonths);
my (%utf8map, %utf8aliases);

# Tie::IxHash keeps keys in insertion order: %keys drives the order in
# which fields are written, %hashtable the order of the SAME+= lines.
tie(my %hashtable, "Tie::IxHash");
tie(my %keys, "Tie::IxHash");

# Load the configuration and the character tables.
get_languages();
get_unidata($UNIDIR);
get_utf8map("$UNIDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");
52
# Definition type => name of the locale category file it produces.
my %FILESNAMES = (
	"colldef"	=> "LC_COLLATE",
	"ctypedef"	=> "LC_CTYPE",
	"monetdef"	=> "LC_MONETARY",
	"msgdef"	=> "LC_MESSAGES",
	"numericdef"	=> "LC_NUMERIC",
	"timedef"	=> "LC_TIME",
);

# Post-processing hooks named in the "<hook<source<format" field
# specifications.  The "data" slot carries the locale being processed
# while a hook runs.
my %callback = (
	data		=> undef,
	altmon		=> \&callback_altmon,
	cbabmon		=> \&callback_abmon,
	cbampm		=> \&callback_ampm,
	cformat		=> \&callback_cformat,
	dformat		=> \&callback_dformat,
	dtformat	=> \&callback_dtformat,
	mdorder		=> \&callback_mdorder,
);

# Field name => comment text written above the field in the output.
my %DESC = (
	# Fields that are described by nothing more than their own name
	# (numericdef, monetdef, msgdef and two timedef entries).
	(map { $_ => $_ } qw(
	    decimal_point thousands_sep grouping
	    currency_symbol mon_decimal_point mon_thousands_sep mon_grouping
	    positive_sign negative_sign int_frac_digits frac_digits
	    p_cs_precedes p_sep_by_space n_cs_precedes n_sep_by_space
	    p_sign_posn n_sign_posn
	    yesexpr noexpr yesstr nostr
	    c_fmt md_order
	)),

	# monetdef
	"int_curr_symbol"	=>
	    "int_curr_symbol (last character always SPACE)",

	# timedef
	"abmon"		=> "Short month names",
	"mon"		=> "Long month names (as in a date)",
	"abday"		=> "Short weekday names",
	"day"		=> "Long weekday names",
	"t_fmt"		=> "X_fmt",
	"d_fmt"		=> "x_fmt",
	"am_pm"		=> "AM/PM",
	"d_t_fmt"	=> "date_fmt",
	"altmon"	=> "Long month names (without case ending)",
	"t_fmt_ampm"	=> "ampm_fmt",
);
118
{
	# Field lists for the field-based definition types, kept as ordered
	# key/format lists because the order of the keys is the order of the
	# output.  Formats: "s" string, "i" integer, "ai" integer list,
	# "as" string list; "<hook<source<format" runs the named callback
	# on the value of field "source" before emitting it as "format".
	my %fieldspec = (
		"numericdef" => [
		    "decimal_point"	=> "s",
		    "thousands_sep"	=> "s",
		    "grouping"		=> "ai",
		],
		"monetdef" => [
		    "int_curr_symbol"	=> "s",
		    "currency_symbol"	=> "s",
		    "mon_decimal_point"	=> "s",
		    "mon_thousands_sep"	=> "s",
		    "mon_grouping"	=> "ai",
		    "positive_sign"	=> "s",
		    "negative_sign"	=> "s",
		    "int_frac_digits"	=> "i",
		    "frac_digits"	=> "i",
		    "p_cs_precedes"	=> "i",
		    "p_sep_by_space"	=> "i",
		    "n_cs_precedes"	=> "i",
		    "n_sep_by_space"	=> "i",
		    "p_sign_posn"	=> "i",
		    "n_sign_posn"	=> "i",
		],
		"msgdef" => [
		    "yesexpr"		=> "s",
		    "noexpr"		=> "s",
		    "yesstr"		=> "s",
		    "nostr"		=> "s",
		],
		"timedef" => [
		    "abmon"		=> "<cbabmon<abmon<as",
		    "mon"		=> "as",
		    "abday"		=> "as",
		    "day"		=> "as",
		    "t_fmt"		=> "s",
		    "d_fmt"		=> "<dformat<d_fmt<s",
		    "c_fmt"		=> "<cformat<d_t_fmt<s",
		    "am_pm"		=> "<cbampm<am_pm<as",
		    "d_t_fmt"		=> "<dtformat<d_t_fmt<s",
		    "altmon"		=> "<altmon<mon<as",
		    "md_order"		=> "<mdorder<d_fmt<s",
		    "t_fmt_ampm"	=> "s",
		],
	);

	if ($TYPE eq "colldef") {
		transform_collation();
		make_makefile();
	} elsif ($TYPE eq "ctypedef") {
		transform_ctypes();
		make_makefile();
	} elsif (defined $fieldspec{$TYPE}) {
		%keys = @{$fieldspec{$TYPE}};
		get_fields();
		print_fields();
		make_makefile();
	}
}
194
sub callback_ampm {
	# Callback for am_pm: ru_RU gets a fixed AM/PM pair, transcoded
	# from UTF-8 when the target encoding is something else.  Every
	# other locale keeps the value it was given.
	my ($s) = @_;
	my $locale = $callback{data}{l} . "_" . $callback{data}{c};
	my $enc = $callback{data}{e};

	return $s if ($locale ne 'ru_RU');

	my $ampm = 'дп;пп';
	return $ampm if ($enc eq 'UTF-8');

	my $converted = Text::Iconv->new("utf-8", "$enc")->convert($ampm);
	return $converted;
}
210
sub callback_cformat {
	# Callback for c_fmt: derive the format from the date/time format
	# string passed in and return the adjusted copy.
	my ($fmt) = @_;
	my $locale = $callback{data}{l} . "_" . $callback{data}{c};

	# ko_KR: weekday name goes in front of the AM/PM marker
	$fmt =~ s/(> )(%p)/$1%A $2/ if ($locale eq 'ko_KR');

	$fmt =~ s/\.,/\./;		# ".," becomes "."
	$fmt =~ s/ %Z//;		# no time zone name
	$fmt =~ s/ %z//;		# no time zone offset

	# Put the weekday name in front of formats starting with the day
	# or the month.
	$fmt =~ s/^"%e\./%A %e/;
	$fmt =~ s/^"(%B %e, )/"%A, $1/;
	$fmt =~ s/^"(%e %B )/"%A $1/;

	return $fmt;
}
226
sub callback_dformat {
	# Callback for d_fmt: in purely numeric dates (day and month joined
	# by <SOLIDUS>, "-" or ".") use %d instead of %e for the day.
	my ($fmt) = @_;

	for ($fmt) {
		s/(%m(<SOLIDUS>|[-.]))%e/$1%d/;	# month first
		s/%e((<SOLIDUS>|[-.])%m)/%d$1/;	# day first
	}
	return $fmt;
}
234
sub callback_dtformat {
	# Callback for d_t_fmt: apply the per-locale CJK tweaks, then the
	# general clean-ups, and return the adjusted format string.
	my ($fmt) = @_;
	my $locale = $callback{data}{l} . "_" . $callback{data}{c};

	if ($locale eq 'ja_JP') {
		# weekday name in front of the hour
		$fmt =~ s/(> )(%H)/$1%A $2/;
	} elsif ($locale eq 'zh_CN' || $locale eq 'zh_TW') {
		$fmt =~ s/%m/%_m/;
		# weekday name in front of the AM/PM marker
		$fmt =~ s/(> )(%p)/$1%A $2/;
	} elsif ($locale eq 'ko_KR') {
		$fmt =~ s/(> )(%p)/$1%A $2/;
	}

	$fmt =~ s/\.,/\./;		# ".," becomes "."

	# Put the weekday name in front of formats starting with the day
	# or the month.
	$fmt =~ s/^"%e\./%A %e/;
	$fmt =~ s/^"(%B %e, )/"%A, $1/;
	$fmt =~ s/^"(%e %B )/"%A $1/;

	return $fmt;
}
253
sub callback_mdorder {
	# Callback for md_order: reduce a date format to the order of its
	# day and month conversions, e.g. "dm" or "md".  Returns undef when
	# no format is available.
	my ($fmt) = @_;
	return undef unless (defined $fmt);

	$fmt =~ tr/dem//cd;	# keep only the letters d, e and m
	$fmt =~ tr/e/d/;	# %e is a day as well
	return $fmt;
}
261
sub callback_altmon {
	# Callback for altmon: when %alternativemonths has an entry for
	# the current language/country, return its ";"-separated names
	# with surrounding whitespace trimmed; otherwise hand back the
	# value passed in (the regular month names).
	my ($s) = @_;

	my $alt = $alternativemonths{$callback{data}{l}}{$callback{data}{c}};
	return $s unless (defined $alt);

	my @cleaned = map {
		my $name = $_;
		$name =~ s/^\s+//;
		$name =~ s/\s+$//;
		$name;
	} split(";", $alt);

	return join(";", @cleaned);
}
281
sub callback_abmon {
	# Callback for abmon: for the listed CJK locales, prefix names that
	# start with a single-digit month number with <space> so that
	# columns line up (style established in FreeBSD in 2001).
	my ($s) = @_;
	my $locale = $callback{data}{l} . "_" . $callback{data}{c};

	my %padded = map { $_ => 1 } qw(ja_JP ko_KR zh_CN zh_HK zh_TW);
	return $s unless ($padded{$locale});

	my @cleaned = map {
		my $name = $_;
		# "<one>" on its own is January; followed by <zero>, <one>
		# or <two> it is the first digit of months 10 to 12.
		my $single_digit =
		    $name =~ /^"<(two|three|four|five|six|seven|eight|nine)>/ ||
		    ($name =~ /^"<one>/ &&
		     $name !~ /^"<one>(<zero>|<one>|<two>)/);
		$name =~ s/^"/"<space>/ if ($single_digit);
		$name;
	} split(";", $s);

	return join(";", @cleaned);
}
305
306############################
307
sub get_unidata {
	# Load UnicodeData.txt from the given directory into %ucd:
	#   $ucd{code2name}{<code>} = <name>
	#   $ucd{name2code}{<name>} = <code>
	# Dies when the file cannot be opened.
	my $directory = shift;
	my $path = "$directory/UnicodeData.txt";

	open(my $fin, '<', $path)
	    or die("Cannot open $path: $!");

	while (my $l = <$fin>) {
		chomp($l);
		my @a = split(/;/, $l);

		# A blank or truncated line has no code/name pair to store.
		next if (@a < 2);

		$ucd{code2name}{"$a[0]"} = $a[1];	# Unicode name
		$ucd{name2code}{"$a[1]"} = $a[0];	# Unicode code
	}
	close($fin);
	return;
}
324
sub get_utf8map {
	# Read the CHARMAP section of a charmap file into
	#   %utf8map      name (underscores turned into spaces) => hex bytes
	#   %utf8aliases  name => previous name with the same byte sequence
	# A file that cannot be opened is reported on STDERR and leaves both
	# maps untouched.
	my $file = shift;

	my $fin;
	if (!open($fin, '<', $file)) {
		warn "Cannot open $file: $!\n";
		return;
	}
	my @lines = <$fin>;
	close($fin);
	chomp(@lines);

	my $prev_k = undef;
	my $prev_v = "";
	my $incharmap = 0;
	foreach my $l (@lines) {
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		# Skip anything that is not "<NAME> value"; reading $1/$2
		# after a failed match would reuse the previous line's data.
		my ($k, $v) = ($l =~ /^<([^\s]+)>\s+(.*)/);
		next if (!defined $k);

		$k =~ s/_/ /g;		# unicode char string
		$v =~ s/\\x//g;		# UTF-8 char code
		$utf8map{$k} = $v;

		# Consecutive entries with the same bytes are aliases.
		$utf8aliases{$k} = $prev_k if ($prev_v eq $v);

		$prev_v = $v;
		$prev_k = $k;
	}
	return;
}
362
sub get_encodings {
	# For every encoding named in %encodings, read "$dir/<encoding>.TXT"
	# and fill $convertors{<encoding>}{<unicode code>} = <local code>
	# (both upper-case hex without the "0x" prefix).  Encodings whose
	# charmap cannot be opened are reported and skipped.
	my $dir = shift;

	foreach my $e (sort(keys(%encodings))) {
		my $fin;
		if (!open($fin, '<', "$dir/$e.TXT")) {
			print "Cannot open charmap for $e\n";
			next;
		}
		$encodings{$e} = 1;

		while (my $l = <$fin>) {
			chomp($l);
			$l =~ s/\r//;
			next if ($l =~ /^\#/);
			next if ($l eq "");

			my @a = split(" ", $l);
			next if ($#a < 1);	# need two columns
			$a[0] =~ s/^0[xX]//;	# local char code
			$a[1] =~ s/^0[xX]//;	# unicode char code
			$convertors{$e}{uc($a[1])} = uc($a[0]);
		}
		close($fin);
	}
	return;
}
388
sub get_languages {
	# Fetch the XML configuration through get_xmldata() and copy its
	# L, T, AM and E sections into the file-wide hashes.
	my %data = get_xmldata($ETCDIR);

	my @sections = (
		[ L  => \%languages ],
		[ T  => \%translations ],
		[ AM => \%alternativemonths ],
		[ E  => \%encodings ],
	);
	foreach my $section (@sections) {
		my ($name, $target) = @{$section};
		%{$target} = %{$data{$name}};
	}
}
396
sub transform_ctypes {
	# Write the LC_CTYPE sources into "$TYPE.draft/".
	#
	# Every locale gets a copy of the common xx_Comm_C.UTF-8.src for the
	# default encoding.  For each other encoding of the locale, the
	# comment_char/escape_char lines and the LC_CTYPE section of the
	# locale's own source are extracted.  The SHA-1 of each result is
	# stored in %languages and %hashtable so that identical outputs can
	# be linked later.  Dies when a file cannot be opened or written.

	# Add the C.UTF-8
	$languages{"C"}{"x"}{data}{"x"}{$DEFENCODING} = undef;

	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file = $l;
		$file .= "_" . $f if ($f ne "x");
		$file .= "_" . $c if ($c ne "x");
		my $actfile = $file;

		my $filename = "$UNIDIR/posix/xx_Comm_C.UTF-8.src";
		if (! -f $filename) {
			print STDERR "Cannot open $filename\n";
			next;
		}
		open(my $fin, '<', $filename)
		    or die("Cannot open $filename: $!");
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		close($fin);

		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(my $fout, '>', $outname)
		    or die("Cannot create $outname: $!");
		print $fout @lines;
		close($fout)
		    or die("Cannot write $outname: $!");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			$filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
			if (! -f $filename) {
				print STDERR "Cannot open $filename\n";
				next;
			}
			@lines = ();
			open(my $encin, '<', $filename)
			    or die("Cannot open $filename: $!");
			while (<$encin>) {
				if ((/^comment_char\s/) || (/^escape_char\s/)){
					push @lines, $_;
				}
				if (/^LC_CTYPE/../^END LC_CTYPE/) {
					push @lines, $_;
				}
			}
			close($encin);

			# The encoding is part of the hash: the same text
			# compiles differently for different charmaps.
			my $uhex = sha1_hex(join("\n", @lines) . $enc);
			$languages{$l}{$f}{data}{$c}{$enc} = $uhex;
			$hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;

			my $encname = "$TYPE.draft/$actfile.$enc.src";
			open(my $encout, '>', $encname)
			    or die("Cannot create $encname: $!");
			print $encout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			print $encout @lines;
			close($encout)
			    or die("Cannot write $encname: $!");
		}
	}
	}
	}
}
468
469
sub transform_collation {
	# Write the LC_COLLATE sources into "$TYPE.draft/".
	#
	# For every locale the comment_char/escape_char lines and the
	# LC_COLLATE section (runs of spaces squeezed to one) are extracted
	# from the locale's UTF-8 source and written for the default
	# encoding; the result is then copied for each other encoding.  The
	# SHA-1 of the extract is stored in %languages and %hashtable so
	# that identical outputs can be linked later.  Dies when a file
	# cannot be opened, written or copied.
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file;
		$file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;
		my $actfile = $file;

		# Source lookup order: CLDR directory, etc directory, then
		# the configured fallback locale in the CLDR directory.
		my $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$UNIDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		open(my $fin, '<', $filename)
		    or die("Cannot open $filename: $!");
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines;
		while (<$fin>) {
			if ((/^comment_char\s/) || (/^escape_char\s/)){
				push @lines, $_;
			}
			if (/^LC_COLLATE/../^END LC_COLLATE/) {
				$_ =~ s/[ ]+/ /g;
				push @lines, $_;
			}
		}
		close($fin);

		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(my $fout, '>', $outname)
		    or die("Cannot create $outname: $!");
		print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
		print $fout @lines;
		close($fout)
		    or die("Cannot write $outname: $!");

		# The collation source is the same for every encoding.
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			my $encname = "$TYPE.draft/$actfile.$enc.src";
			copy($outname, $encname)
			    or die("Cannot copy $outname to $encname: $!");
			$languages{$l}{$f}{data}{$c}{$enc} = $shex;
			$hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
		}
	}
	}
	}
}
537
sub get_fields {
	# Read the UTF-8 source of every locale and collect, for each field
	# named in %keys, its (possibly "/"-continued) value into
	# $values{<lang>}{<family>}{<country>}{<field>}.  Underscores inside
	# <...> symbol names are turned into spaces.  Dies when a source
	# cannot be opened or when a value contains an underscore outside a
	# symbol name.
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);

		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file;
		$file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		# Source lookup order: CLDR directory, etc directory, then
		# the configured fallback locale in the CLDR directory.
		my $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$UNIDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		open(my $fin, '<', $filename)
		    or die("Cannot open $filename: $!");
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		chomp(@lines);
		close($fin);

		foreach my $k (keys(%keys)) {
			# Per field, so that a file ending in the middle of a
			# continuation cannot affect the next field.
			my $continue = 0;
			foreach my $line (@lines) {
				$line =~ s/\r//;
				next if (!$continue && $line !~ /^$k\s/);
				if ($continue) {
					$line =~ s/^\s+//;
				} else {
					$line =~ s/^$k\s+//;
				}

				$values{$l}{$f}{$c}{$k} = ""
					if (!defined $values{$l}{$f}{$c}{$k});

				# A trailing "/" continues on the next line.
				$continue = ($line =~ /\/$/);
				$line =~ s/\/$// if ($continue);

				# Repeat until no symbol name holds an
				# underscore.  Stopping when the substitution
				# fails (instead of when no "_" is left) lets
				# the check below fire rather than looping
				# forever on a stray underscore.
				1 while ($line =~
				    s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
				die "_ in data - $line" if ($line =~ /_/);
				$values{$l}{$f}{$c}{$k} .= $line;

				last if (!$continue);
			}
		}
	}
	}
	}
}
603
sub decodecldr {
	# Convert one Unicode character name into its byte sequence.
	#
	#   $e  target encoding
	#   $s  Unicode character name
	#
	# Returns the bytes of the character in encoding $e.  Dies when the
	# name cannot be resolved.  When the resolved value is not a string
	# of hex digit pairs, an error is printed on STDERR and a
	# "length = <n>" marker string is returned.
	my $e = shift;
	my $s = shift;

	my $v = undef;

	if ($e eq "UTF-8") {
		#
		# Conversion to UTF-8 can be done from the Unicode name to
		# the UTF-8 character code.
		#
		$v = $utf8map{$s};
		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	} else {
		#
		# Conversion to these encodings can be done from the Unicode
		# name to Unicode code to the encodings code.
		#
		my $ucc = undef;
		$ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
		$ucc = $ucd{name2code}{$utf8aliases{$s}}
			if (!defined $ucc
			 && $utf8aliases{$s}
			 && defined $ucd{name2code}{$utf8aliases{$s}});

		if (!defined $ucc) {
			if (defined $translations{$e}{$s}{hex}) {
				$v = $translations{$e}{$s}{hex};
				$ucc = 0;
			} elsif (defined $translations{$e}{$s}{ucc}) {
				$ucc = $translations{$e}{$s}{ucc};
			}
		}

		die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
		$v = $convertors{$e}{$ucc} if (!defined $v);

		$v = $translations{$e}{$s}{hex}
			if (!defined $v && defined $translations{$e}{$s}{hex});

		if (!defined $v && defined $translations{$e}{$s}{unicode}) {
			# Substitute character: resolve it on its own.  $ucc
			# still holds the code of the original character, so
			# reusing it would make the alias lookup unreachable.
			my $ucn = $translations{$e}{$s}{unicode};
			my $alias = $utf8aliases{$ucn};
			my $altucc = $ucd{name2code}{$ucn};
			$altucc = $ucd{name2code}{$alias}
				if (!defined $altucc && defined $alias);
			$v = $convertors{$e}{$altucc} if (defined $altucc);
		}

		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	}

	# Any whole number of hex digit pairs is turned into bytes, so
	# four-byte sequences work as well as shorter ones.
	return pack("H*", $v) if ($v =~ /^(?:[0-9A-Fa-f]{2})+\z/);

	print STDERR "Cannot convert $e $s\n";
	return "length = " . length($v);

}
666
sub translate {
	# Look up the %translations entry for character name $v in
	# encoding $enc; undef when there is none.
	my ($enc, $v) = @_;

	my $entry = $translations{$enc}{$v};
	return defined $entry ? $entry : undef;
}
674
sub print_fields {
	# Write the collected field values of every locale, once per
	# encoding, to "$TYPE.draft/<locale>.<encoding>.src" (or ".failed"
	# when a symbol could not be converted).  Fields are written in the
	# order of %keys, each preceded by its %DESC comment.  The SHA-1 of
	# the generated text is stored in %languages and %hashtable so that
	# identical outputs can be linked later.  Dies when the output file
	# cannot be created or written.
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
				print "Skipping ${l}_" .
				    ($f eq "x" ? "" : "${f}_") .
				    "${c} - not read\n";
				next;
			}
			my $file = $l;
			$file .= "_" . $f if ($f ne "x");
			$file .= "_" . $c;
			print "Writing to $file in $enc\n";

			if ($enc ne $DEFENCODING &&
			    !defined $convertors{$enc}) {
				print "Failed! Cannot convert to $enc.\n";
				next;
			}

			my $draft = "$TYPE.draft/$file.$enc.new";
			open(my $fout, '>', $draft)
			    or die("Cannot create $draft: $!");
			my $okay = 1;
			my $output = "";
			print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF

			# Strip the surrounding quotes from one string value
			# of field $k and replace each <symbol> by its bytes
			# in $enc.  Converted text is moved to $done and never
			# scanned again, so a decoded "<" cannot be mistaken
			# for the start of another symbol.
			my $expand = sub {
				my ($k, $text) = @_;
				$text =~ s/^"//;
				$text =~ s/"$//;
				my $done = "";
				while ($text =~ /^(.*?)<(.*?)>(.*)/) {
					my ($p1, $cm, $p3) = ($1, $2, $3);

					my $rv = decodecldr($enc, $cm);
					if (!defined $rv) {
						print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
						$okay = 0;
						# Give up on this value;
						# retrying the same symbol
						# would never terminate.
						last;
					}

					$done .= $p1 . $rv;
					$text = $p3;
				}
				return $done . $text;
			};

			foreach my $k (keys(%keys)) {
				my $g = $keys{$k};

				die("Unknown $k in \%DESC")
					if (!defined $DESC{$k});

				$output .= "#\n# $DESC{$k}\n";

				# Replace one row with another
				if ($g =~ /^>/) {
					$k = substr($g, 1);
					$g = $keys{$k};
				}

				# Callback function: "<hook<source<format"
				if ($g =~ /^\</) {
					$callback{data}{c} = $c;
					$callback{data}{k} = $k;
					$callback{data}{f} = $f;
					$callback{data}{l} = $l;
					$callback{data}{e} = $enc;
					my @a = split(/\</, substr($g, 1));
					my $rv = $callback{$a[0]}->(
					    $values{$l}{$f}{$c}{$a[1]});
					$values{$l}{$f}{$c}{$k} = $rv;
					$g = $a[2];
					$callback{data} = ();
				}

				my $v = $values{$l}{$f}{$c}{$k};
				$v = "undef" if (!defined $v);

				# Integers and integer lists go out verbatim.
				if ($g eq "i" || $g eq "ai") {
					$output .= "$v\n";
					next;
				}
				if ($g eq "s") {
					$output .= $expand->($k, $v) . "\n";
					next;
				}
				# String list: one output line per element.
				if ($g eq "as") {
					foreach my $item (split(/;/, $v)) {
						$output .=
						    $expand->($k, $item) . "\n";
					}
					next;
				}

				die("$k is '$g'");

			}

			$languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
			$hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
			print $fout "$output# EOF\n";
			close($fout)
			    or die("Cannot write $draft: $!");

			if ($okay) {
				rename("$TYPE.draft/$file.$enc.new",
				    "$TYPE.draft/$file.$enc.src");
			} else {
				rename("$TYPE.draft/$file.$enc.new",
				    "$TYPE.draft/$file.$enc.failed");
			}
		}
	}
	}
	}
}
823
sub make_makefile {
	# Write "$TYPE.draft/Makefile" for the current definition type.
	#
	# The Makefile holds the suffix rule that builds a locale file from
	# its .src, a SAME+= line for every output that is identical to
	# another one (grouped through %hashtable), a LOCALES+= line for
	# every output that remains, and the legacy links configured through
	# nc_link and e_link.  Outputs listed in SAME have their entry in
	# %languages undefined so that they are left out of LOCALES.  Dies
	# when the Makefile cannot be created or written.
	print "Creating Makefile for $TYPE\n";
	my $SRCOUT;		# recipe of the suffix rule
	my $SRCOUT2;		# suffix of the generated files
	my $SRCOUT3 = "";	# extra rules placed before the final include
	my $SRCOUT4 = "";	# extra placeholder after PLACEHOLDER
	my $MAPLOC;		# MAPLOC= line, when charmaps are needed
	if ($TYPE eq "colldef") {
		$SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U " .
			"-i \${.IMPSRC} \\\n" .
			"\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " .
			"\${.OBJDIR}/\${.IMPSRC:T:R}";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT2 = "LC_COLLATE";
		$SRCOUT3 = "" .
			".for f t in \${LOCALES_MAPPED}\n" .
			"FILES+=\t\$t.LC_COLLATE\n" .
			"FILESDIR_\$t.LC_COLLATE=\t\${LOCALEDIR}/\$t\n" .
			"\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" .
			"\tlocaledef \${LOCALEDEF_ENDIAN} -D -U " .
			"-i \${.ALLSRC} \\\n" .
			"\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" .
			"\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" .
			".endfor\n\n";
		$SRCOUT4 = "## LOCALES_MAPPED\n";
	}
	elsif ($TYPE eq "ctypedef") {
		$SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U -c " .
			"-w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:E} " .
			"\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
			" || true";
		$SRCOUT2 = "LC_CTYPE";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT3 = "## SYMPAIRS\n\n" .
			".for s t in \${SYMPAIRS}\n" .
			"\${t:S/src\$/LC_CTYPE/}: " .
			"\$s\n" .
			"\tlocaledef \${LOCALEDEF_ENDIAN} -D -U -c " .
			"-w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
			"\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
			" || true\n" .
			".endfor\n\n";
	}
	else {
		$SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
		$SRCOUT2 = "out";
		$MAPLOC = "";
	}

	my $makefile = "$TYPE.draft/Makefile";
	open(my $fout, '>', $makefile)
	    or die("Cannot create $makefile: $!");
	print $fout <<EOF;
# \$FreeBSD\$
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR=	\${SHAREDIR}/locale
FILESNAME=	$FILESNAMES{$TYPE}
.SUFFIXES:	.src .${SRCOUT2}
${MAPLOC}
EOF

	if ($TYPE eq "colldef" || $TYPE eq "ctypedef") {
		print $fout <<EOF;
.include <bsd.endian.mk>

EOF
	}

	print $fout <<EOF;
.src.${SRCOUT2}:
	$SRCOUT

## PLACEHOLDER

${SRCOUT4}

EOF

	# Outputs with the same hash are identical: the first one after
	# sorting is kept as a real file, the others become SAME links.
	foreach my $hash (keys(%hashtable)) {
		# For colldef, weight LOCALES to UTF-8
		#     Sort as upper-case and reverse to achieve it
		#     Make en_US, ru_RU, and ca_AD preferred
		my @files;
		if ($TYPE eq "colldef") {
			@files = sort {
				if ($a eq 'en_x_US.UTF-8' ||
				    $a eq 'ru_x_RU.UTF-8' ||
				    $a eq 'ca_x_AD.UTF-8') { return -1; }
				elsif ($b eq 'en_x_US.UTF-8' ||
				       $b eq 'ru_x_RU.UTF-8' ||
				       $b eq 'ca_x_AD.UTF-8') { return 1; }
				else { return uc($b) cmp uc($a); }
				} keys(%{$hashtable{$hash}});
		} elsif ($TYPE eq "ctypedef") {
			@files = sort {
				if ($a eq 'C_x_x.UTF-8') { return -1; }
				elsif ($b eq 'C_x_x.UTF-8') { return 1; }
				if ($a =~ /^en_x_US/) { return -1; }
				elsif ($b =~ /^en_x_US/) { return 1; }

				if ($a =~ /^en_x_GB.ISO8859-15/ ||
				    $a =~ /^ru_x_RU/) { return -1; }
				elsif ($b =~ /^en_x_GB.ISO8859-15/ ||
				       $b =~ /^ru_x_RU/) { return 1; }
				else { return uc($b) cmp uc($a); }

				} keys(%{$hashtable{$hash}});
		} else {
			@files = sort {
				if ($a =~ /_Comm_/ ||
				    $b eq 'en_x_US.UTF-8') { return 1; }
				elsif ($b =~ /_Comm_/ ||
				       $a eq 'en_x_US.UTF-8') { return -1; }
				else { return uc($b) cmp uc($a); }
				} keys(%{$hashtable{$hash}});
		}
		if ($#files > 0) {
			my $link = shift(@files);
			$link =~ s/_x_x//;	# special case for C
			$link =~ s/_x_/_/;	# strip family if none there
			foreach my $file (@files) {
				# "<lang>_<family>_<country>.<encoding>"
				my @a = split(/_/, $file);
				my @b = split(/\./, $a[-1]);
				$file =~ s/_x_/_/;
				print $fout "SAME+=\t\t$link $file\n";
				# Linked outputs must not show up in LOCALES.
				undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
			}
		}
	}

	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
		 && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
			print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
			    "${c} - not read\n";
			next;
		}
		foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			my $file = $l;
			$file .= "_" . $f if ($f ne "x");
			$file .= "_" . $c if ($c ne "x");
			next if (!defined $languages{$l}{$f}{data}{$c}{$e});
			print $fout "LOCALES+=\t$file.$e\n";
		}

		if (defined $languages{$l}{$f}{nc_link}) {
			foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
				my $file = $l . "_";
				$file .= $f . "_" if ($f ne "x");
				$file .= $c;
				print $fout "SAME+=\t\t$file.$e $languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
			}
		}

		if (defined $languages{$l}{$f}{e_link}) {
			# e_link is a space-separated list of "from:to"
			# encoding pairs.
			foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
				my @a = split(/:/, $el);
				my $file = $l . "_";
				$file .= $f . "_" if ($f ne "x");
				$file .= $c;
				print $fout "SAME+=\t\t$file.$a[0] $file.$a[1]\t# legacy (same charset)\n";
			}
		}

	}
	}
	}

	print $fout <<EOF;

FILES=		\${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES=	\${FILES}

.for f t in \${SAME}
SYMLINKS+=	../\$f/\${FILESNAME} \\
    \${LOCALEDIR}/\$t/\${FILESNAME}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

${SRCOUT3}.include <bsd.prog.mk>
EOF

	close($fout)
	    or die("Cannot write $makefile: $!");
}
1018