xref: /freebsd/tools/tools/locale/tools/cldr2def.pl (revision b0b1dbdd)
1#!/usr/local/bin/perl -wC
2# $FreeBSD$
3
4use strict;
5use File::Copy;
6use XML::Parser;
7use Tie::IxHash;
8use Text::Iconv;
9use Data::Dumper;
10use Getopt::Long;
11use Digest::SHA qw(sha1_hex);
12require "charmaps.pm";
13
14
# Command-line handling.  --cldr, --unidata, --etc and --type are mandatory;
# --lc optionally restricts processing to a single locale.
my $USAGE = "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";

# Cheap early check: fewer than three arguments cannot be a valid call.
if ($#ARGV < 2) {
	print $USAGE;
	exit(1);
}

my $DEFENCODING = "UTF-8";	# encoding suffix of the source files that are read
my @filter = ();		# (language, family, country) when --lc is given

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $doonly = undef;

my $result = GetOptions (
		"cldr=s"	=> \$CLDRDIR,
		"unidata=s"	=> \$UNIDATADIR,
		"etc=s"		=> \$ETCDIR,
		"type=s"	=> \$TYPE,
		"lc=s"		=> \$doonly
	    );

# GetOptions() returns false on unknown or malformed options.  Every
# mandatory value is interpolated into a path or compared as a string later
# on, so stop here instead of continuing with undefined values.
if (!$result
    || grep { !defined $_ } ($CLDRDIR, $UNIDATADIR, $ETCDIR, $TYPE)) {
	print $USAGE;
	exit(1);
}
36
# State shared by the subs below.
#   %convertors        encoding -> Unicode code -> local code (get_encodings)
#   %ucd               code2name / name2code tables (get_unidata)
#   %values            language -> family -> country -> field (get_fields)
#   %hashtable         content hash -> set of "lang_family_country.encoding"
#   %languages, %translations, %encodings, %alternativemonths
#                      filled by get_languages()
my (%convertors, %ucd, %values, %hashtable);
my (%languages, %translations, %encodings, %alternativemonths);
get_languages();

#   %utf8map           character name -> hex string from the charmap file
#   %utf8aliases       character name -> previous name with the same code
my (%utf8map, %utf8aliases);
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# Both hashes are tied to Tie::IxHash so that iterating over their keys
# follows insertion order.
my %keys;
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");
57
# --type value -> name written after "FILESNAME=" in the generated Makefile.
my %FILESNAMES = (
	colldef    => "LC_COLLATE",
	ctypedef   => "LC_CTYPE",
	monetdef   => "LC_MONETARY",
	msgdef     => "LC_MESSAGES",
	numericdef => "LC_NUMERIC",
	timedef    => "LC_TIME",
);
66
# Dispatch table for the "<name<source<format" field specs used for timedef.
# The "data" slot is filled by print_fields() with the current locale
# (keys c, k, f, l, e) just before a callback runs and cleared afterwards.
my %callback = (
	"altmon"   => \&callback_altmon,
	"cbabmon"  => \&callback_abmon,
	"cbampm"   => \&callback_ampm,
	"cformat"  => \&callback_cformat,
	"dformat"  => \&callback_dformat,
	"dtformat" => \&callback_dtformat,
	"mdorder"  => \&callback_mdorder,
	"data"     => undef,
);
77
# Field name -> text of the "# ..." comment that print_fields() writes above
# the field's value.  print_fields() dies for a field that is missing here.
my %DESC = (

	# Fields whose comment is just the field name
	# (numericdef, monetdef, msgdef and two timedef entries).
	(map { $_ => $_ } qw(
	    decimal_point thousands_sep grouping
	    currency_symbol mon_decimal_point mon_thousands_sep mon_grouping
	    positive_sign negative_sign int_frac_digits frac_digits
	    p_cs_precedes p_sep_by_space n_cs_precedes n_sep_by_space
	    p_sign_posn n_sign_posn
	    yesexpr noexpr yesstr nostr
	    c_fmt md_order
	)),

	# monetdef
	"int_curr_symbol" => "int_curr_symbol (last character always SPACE)",

	# timedef
	"abmon"      => "Short month names",
	"mon"        => "Long month names (as in a date)",
	"abday"      => "Short weekday names",
	"day"        => "Long weekday names",
	"t_fmt"      => "X_fmt",
	"d_fmt"      => "x_fmt",
	"am_pm"      => "AM/PM",
	"d_t_fmt"    => "date_fmt",
	"altmon"     => "Long month names (without case ending)",
	"t_fmt_ampm" => "ampm_fmt",
);
123
# Field lists for the --type values that are produced field by field.
# Each list is "field => spec" in output order; it is assigned to the tied
# %keys hash, which keeps that order.  Specs understood by print_fields():
#   "i", "ai"             value is written as read
#   "s"                   quoted string, <NAME> tokens are decoded
#   "as"                  ';'-separated list of such strings
#   "<cb<source<spec"     run $callback{cb} on the value read for field
#                         "source", then treat the result as "spec"
my %FIELDS_FOR_TYPE = (
	"numericdef" => [
	    "decimal_point"	=> "s",
	    "thousands_sep"	=> "s",
	    "grouping"		=> "ai",
	],
	"monetdef" => [
	    "int_curr_symbol"	=> "s",
	    "currency_symbol"	=> "s",
	    "mon_decimal_point"	=> "s",
	    "mon_thousands_sep"	=> "s",
	    "mon_grouping"	=> "ai",
	    "positive_sign"	=> "s",
	    "negative_sign"	=> "s",
	    "int_frac_digits"	=> "i",
	    "frac_digits"	=> "i",
	    "p_cs_precedes"	=> "i",
	    "p_sep_by_space"	=> "i",
	    "n_cs_precedes"	=> "i",
	    "n_sep_by_space"	=> "i",
	    "p_sign_posn"	=> "i",
	    "n_sign_posn"	=> "i",
	],
	"msgdef" => [
	    "yesexpr"		=> "s",
	    "noexpr"		=> "s",
	    "yesstr"		=> "s",
	    "nostr"		=> "s",
	],
	"timedef" => [
	    "abmon"		=> "<cbabmon<abmon<as",
	    "mon"		=> "as",
	    "abday"		=> "as",
	    "day"		=> "as",
	    "t_fmt"		=> "s",
	    "d_fmt"		=> "<dformat<d_fmt<s",
	    "c_fmt"		=> "<cformat<d_t_fmt<s",
	    "am_pm"		=> "<cbampm<am_pm<as",
	    "d_t_fmt"		=> "<dtformat<d_t_fmt<s",
	    "altmon"		=> "<altmon<mon<as",
	    "md_order"		=> "<mdorder<d_fmt<s",
	    "t_fmt_ampm"	=> "s",
	],
);

# Run the generator selected by --type.  An unknown or missing type is a
# usage error; previously it fell through every test and exited silently.
my $requested_type = defined $TYPE ? $TYPE : "";
if ($requested_type eq "colldef") {
	transform_collation();
	make_makefile();
} elsif ($requested_type eq "ctypedef") {
	transform_ctypes();
	make_makefile();
} elsif (exists $FIELDS_FOR_TYPE{$requested_type}) {
	%keys = @{$FIELDS_FOR_TYPE{$requested_type}};
	get_fields();
	print_fields();
	make_makefile();
} else {
	die("Unknown or missing --type '$requested_type' (expected one of: " .
	    join(", ", sort keys(%FILESNAMES)) . ")\n");
}
199
# Callback for am_pm: returns the value unchanged except for ru_RU, where a
# fixed pair of strings is returned, converted with Text::Iconv from utf-8
# to the target encoding when that encoding is not UTF-8.
# Reads language, country and encoding from $callback{data}.
sub callback_ampm {
	my ($value) = @_;
	my $locale = join("_", $callback{data}{l}, $callback{data}{c});
	my $target = $callback{data}{e};

	return $value if ($locale ne 'ru_RU');
	return 'дп;пп' if ($target eq 'UTF-8');

	my $iconv = Text::Iconv->new("utf-8", "$target");
	return $iconv->convert("дп;пп");
}
215
# Callback for c_fmt: derives the value from the d_t_fmt string passed in.
# Reads language and country from $callback{data}.
sub callback_cformat {
	my ($fmt) = @_;
	my $locale = join("_", $callback{data}{l}, $callback{data}{c});

	for ($fmt) {
		# ko_KR only: insert "%A " between "> " and "%p"
		s/(> )(%p)/$1%A $2/ if ($locale eq 'ko_KR');

		s/\.,/\./;		# first ".," becomes "."
		s/ %Z//;		# drop the first " %Z"
		s/ %z//;		# drop the first " %z"

		# Put %A in front of formats that start with the day of month
		# or with the month name.
		s/^"%e\./%A %e/;
		s/^"(%B %e, )/"%A, $1/;
		s/^"(%e %B )/"%A $1/;
	}
	return $fmt;
}
231
# Callback for d_fmt: in a numeric date, where %e and %m are neighbours
# separated by <SOLIDUS>, "-" or ".", replace %e with %d.
# Only the first occurrence of each ordering is rewritten.
sub callback_dformat {
	my ($fmt) = @_;

	for ($fmt) {
		s/(%m(?:<SOLIDUS>|[-.]))%e/$1%d/;	# month before day
		s/%e((?:<SOLIDUS>|[-.])%m)/%d$1/;	# day before month
	}
	return $fmt;
}
239
# Callback for d_t_fmt: locale-specific tweaks for ja_JP, ko_KR, zh_CN and
# zh_TW followed by rewrites applied to every locale.
# Reads language and country from $callback{data}.
sub callback_dtformat {
	my ($fmt) = @_;
	my $locale = join("_", $callback{data}{l}, $callback{data}{c});
	my $is_zh = ($locale eq 'zh_CN' || $locale eq 'zh_TW');

	for ($fmt) {
		if ($locale eq 'ja_JP') {
			# insert "%A " between "> " and "%H"
			s/(> )(%H)/$1%A $2/;
		} elsif ($locale eq 'ko_KR' || $is_zh) {
			s/%m/%_m/ if ($is_zh);
			# insert "%A " between "> " and "%p"
			s/(> )(%p)/$1%A $2/;
		}

		s/\.,/\./;		# first ".," becomes "."

		# Put %A in front of formats that start with the day of month
		# or with the month name.
		s/^"%e\./%A %e/;
		s/^"(%B %e, )/"%A, $1/;
		s/^"(%e %B )/"%A $1/;
	}
	return $fmt;
}
258
# Callback for md_order: reduce a date format to the order of its day and
# month parts.  Every character except "d", "e" and "m" is removed and "e"
# is then written as "d".  Returns undef when no value was passed.
sub callback_mdorder {
	my ($order) = @_;

	if (defined $order) {
		$order =~ tr/dem//cd;	# keep only d, e and m
		$order =~ tr/e/d/;
		return $order;
	}
	return undef;
}
266
# Callback for altmon: when %alternativemonths has an entry for the current
# language and country, return its ';'-separated names with leading and
# trailing whitespace removed from each; otherwise return the value passed
# in (the "mon" value) unchanged.
sub callback_altmon {
	my $s = shift;
	my $alternative =
	    $alternativemonths{$callback{data}{l}}{$callback{data}{c}};

	return $s if (!defined $alternative);

	my @trimmed = map {
		my $name = $_;
		$name =~ s/^\s+//;
		$name =~ s/\s+$//;
		$name;
	} split(";", $alternative);
	return join(";", @trimmed);
}
286
# Callback for abmon: for the listed CJK locales, pad names with <space> so
# that columns line up (style established in FreeBSD in 2001).  A name is
# padded when it starts with <two> .. <nine>, or with <one> that is not
# followed by <zero>, <one> or <two>.  Other locales get the value back
# unchanged.
sub callback_abmon {
	my $s = shift;
	my $nl = join("_", $callback{data}{l}, $callback{data}{c});
	my %padded_locale = map { $_ => 1 }
	    qw(ja_JP ko_KR zh_CN zh_HK zh_TW);

	return $s if (!$padded_locale{$nl});

	my @padded;
	foreach my $name (split(";", $s)) {
		my $needs_space =
		    $name =~ /^"<(?:two|three|four|five|six|seven|eight|nine)>/
		    || ($name =~ /^"<one>/
		     && $name !~ /^"<one>(?:<zero>|<one>|<two>)/);
		$name =~ s/^"/"<space>/ if ($needs_space);
		push(@padded, $name);
	}
	return join(";", @padded);
}
310
311############################
312
# Load <directory>/UnicodeData.txt into %ucd.
#   $ucd{code2name}{CODE} = NAME	(first two ';'-separated fields)
#   $ucd{name2code}{NAME} = CODE
# Dies when the file cannot be opened.  Lines with fewer than two fields
# (for instance blank lines) are skipped instead of creating empty keys.
sub get_unidata {
	my $directory = shift;
	my $path = "$directory/UnicodeData.txt";

	open(my $fin, '<', $path)
	    or die("Cannot open $path: $!");

	while (my $l = <$fin>) {
		chomp($l);
		my @a = split(/;/, $l);
		next if (@a < 2);

		$ucd{code2name}{"$a[0]"} = $a[1];	# Unicode name
		$ucd{name2code}{"$a[1]"} = $a[0];	# Unicode code
	}
	close($fin);
}
329
# Read the charmap file $file into %utf8map and %utf8aliases.
# Only "<NAME> value" lines between "CHARMAP" and "END CHARMAP" are used.
#   $utf8map{NAME}     = value with every "\x" removed; underscores in
#                        NAME are replaced by spaces
#   $utf8aliases{NAME} = name of the preceding entry when that entry has
#                        the same value
# A file that cannot be opened is reported on STDERR and leaves both maps
# untouched.  Lines that do not have the "<NAME> value" shape are skipped.
sub get_utf8map {
	my $file = shift;

	my $fin;
	if (!open($fin, '<', $file)) {
		print STDERR "Cannot open $file: $!\n";
		return;
	}

	my $prev_k = undef;
	my $prev_v = "";
	my $incharmap = 0;
	while (my $l = <$fin>) {
		chomp($l);
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		# Without this check a non-matching line would be stored
		# using whatever $1/$2 happened to hold.
		next if ($l !~ /^<([^\s]+)>\s+(.*)/);
		my ($k, $v) = ($1, $2);
		$k =~ s/_/ /g;		# unicode char string
		$v =~ s/\\x//g;		# UTF-8 char code
		$utf8map{$k} = $v;

		$utf8aliases{$k} = $prev_k if ($prev_v eq $v);

		$prev_v = $v;
		$prev_k = $k;
	}
	close($fin);
}
367
# For every encoding named in %encodings, read $dir/<encoding>.TXT and fill
#   $convertors{ENCODING}{UNICODE CODE} = LOCAL CODE
# Both codes are taken from the first two whitespace-separated columns,
# stripped of a leading "0x" and upper-cased.  Encodings whose file cannot
# be opened are reported on stdout and skipped.
sub get_encodings {
	my $dir = shift;
	foreach my $e (sort(keys(%encodings))) {
		my $fin;
		if (!open($fin, '<', "$dir/$e.TXT")) {
			print "Cannot open charmap for $e\n";
			next;
		}
		$encodings{$e} = 1;
		while (my $l = <$fin>) {
			chomp($l);
			$l =~ s/\r//;
			next if ($l =~ /^\#/);
			next if ($l eq "");

			my @a = split(" ", $l);
			next if ($#a < 1);
			$a[0] =~ s/^0[xX]//;	# local char code
			$a[1] =~ s/^0[xX]//;	# unicode char code
			$convertors{$e}{uc($a[1])} = uc($a[0]);
		}
		close($fin);
	}
}
393
# Fill %languages, %translations, %alternativemonths and %encodings from the
# L, T, AM and E entries returned by get_xmldata($ETCDIR).  When --lc was
# given, split it on "_" into @filter as (language, family, country); a
# two-part value gets the placeholder family "x".  Values with any other
# number of parts leave @filter empty.  The resulting filter is dumped to
# stdout.
sub get_languages {
	my %xml = get_xmldata($ETCDIR);
	%languages = %{$xml{L}};
	%translations = %{$xml{T}};
	%alternativemonths = %{$xml{AM}};
	%encodings = %{$xml{E}};

	if (defined $doonly) {
		my @parts = split(/_/, $doonly);
		if (@parts == 2) {
			@filter = ($parts[0], "x", $parts[1]);
		} elsif (@parts == 3) {
			@filter = @parts;
		}

		print Dumper(@filter);
	}
	return;
}
417
# Produce the LC_CTYPE sources in $TYPE.draft/ for every selected locale.
# The $DEFENCODING file is a verbatim copy of
# $CLDRDIR/posix/xx_Comm_US.UTF-8.src; every other encoding of the locale
# gets the comment_char/escape_char lines and the LC_CTYPE section of the
# locale's own $DEFENCODING source, behind a "do not edit" header.
# A SHA-1 of what was read is stored in %languages and %hashtable so that
# make_makefile() can detect identical files.
# Unreadable inputs are reported on STDERR and skipped; failing to write an
# output file is fatal.
sub transform_ctypes {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
		my $fin;
		if (! -f $filename || !open($fin, '<', $filename)) {
			print STDERR "Cannot open $filename\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		close($fin);
		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$file.$DEFENCODING.src";
		open(my $fout, '>', $outname)
		    or die("Cannot write $outname: $!");
		print $fout @lines;
		close($fout)
		    or die("Cannot finish writing $outname: $!");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
			if (! -f $filename || !open($fin, '<', $filename)) {
				print STDERR "Cannot open $filename\n";
				next;
			}
			@lines = ();
			while (<$fin>) {
				if ((/^comment_char\s/) || (/^escape_char\s/)){
					push @lines, $_;
				}
				if (/^LC_CTYPE/../^END LC_CTYPE/) {
					push @lines, $_;
				}
			}
			close($fin);
			# The encoding name is part of the hash so that equal
			# text in different encodings is not treated as equal.
			my $uhex = sha1_hex(join("\n", @lines) . $enc);
			$languages{$l}{$f}{data}{$c}{$enc} = $uhex;
			$hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;

			$outname = "$TYPE.draft/$file.$enc.src";
			open($fout, '>', $outname)
			    or die("Cannot write $outname: $!");
			print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			print $fout @lines;
			close($fout)
			    or die("Cannot finish writing $outname: $!");
		}
	}
	}
	}
}
489
490
# Produce the LC_COLLATE sources in $TYPE.draft/ for every selected locale.
# The source is looked up as $CLDRDIR/posix/<locale>.$DEFENCODING.src, then
# $ETCDIR/<locale>.$DEFENCODING.src, then the CLDR file of the configured
# fallback locale.  The comment_char/escape_char lines and the LC_COLLATE
# section (runs of spaces squeezed to one) are written behind a "do not
# edit" header, and that file is copied for every other encoding of the
# locale.  A SHA-1 of the extracted lines is stored in %languages and
# %hashtable so that make_makefile() can detect identical files.
# Unreadable inputs are reported on STDERR and skipped; failing to write or
# copy an output file is fatal.
sub transform_collation {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;
		my $actfile = $file;	# output name, even when a fallback is read

		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		my $fin;
		if (! -f $filename || !open($fin, '<', $filename)) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines;
		while (<$fin>) {
			if ((/^comment_char\s/) || (/^escape_char\s/)){
				push @lines, $_;
			}
			if (/^LC_COLLATE/../^END LC_COLLATE/) {
				$_ =~ s/[ ]+/ /g;
				push @lines, $_;
			}
		}
		close($fin);
		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(my $fout, '>', $outname)
		    or die("Cannot write $outname: $!");
		print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
		print $fout @lines;
		close($fout)
		    or die("Cannot finish writing $outname: $!");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			my $copyname = "$TYPE.draft/$actfile.$enc.src";
			copy($outname, $copyname)
			    or die("Cannot copy $outname to $copyname: $!");
			$languages{$l}{$f}{data}{$c}{$enc} = $shex;
			$hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
		}
	}
	}
	}
}
560
# Read the raw value of every field listed in %keys for each selected locale
# into $values{language}{family}{country}{field}.
# The source is looked up as $CLDRDIR/posix/<locale>.$DEFENCODING.src, then
# $ETCDIR/<locale>.$DEFENCODING.src, then the CLDR file of the configured
# fallback locale.  A value starts on the line beginning with the field
# name and continues over following lines while a line ends in "/".
# Underscores inside <...> names are replaced by spaces; an underscore
# anywhere else is a fatal error.
sub get_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);

		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		my $fin;
		if (! -f $filename || !open($fin, '<', $filename)) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		chomp(@lines);
		close($fin);
		my $continue = 0;
		foreach my $k (keys(%keys)) {
			foreach my $line (@lines) {
				$line =~ s/\r//;
				next if (!$continue && $line !~ /^$k\s/);
				if ($continue) {
					$line =~ s/^\s+//;
				} else {
					$line =~ s/^$k\s+//;
				}

				$values{$l}{$f}{$c}{$k} = ""
					if (!defined $values{$l}{$f}{$c}{$k});

				$continue = ($line =~ /\/$/);
				$line =~ s/\/$// if ($continue);

				# Replace one underscore per pass until no
				# <...> name contains one.  Looping on the
				# substitution itself (not on "line has an
				# underscore") guarantees termination, so the
				# check below is reached for stray underscores.
				1 while ($line =~
				    s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
				die "_ in data - $line" if ($line =~ /_/);
				$values{$l}{$f}{$c}{$k} .= $line;

				last if (!$continue);
			}
		}
	}
	}
	}
}
628
# Convert the character name $s into its byte sequence in encoding $e.
#
# For UTF-8 the hex string comes straight from %utf8map.  For any other
# encoding the name is resolved to a Unicode code (via %ucd, then the
# %utf8aliases name, then a "ucc" entry in %translations) and that code is
# looked up in %convertors.  %translations can also supply the final hex
# string directly ("hex") or name a substitute character ("unicode").
#
# Returns the packed bytes for hex strings of 1 to 4 bytes.  Dies when no
# conversion exists.  For a hex string of any other length a message goes
# to STDERR and the string "length = N" is returned.
sub decodecldr {
	my $e = shift;
	my $s = shift;

	my $v = undef;

	if ($e eq "UTF-8") {
		#
		# Conversion to UTF-8 can be done from the Unicode name to
		# the UTF-8 character code.
		#
		$v = $utf8map{$s};
		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	} else {
		#
		# Conversion to these encodings can be done from the Unicode
		# name to Unicode code to the encodings code.
		#
		my $ucc = undef;
		$ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
		$ucc = $ucd{name2code}{$utf8aliases{$s}}
			if (!defined $ucc
			 && $utf8aliases{$s}
			 && defined $ucd{name2code}{$utf8aliases{$s}});

		if (!defined $ucc) {
			if (defined $translations{$e}{$s}{hex}) {
				$v = $translations{$e}{$s}{hex};
				$ucc = 0;	# placeholder, $v is already final
			} elsif (defined $translations{$e}{$s}{ucc}) {
				$ucc = $translations{$e}{$s}{ucc};
			}
		}

		die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
		$v = $convertors{$e}{$ucc} if (!defined $v);

		$v = $translations{$e}{$s}{hex}
			if (!defined $v && defined $translations{$e}{$s}{hex});

		if (!defined $v && defined $translations{$e}{$s}{unicode}) {
			# Resolve the substitute name on its own.  $ucc is
			# always defined at this point, so reusing it would
			# never let the alias lookup run.
			my $ucn = $translations{$e}{$s}{unicode};
			my $subst = $ucd{name2code}{$ucn};
			$subst = $ucd{name2code}{$utf8aliases{$ucn}}
				if (!defined $subst
				 && defined $utf8aliases{$ucn});
			$v = $convertors{$e}{$subst} if (defined $subst);
		}

		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	}

	# Two hex digits per byte; accept 1 to 4 bytes.
	my $len = length($v);
	if ($len >= 2 && $len <= 8 && $len % 2 == 0) {
		return pack("C*", map { hex($_) } unpack("(a2)*", $v));
	}
	print STDERR "Cannot convert $e $s\n";
	return "length = " . $len;

}
691
# Return the %translations entry for character name $v in encoding $enc,
# or undef when there is none.
sub translate {
	my ($enc, $v) = @_;

	my $entry = $translations{$enc}{$v};
	return (defined $entry) ? $entry : undef;
}
699
# Helper for print_fields(): strip the surrounding double quotes from $v and
# replace every <NAME> token with what decodecldr($enc, NAME) returns.
# The string is walked once from left to right, so decoded output is never
# scanned again for tokens.  If a token cannot be decoded it is left in
# place, a message goes to STDERR and the flag behind $okay is cleared.
sub _decode_field_value {
	my ($enc, $k, $v, $okay) = @_;

	$v =~ s/^"//;
	$v =~ s/"$//;
	$v =~ s{<(.*?)>}{
		my $cm = $1;
		my $rv = decodecldr($enc, $cm);
		if (!defined $rv) {
			print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
			${$okay} = 0;
			$rv = "<$cm>";
		}
		$rv;
	}ge;
	return $v;
}

# Write $TYPE.draft/<locale>.<encoding>.src for every selected locale and
# each of its encodings, using the values collected by get_fields().
# Each field in %keys is written as a "# description" comment followed by
# its value, formatted according to the field's spec:
#   ">other"              use the spec of field "other" instead
#   "<cb<source<spec"     run $callback{cb} on the value of "source"
#   "i", "ai"             value is written as read
#   "s"                   one decoded string
#   "as"                  ';'-separated list, one decoded string per line
# The output is first written to a ".new" file and renamed to ".src", or to
# ".failed" when a character could not be converted.  A SHA-1 of the output
# is stored in %languages and %hashtable for make_makefile().
# Failing to create the output file is fatal.
sub print_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
				print "Skipping ${l}_" .
				    ($f eq "x" ? "" : "${f}_") .
				    "${c} - not read\n";
				next;
			}
			my $file = $l;
			$file .= "_" . $f if ($f ne "x");
			$file .= "_" . $c;
			print "Writing to $file in $enc\n";

			if ($enc ne $DEFENCODING &&
			    !defined $convertors{$enc}) {
				print "Failed! Cannot convert to $enc.\n";
				next;
			}

			my $draft = "$TYPE.draft/$file.$enc";
			open(my $fout, '>', "$draft.new")
			    or die("Cannot write $draft.new: $!");
			my $okay = 1;
			my $output = "";
			print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			foreach my $k (keys(%keys)) {
				my $g = $keys{$k};

				die("Unknown $k in \%DESC")
					if (!defined $DESC{$k});

				$output .= "#\n# $DESC{$k}\n";

				# Replace one row with another
				if ($g =~ /^>/) {
					$k = substr($g, 1);
					$g = $keys{$k};
				}

				# Callback function
				if ($g =~ /^\</) {
					$callback{data}{c} = $c;
					$callback{data}{k} = $k;
					$callback{data}{f} = $f;
					$callback{data}{l} = $l;
					$callback{data}{e} = $enc;
					my @a = split(/\</, substr($g, 1));
					my $rv = $callback{$a[0]}->(
					    $values{$l}{$f}{$c}{$a[1]});
					$values{$l}{$f}{$c}{$k} = $rv;
					$g = $a[2];
					$callback{data} = undef;
				}

				my $v = $values{$l}{$f}{$c}{$k};
				$v = "undef" if (!defined $v);

				if ($g eq "i" || $g eq "ai") {
					$output .= "$v\n";
					next;
				}
				if ($g eq "s") {
					$output .= _decode_field_value(
					    $enc, $k, $v, \$okay) . "\n";
					next;
				}
				if ($g eq "as") {
					foreach my $item (split(/;/, $v)) {
						$output .= _decode_field_value(
						    $enc, $k, $item, \$okay) .
						    "\n";
					}
					next;
				}

				die("$k is '$g'");

			}

			$languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
			$hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
			print $fout "$output# EOF\n";
			close($fout)
			    or die("Cannot finish writing $draft.new: $!");

			my $final = $okay ? "$draft.src" : "$draft.failed";
			rename("$draft.new", $final)
			    or print STDERR
				"Cannot rename $draft.new to $final: $!\n";
		}
	}
	}
	}
}
850
# Write $TYPE.draft/Makefile.  Nothing is written when a --lc filter is
# active.
#
# The file consists of
#   1. a header with the suffix rule for this $TYPE,
#   2. "SAME+=" lines for locales whose generated content hash is equal:
#      within each group the first file after sorting becomes the link
#      target, and the others are set to undef in %languages so that they
#      are left out of step 3,
#   3. "LOCALES+=" lines for the remaining files, plus "SAME+=" lines for
#      the configured nc_link and e_link entries,
#   4. a trailer that turns LOCALES and SAME into FILES and SYMLINKS.
# Failing to create the Makefile is fatal.
sub make_makefile {
	return if ($#filter > -1);
	print "Creating Makefile for $TYPE\n";
	my $SRCOUT;
	my $SRCOUT2;
	my $SRCOUT3 = "";
	my $SRCOUT4 = "";
	my $MAPLOC;
	if ($TYPE eq "colldef") {
		$SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
			"\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " .
			"\${.OBJDIR}/\${.IMPSRC:T:R}";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT2 = "LC_COLLATE";
		$SRCOUT3 = "" .
			".for f t in \${LOCALES_MAPPED}\n" .
			"FILES+=\t\$t.LC_COLLATE\n" .
			"FILESDIR_\$t.LC_COLLATE=\t\${LOCALEDIR}/\$t\n" .
			"\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" .
			"\tlocaledef -D -U -i \${.ALLSRC} \\\n" .
			"\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" .
			"\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" .
			".endfor\n\n";
		$SRCOUT4 = "## LOCALES_MAPPED\n";
	}
	elsif ($TYPE eq "ctypedef") {
		$SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:E} " .
			"\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
			" || true";
		$SRCOUT2 = "LC_CTYPE";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT3 = "## SYMPAIRS\n\n" .
			".for s t in \${SYMPAIRS}\n" .
			"\${t:S/src\$/LC_CTYPE/}: " .
			"\$s\n" .
			"\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
			"\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
			" || true\n" .
			".endfor\n\n";
	}
	else {
		$SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
		$SRCOUT2 = "out";
		$MAPLOC = "";
	}
	my $makefile = "$TYPE.draft/Makefile";
	open(my $fout, '>', $makefile)
	    or die("Cannot write $makefile: $!");
	print $fout <<EOF;
# \$FreeBSD\$
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR=	\${SHAREDIR}/locale
FILESNAME=	$FILESNAMES{$TYPE}
.SUFFIXES:	.src .${SRCOUT2}
${MAPLOC}
.src.${SRCOUT2}:
	$SRCOUT

## PLACEHOLDER

${SRCOUT4}

EOF

	foreach my $hash (keys(%hashtable)) {
		# For colldef, weight LOCALES to UTF-8
		#     Sort as upper-case and reverse to achieve it
		#     Make en_US, ru_RU, and ca_AD preferred
		my @files;
		if ($TYPE eq "colldef") {
			@files = sort {
				if ($a eq 'en_x_US.UTF-8' ||
				    $a eq 'ru_x_RU.UTF-8' ||
				    $a eq 'ca_x_AD.UTF-8') { return -1; }
				elsif ($b eq 'en_x_US.UTF-8' ||
				       $b eq 'ru_x_RU.UTF-8' ||
				       $b eq 'ca_x_AD.UTF-8') { return 1; }
				else { return uc($b) cmp uc($a); }
				} keys(%{$hashtable{$hash}});
		} elsif ($TYPE eq "ctypedef") {
			# The patterns for $a and $b are kept identical
			# (anchored, literal dot) so both sides are judged
			# by the same rule.
			@files = sort {
				if ($a eq 'en_x_US.UTF-8') { return -1; }
				elsif ($b eq 'en_x_US.UTF-8') { return 1; }
				if ($a =~ /^en_x_US/) { return -1; }
				elsif ($b =~ /^en_x_US/) { return 1; }

				if ($a =~ /^en_x_GB\.ISO8859-15/ ||
				    $a =~ /^ru_x_RU/) { return -1; }
				elsif ($b =~ /^en_x_GB\.ISO8859-15/ ||
				       $b =~ /^ru_x_RU/) { return 1; }
				else { return uc($b) cmp uc($a); }

				} keys(%{$hashtable{$hash}});
		} else {
			@files = sort {
				if ($a =~ /_Comm_/ ||
				    $b eq 'en_x_US.UTF-8') { return 1; }
				elsif ($b =~ /_Comm_/ ||
				       $a eq 'en_x_US.UTF-8') { return -1; }
				else { return uc($b) cmp uc($a); }
				} keys(%{$hashtable{$hash}});
		}
		if ($#files > 0) {
			my $link = shift(@files);
			$link =~ s/_x_/_/;	# strip family if none there
			foreach my $file (@files) {
				# "lang_family_country.encoding" ->
				# (lang, family, "country.encoding") and
				# (country, encoding)
				my @a = split(/_/, $file);
				my @b = split(/\./, $a[-1]);
				$file =~ s/_x_/_/;
				print $fout "SAME+=\t\t$link $file\n";
				undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
			}
		}
	}

	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
		 && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
			print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
			    "${c} - not read\n";
			next;
		}
		foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			my $file = $l . "_";
			$file .= $f . "_" if ($f ne "x");
			$file .= $c;
			# undef marks a file already emitted as SAME above
			next if (!defined $languages{$l}{$f}{data}{$c}{$e});
			print $fout "LOCALES+=\t$file.$e\n";
		}

		if (defined $languages{$l}{$f}{nc_link}) {
			foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
				my $file = $l . "_";
				$file .= $f . "_" if ($f ne "x");
				$file .= $c;
				print $fout "SAME+=\t\t$file.$e $languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
			}
		}

		if (defined $languages{$l}{$f}{e_link}) {
			foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
				my @a = split(/:/, $el);
				my $file = $l . "_";
				$file .= $f . "_" if ($f ne "x");
				$file .= $c;
				print $fout "SAME+=\t\t$file.$a[0] $file.$a[1]\t# legacy (same charset)\n";
			}
		}

	}
	}
	}

	print $fout <<EOF;

FILES=		\${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES=	\${FILES}

.for f t in \${SAME}
SYMLINKS+=	../\$f/\${FILESNAME} \\
    \${LOCALEDIR}/\$t/\${FILESNAME}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

${SRCOUT3}.include <bsd.prog.mk>
EOF

	close($fout)
	    or die("Cannot finish writing $makefile: $!");
}
1033