1#!/usr/bin/perl
2
3#################################################################################
4# Author:  Serge Kozlov								#
5# Date:    09/21/2005								#
6# Purpose: The script produces a text file ../data/charset_utf8.txt		#
7#	   in UTF8 format. Each line contains one UTF8 character 		#
#	   between 0021 and 07FF, followed by 4E00 through 4FFF		#
9#################################################################################
10
# Build ../data/charset_utf8.txt in two passes.  The first pass creates
# (or truncates) the file with code points 0x0021..0x07FF; the second
# pass adds 0x4E00..0x4FFF.  The leading ">" on the second file name is
# intentional: genfile() opens its target for writing with a ">" mode, so
# the extra ">" turns that into ">>" and the second range is appended
# instead of overwriting the first.
foreach my $pass (
    [ "../data/charset_utf8.txt",  0x0021, 0x07FF ],
    [ ">../data/charset_utf8.txt", 0x4E00, 0x4FFF ],
)
{
    my ($target, $first, $last) = @$pass;
    genfile($target, $first, $last, "utf8");
}
13
14
15#genfile("../data/charset_utf8.txt", 33, 2047, "utf8");
16
# genfile(FILENAME, FROM, TO, TYPE)
#
# Writes one encoded character per line to FILENAME for every code point
# in the inclusive range FROM..TO that passes the filter below.
#
#   FILENAME  Path of the output file.  The file is created or truncated,
#             unless the name starts with ">", in which case that ">" is
#             removed and the output is appended to the remaining path
#             (this keeps the long-standing ">file" calling convention
#             that used to rely on two-argument open).
#   FROM, TO  First and last code point (integers) to consider.
#   TYPE      "utf8": the UTF-8 byte sequence followed by "\n".
#             "ucs2": the code point as one big-endian 16-bit unit,
#                     followed by a 16-bit newline (bytes 00 0A).
#             Any other value writes nothing; the file is still opened
#             (and therefore created or truncated).
#
# Code points <= 0x20, >= 0xFFFD and 0xFEFF are always skipped.
# NOTE(review): the surrogate range 0xD800..0xDFFF is not filtered, so a
# range covering it produces sequences that are not valid characters --
# confirm whether callers ever need that range.
#
# Dies if the file cannot be opened or closed.  Returns nothing useful.
#
# The ($$$$) prototype is kept only so the sub's declared interface stays
# unchanged; it performs no argument validation.
sub genfile ($$$$)
{
    # Pragmas are lexically scoped, so this enables strictures for this
    # sub without touching the rest of the file.
    use strict;
    use warnings;

    my ($fn, $i_from, $i_to, $typ) = @_;

    # Explicit replacement for the old open(F, ">$fn") trick: a leading
    # ">" in the file name selects append mode.
    my $mode = ($fn =~ s/\A>\s*//) ? '>>' : '>';

    open(my $fh, $mode, $fn)
        or die "genfile: cannot open '$fn' for writing: $!";

    for my $i ($i_from .. $i_to)
    {
        # Skip controls/space (<= 0x20), U+FFFD..U+FFFF and U+FEFF.
        next if $i <= 0x20 || $i >= 0xFFFD || $i == 0xFEFF;

        if ($typ eq "utf8")
        {
            # utf8::encode turns the one-character string into its UTF-8
            # octets in place (1, 2 or 3 bytes for this code point range).
            my $bytes = chr($i);
            utf8::encode($bytes);
            print {$fh} $bytes, "\n";
        }
        elsif ($typ eq "ucs2")
        {
            # "n" packs a 16-bit big-endian value, so the high byte of the
            # code point is preserved (a single "C" byte would keep only
            # the low 8 bits).  The newline is written as 16 bits as well.
            print {$fh} pack("n", $i), pack("n", 0x0A);
        }
    }

    # Buffered write errors only surface at close time.
    close($fh)
        or die "genfile: error closing '$fn': $!";

    return;
}
61