#!/usr/bin/perl

#################################################################################
# Author:  Serge Kozlov                                                         #
# Date:    09/21/2005                                                           #
# Purpose: The script produces a text file ../data/charset_utf8.txt             #
#          in UTF8 format. Each line contains one UTF8 character:               #
#          first U+0021..U+07FF, then (appended) U+4E00..U+4FFF.                #
#################################################################################

use strict;
use warnings;

# First call creates/truncates the file; the second call appends to it
# (a leading ">" on the file name selects append mode, see genfile).
genfile("../data/charset_utf8.txt", 33, 2047, "utf8");
genfile(">../data/charset_utf8.txt", 19968, 20479, "utf8");


#genfile("../data/charset_utf8.txt", 33, 2047, "utf8");

# genfile($fn, $i_from, $i_to, $typ)
#
# Writes one character per line to file $fn for every code point in the
# inclusive range $i_from..$i_to.
#
#   $fn      - output file name. If it starts with ">", that character is
#              stripped and the file is opened for append instead of being
#              truncated (kept for compatibility with existing callers).
#   $i_from  - first code point (integer)
#   $i_to    - last code point (integer)
#   $typ     - "utf8": each character is written as its UTF-8 byte sequence
#                      followed by "\n";
#              "ucs2": each character is written as two big-endian bytes
#                      followed by a two-byte big-endian newline;
#              any other value writes nothing for the character.
#
# Code points <= 32, >= 65533, 65279 (U+FEFF, the byte order mark) and the
# surrogate range U+D800..U+DFFF are skipped.
#
# Dies if the file cannot be opened or closed.
sub genfile
{
    my ($fn, $i_from, $i_to, $typ) = @_;

    # Callers request append mode by prefixing the name with ">".
    my $mode = '>';
    if ($fn =~ s/^>//)
    {
        $mode = '>>';
    }

    open(my $fh, $mode, $fn)
        or die "Cannot open '$fn' for writing: $!\n";

    for my $i ($i_from .. $i_to)
    {
        next unless ($i > 32) && ($i < 65533) && ($i != 65279);

        # Surrogate code points are not characters and must not be encoded.
        next if ($i >= 0xD800) && ($i <= 0xDFFF);

        if ($typ eq "utf8")
        {
            print {$fh} _utf8_bytes($i), "\n";
        }
        elsif ($typ eq "ucs2")
        {
            # "n" packs a 16-bit big-endian value, so code points above 255
            # keep their high byte; "\0\n" is the two-byte newline.
            print {$fh} pack("n", $i), "\0\n";
        }
    }

    close($fh)
        or die "Cannot close '$fn': $!\n";

    return;
}

# _utf8_bytes($cp)
#
# Returns the UTF-8 byte sequence (1 to 3 bytes) for code point $cp,
# which must be below 0x10000.
sub _utf8_bytes
{
    my ($cp) = @_;

    # 1 byte: 0xxxxxxx
    if ($cp < 0x80)
    {
        return pack("C", $cp);
    }

    # 2 bytes: 110xxxxx 10xxxxxx
    if ($cp < 0x800)
    {
        return pack("C2",
                    0xC0 + (($cp & 0x7C0) >> 6),
                    0x80 + ($cp & 0x3F));
    }

    # 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    return pack("C3",
                0xE0 + (($cp & 0xF000) >> 12),
                0x80 + (($cp & 0xFC0) >> 6),
                0x80 + ($cp & 0x3F));
}