1 unit TestLazUTF16;
2 
3 {$mode objfpc}{$H+}
4 
5 interface
6 
7 uses
8   Classes, SysUtils, fpcunit, testglobals, LazUTF8, LazUTF16, LazLogger;
9 
10 type
11 
12   { TTestUnicode }
13 
14   TTestUnicode= class(TTestCase)
15   published
16     procedure TestUTF8ToUTF16;
17     procedure TestUTF16ToUTF8;
18     procedure TestUTF16ToUnicode;
19     procedure TestUnicodeToUTF16;
20     procedure TestUTF8CharacterToUnicode;
21   end;
22 
23 const
24   Limits: Array [0..9] of Cardinal =
25     (0, $7F, $80, $7FF, $800, $D7FF, $E000, $FFFF, $10000, $10FFFF);
26 
27 implementation
28 
29 { TTestUnicode }
30 
31 procedure TTestUnicode.TestUTF8ToUTF16;
32 var
33   U: Cardinal;
34   I1, I2: Integer;
35   SUTF8, S1UTF8: UTF8String;
36   SUTF16, S1UTF16, R: WideString;
37 begin
38   for U := 0 to $10FFFF do // test each unicode char
39   begin
40     if (U >= $D800) and (U <= $DFFF) then Continue;
41 
42     SUTF8 := UnicodeToUTF8(U);
43     SUTF16 := UnicodeToUTF16(U);
44     R := UTF8ToUTF16(SUTF8);
45 
46     AssertEquals('UTF8ToUTF16 of unicode char: ' + IntToHex(U, 6) + ' error! ' + DbgWideStr(SUTF16) + ' ' + DbgWideStr(R),
47       DbgStr(UTF8Encode(SUTF16)), DbgStr(UTF8Encode(R)));
48   end;
49 
50   for I1 := 0 to High(Limits) do // test two char string with limit char values
51   begin
52     S1UTF8 := UnicodeToUTF8(Limits[I1]);
53     S1UTF16 := UnicodeToUTF16(Limits[I1]);
54 
55     for I2 := 0 to High(Limits) do
56     begin
57       SUTF8 := S1UTF8 + UnicodeToUTF8(Limits[I2]);
58       SUTF16 := S1UTF16 + UnicodeToUTF16(Limits[I2]);
59       R := UTF8ToUTF16(SUTF8);
60 
61       AssertEquals('UTF8ToUTF16 of two unicode chars: ' +
62         IntToHex(Limits[I1], 6) + IntToHex(Limits[I2], 6) + ' error!',
63         UTF8Encode(SUTF16), UTF8Encode(R));
64     end;
65   end;
66 end;
67 
68 procedure TTestUnicode.TestUTF16ToUTF8;
69 var
70   U: Cardinal;
71   I1, I2: Integer;
72   SUTF8, S1UTF8, R: String;
73   SUTF16, S1UTF16: WideString;
74 begin
75   for U := 0 to $10FFFF do
76   begin
77     if (U >= $D800) and (U <= $DFFF) then Continue;
78 
79     SUTF8 := UnicodeToUTF8(U);
80     SUTF16 := UnicodeToUTF16(U);
81     R := UTF16ToUTF8(SUTF16);
82 
83     AssertEquals('UTF16ToUTF8 of unicode char: ' + IntToHex(U, 6) + ' error! "' + DbgStr(PChar(SUTF16),length(SUTF16)*2) + '" "' + DbgStr(R)+'"',
84       DbgStr(SUTF8), DbgStr(R));
85   end;
86 
87   for I1 := 0 to High(Limits) do
88   begin
89     S1UTF8 := UnicodeToUTF8(Limits[I1]);
90     S1UTF16 := UnicodeToUTF16(Limits[I1]);
91 
92     for I2 := 0 to High(Limits) do
93     begin
94       SUTF8 := S1UTF8 + UnicodeToUTF8(Limits[I2]);
95       SUTF16 := S1UTF16 + UnicodeToUTF16(Limits[I2]);
96       R := UTF16ToUTF8(SUTF16);
97 
98       AssertEquals('UTF16ToUTF8 of two unicode chars: ' +
99         IntToHex(Limits[I1], 6) + IntToHex(Limits[I2], 6) + ' error!',
100         SUTF8, R);
101     end;
102   end;
103 end;
104 
105 procedure TTestUnicode.TestUTF16ToUnicode;
106 var
107   L: Integer;
108 begin
109   AssertEquals(0, UTF16CharacterToUnicode(#0, L));
110   AssertEquals($D7FF, UTF16CharacterToUnicode(#$D7FF, L));
111   AssertEquals($E000, UTF16CharacterToUnicode(#$E000, L));
112   AssertEquals($FFFF, UTF16CharacterToUnicode(#$FFFF, L));
113   AssertEquals($10000, UTF16CharacterToUnicode(#$D800#$DC00, L));
114   AssertEquals($10001, UTF16CharacterToUnicode(#$D800#$DC01, L));
115   AssertEquals($10FFFD, UTF16CharacterToUnicode(#$DBFF#$DFFD, L));
116 end;
117 
118 procedure TTestUnicode.TestUnicodeToUTF16;
119 
120   procedure t(a,b: widestring);
121   begin
122     if a=b then exit;
123     AssertEquals(dbgstr(PChar(a),length(a)*2), dbgstr(PChar(b),length(b)*2));
124   end;
125 
126 begin
127   t(widestring(#0), UnicodeToUTF16(0));
128   t(widestring(#$D7FF), UnicodeToUTF16($D7FF));
129   t(widestring(#$E000), UnicodeToUTF16($E000));
130   t(widestring(#$FFFF), UnicodeToUTF16($FFFF));
131   t(widestring(#$D800#$DC00), UnicodeToUTF16($10000));
132   t(widestring(#$D800#$DC01), UnicodeToUTF16($10001));
133   t(widestring(#$DBFF#$DFFD), UnicodeToUTF16($10FFFD));
134 end;
135 
136 procedure TTestUnicode.TestUTF8CharacterToUnicode;
137 var
138   i,u: cardinal;
139   s: String;
140   dum: integer;
141 begin
142   for i:=0 to $10FFFF do
143   begin
144     s:=UnicodeToUTF8(i);
145     u:=UTF8CodepointToUnicode(PChar(s), dum);
146     AssertEquals('got (hexidecimal): ' + InttoHex(u,6), i, u);
147   end;
148 end;
149 
150 initialization
151 
152   AddToLCLTestSuite(TTestUnicode);
153 end.
154 
155