1 {
2 *****************************************************************************
3 This file is part of LazUtils.
4
5 See the file COPYING.modifiedLGPL.txt, included in this distribution,
6 for details about the license.
7 *****************************************************************************
8
9 This unit includes string routines which are based on UTF-16 implementations,
10 although it might also include routines for other encodings.
11
12 A UTF-16 based implementation for LowerCase, for example, is faster in WideString
and UnicodeString than the default UTF-8 implementation.
14
15 Currently this unit includes only UTF8LowerCaseViaTables which is based on a
16 UTF-16 table, but it might be extended to include various UTF-16 routines.
17 }
18 unit LazUTF16;
19
20 {$IFDEF FPC}
21 {$mode objfpc}{$H+}
22 {$ENDIF}
23
24 interface
25
26 uses
27 Classes, SysUtils
28 {$IFDEF FPC}
29 ,lazutf8
30 {$ENDIF}
31 ;
32
33 {$IFnDEF FPC}
34 type
35 PtrInt = NativeInt;
36 {$ENDIF}
37
UTF16CharacterLengthnull38 function UTF16CharacterLength(p: PWideChar): integer;
UTF16Lengthnull39 function UTF16Length(const s: widestring): PtrInt; overload;
UTF16Lengthnull40 function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt; overload;
UTF16Copynull41 function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): Unicodestring;
UTF16CharStartnull42 function UTF16CharStart(P: PWideChar; Len, CharIndex: PtrInt): PWideChar;
UTF16Posnull43 function UTF16Pos(const SearchForText, SearchInText: UnicodeString; StartPos: PtrInt = 1): PtrInt;
UTF16CharacterToUnicodenull44 function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
UnicodeToUTF16null45 function UnicodeToUTF16(u: cardinal): widestring;
IsUTF16CharValidnull46 function IsUTF16CharValid(AChar, ANextChar: WideChar): Boolean;
IsUTF16StringValidnull47 function IsUTF16StringValid(AWideStr: widestring): Boolean;
Utf16StringReplacenull48 function Utf16StringReplace(const S, OldPattern, NewPattern: WideString; Flags: TReplaceFlags): WideString;
49
UnicodeLowercasenull50 function UnicodeLowercase(u: cardinal): cardinal;
51 {$IFDEF FPC}
UTF8LowerCaseViaTablesnull52 function UTF8LowerCaseViaTables(const s: string): string;
53 {$ENDIF}
54
55 implementation
56
function UTF16CharacterLength(p: PWideChar): integer;
// Returns the length, in 16-bit words, of the UTF-16 character at p.
// The word is read in the byte order of the machine.
// Result is 0 for p=nil, 2 when the first word lies in the surrogate range
// $D800..$DFFF, otherwise 1. Only p[0] is inspected.
var
  FirstWord: Word;
begin
  if p=nil then
  begin
    Result:=0;
    Exit;
  end;
  FirstWord:=ord(p^);
  if (FirstWord>=$D800) and (FirstWord<=$DFFF) then
    Result:=2
  else
    Result:=1;
end;
70
function UTF16Length(const s: widestring): PtrInt;
// Number of UTF-16 characters in s; delegates to the pointer/word-count
// overload using the word length of s.
var
  Words: PtrInt;
begin
  Words:=length(s);
  Result:=UTF16Length(PWideChar(s),Words);
end;
75
function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt;
// Counts the UTF-16 characters in the buffer p, which is WordCount
// 16-bit words long. A surrogate word counts, together with the word that
// follows it, as one character.
// Returns 0 when p is nil or WordCount is not positive.
var
  CharLen: LongInt;
begin
  Result:=0;
  // UTF16CharacterLength(nil) is 0, so without this guard WordCount would
  // never decrease and the loop below would never terminate.
  if p=nil then
    exit;
  while (WordCount>0) do begin
    inc(Result);
    CharLen:=UTF16CharacterLength(p);
    inc(p,CharLen);
    dec(WordCount,CharLen);
  end;
end;
88
function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): Unicodestring;
// Returns the substring of s that starts at character StartCharIndex
// (1-based, counted in UTF-16 characters) and spans CharCount characters.
// An empty string is returned when the start cannot be located.
// When UTF16CharStart cannot locate the end (it returns nil), everything
// from the start up to the end of s is returned.
var
  Base: PWideChar;
  First: PWideChar;
  Past: PWideChar;
  WordsToCopy: PtrInt;
begin
  Base:=PWideChar(s);
  First:=UTF16CharStart(Base,length(s),StartCharIndex-1);
  if First=nil then
  begin
    Result:='';
    Exit;
  end;
  // words remaining from First to the end of s
  WordsToCopy:=PtrInt(Base+length(s)-First);
  Past:=UTF16CharStart(First,WordsToCopy,CharCount);
  if Past<>nil then
    WordsToCopy:=Past-First;
  Result:=copy(s,First-Base+1,WordsToCopy);
end;
108
function UTF16CharStart(P: PWideChar; Len, CharIndex: PtrInt): PWideChar;
// Returns a pointer to the first word of the character with index
// CharIndex (0-based) inside the buffer P.
// Len is the length of P in 16-bit words.
// Returns nil when P is nil, when CharIndex is negative, when the buffer
// ends before CharIndex characters were skipped, or when the last skipped
// character extends beyond Len.
var
  Step: LongInt;
begin
  Result:=P;
  if P=nil then Exit;
  while (Len>0) and (CharIndex>0) do
  begin
    Step:=UTF16CharacterLength(Result);
    inc(Result,Step);
    dec(CharIndex);
    dec(Len,Step);
  end;
  if (Len<0) or (CharIndex<>0) then
    Result:=nil;
end;
127
function IndexOfWideChar(Const Buf: PWideChar; Len: PtrInt; b: WideChar): PtrInt;
// Returns the 0-based index of the first word equal to b among the first
// Len words of Buf, or -1 when there is none (including Len<=0).
var
  Idx: PtrInt;
begin
  Result:=-1;
  Idx:=0;
  while Idx<Len do
  begin
    if Buf[Idx]=b then
    begin
      Result:=Idx;
      Exit;
    end;
    inc(Idx);
  end;
end;
135
// Helper used by UTF16Pos
function UTF16PosP(SearchForText: PWideChar; SearchForTextLen: PtrInt;
  SearchInText: PWideChar; SearchInTextLen: PtrInt): PWideChar;
// Returns a pointer to the first place inside SearchInText where
// SearchForText occurs; both lengths are given in 16-bit words.
// Returns nil if not found, if either pointer is nil or if
// SearchForTextLen is 0.
var
  Skip: PtrInt;
begin
  Result:=nil;
  if (SearchInText=nil) or (SearchForText=nil) or (SearchForTextLen=0) then
    exit;
  while SearchInTextLen>0 do
  begin
    // jump to the next word equal to the first word of the needle
    Skip:=IndexOfWideChar(SearchInText,SearchInTextLen,SearchForText^);
    if Skip<0 then exit;
    inc(SearchInText,Skip);
    dec(SearchInTextLen,Skip);
    // not enough words left for a full match
    if SearchForTextLen>SearchInTextLen then exit;
    if CompareMem(SearchInText,SearchForText,SearchForTextLen*SizeOf(WideChar)) then
    begin
      Result:=SearchInText;
      exit;
    end;
    // no match at this candidate, continue one word further
    inc(SearchInText);
    dec(SearchInTextLen);
  end;
end;
159
function UTF16Pos(const SearchForText, SearchInText: UnicodeString; StartPos: PtrInt = 1): PtrInt;
// Returns the 1-based character index at which SearchForText starts in
// SearchInText, counted in UTF-16 characters (not in words).
// StartPos is optional and is also given in UTF-16 characters.
// Returns 0 if not found, if StartPos<1 or if StartPos cannot be located.
var
  WordIdx: PtrInt;
  TextStart: PWideChar;
  From: PWideChar;
  Hit: PWideChar;
begin
  Result:=0;
  if StartPos<1 then exit;
  TextStart:=PWideChar(SearchInText);
  if StartPos=1 then
  begin
    // search from the very beginning: use System.Pos and convert the word
    // index into a character index
    WordIdx:=System.Pos(SearchForText,SearchInText);
    if WordIdx>0 then
      Result:=UTF16Length(TextStart,WordIdx-1)+1;
    exit;
  end;
  // skip the first StartPos-1 characters
  From:=UTF16CharStart(TextStart,Length(SearchInText),StartPos-1);
  if From=nil then exit;
  // search in the remaining words
  Hit:=UTF16PosP(PWideChar(SearchForText),length(SearchForText),
    From,length(SearchInText)+TextStart-From);
  // convert the word offset of the hit into a UTF-16 character position
  if Hit<>nil then
    Result:=StartPos+UTF16Length(From,Hit-From);
end;
189
function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
// Decodes the UTF-16 character at p and returns its Unicode code point.
// CharLen receives the number of 16-bit words consumed:
//   0 - p is nil (Result is 0)
//   1 - a single word, or an unpaired surrogate (Result is that word)
//   2 - a valid surrogate pair: high $D800..$DBFF followed by
//       low $DC00..$DFFF
// The word p[1] is only read when p[0] is a high surrogate.
var
  w1: Cardinal;
  w2: Cardinal;
begin
  if p=nil then begin
    Result:=0;
    CharLen:=0;
    exit;
  end;
  w1:=ord(p[0]);
  // default: one word, returned as is (also used for invalid surrogates)
  Result:=w1;
  CharLen:=1;
  // only a high surrogate can start a 2 word character; a low surrogate
  // in first position is unpaired
  if (w1>=$D800) and (w1<=$DBFF) then begin
    w2:=ord(p[1]);
    // the second word must be a low surrogate, values above $DFFF are not
    if (w2>=$DC00) and (w2<=$DFFF) then begin
      Result:=(w1-$D800) shl 10 + (w2-$DC00) + $10000;
      CharLen:=2;
    end;
  end;
end;
219
function UnicodeToUTF16(u: cardinal): widestring;
// Encodes the code point u as UTF-16.
// u should be <= $10FFFF to fit into UTF-16.
// Values below $10000 are emitted as a single word without further checks
// (note: the code points $D800 - $DFFF are reserved); all other values
// are split into a high and a low surrogate.
var
  Offset: cardinal;
begin
  if u >= $10000 then
  begin
    Offset:=u - $10000;
    Result:=system.widechar($D800+(Offset shr 10))
           +system.widechar($DC00+(Offset and $3ff));
  end
  else
    Result:=system.widechar(u);
end;
230
231 // Specification here: http://unicode.org/faq/utf_bom.html#utf16-7
232 // Q: Are there any 16-bit values that are invalid?
233 // A: Unpaired surrogates are invalid in UTFs. These include any value in the
234 // range D800 to DBFF not followed by a value in the range DC00 to DFFF,
235 // or any value in the range DC00 to DFFF not preceded by a value in the range D800 to DBFF. [AF]
236 //
237 // Use ANextChar = #0 to indicate that there is no next char
function IsUTF16CharValid(AChar, ANextChar: WideChar): Boolean;
// Returns False when AChar is #0, or when AChar is a high surrogate
// ($D800..$DBFF) that is not followed by a low surrogate ($DC00..$DFFF).
// Returns True in every other case; a low surrogate in AChar is not
// rejected because the preceding word is not known here.
// Pass ANextChar = #0 when there is no next char.
var
  IsHighSurrogate: Boolean;
  NextIsLowSurrogate: Boolean;
begin
  Result := False;
  if AChar = #0 then Exit;
  IsHighSurrogate := (AChar >= #$D800) and (AChar <= #$DBFF);
  NextIsLowSurrogate := (ANextChar >= #$DC00) and (ANextChar <= #$DFFF);
  Result := (not IsHighSurrogate) or NextIsLowSurrogate;
end;
245
function IsUTF16StringValid(AWideStr: widestring): Boolean;
// Returns True when AWideStr is well-formed UTF-16:
//  - every high surrogate ($D800..$DBFF) is immediately followed by a
//    low surrogate ($DC00..$DFFF),
//  - no low surrogate appears without a preceding high surrogate,
//  - the string contains no #0 word (same rule as IsUTF16CharValid).
// The last word is checked as well, so a string that ends in an unpaired
// high surrogate is rejected. The empty string is valid.
var
  i, Len: Integer;
  c, Next: WideChar;
begin
  Result := False;
  Len := Length(AWideStr);
  i := 1;
  while i <= Len do
  begin
    c := AWideStr[i];
    if c = #0 then Exit;
    if (c >= #$D800) and (c <= #$DBFF) then
    begin
      // high surrogate: needs a low surrogate right behind it
      if i = Len then Exit;
      Next := AWideStr[i+1];
      if (Next < #$DC00) or (Next > #$DFFF) then Exit;
      // skip the complete pair, so the low surrogate is not seen as unpaired
      inc(i, 2);
    end
    else if (c >= #$DC00) and (c <= #$DFFF) then
      // low surrogate that was not consumed as the second half of a pair
      Exit
    else
      inc(i);
  end;
  Result := True;
end;
257
// Counterpart of SysUtils.StringReplace for WideStrings/UnicodeStrings.
// Replaces the first occurrence of OldPattern in S with NewPattern, or all
// occurrences when rfReplaceAll is in Flags. With rfIgnoreCase the search
// is done on WideUpperCase copies of S and OldPattern, while the result is
// built from the original S.
function Utf16StringReplace(const S, OldPattern, NewPattern: WideString; Flags: TReplaceFlags): WideString;
var
  Haystack, Needle: WideString; // what is searched; upper-cased for rfIgnoreCase
  Tail: WideString;             // not yet consumed part of the original S
  MatchPos, SkipTo: Integer;
  ReplaceAll: Boolean;
begin
  if rfIgnoreCase in Flags then
  begin
    Haystack:=WideUpperCase(S);
    Needle:=WideUpperCase(OldPattern);
  end
  else
  begin
    Haystack:=S;
    Needle:=OldPattern;
  end;
  ReplaceAll:=rfReplaceAll in Flags;
  Tail:=S;
  Result:='';
  while Length(Haystack)<>0 do
  begin
    MatchPos:=Pos(Needle, Haystack);
    if MatchPos=0 then
    begin
      // no further match: append the rest and stop
      Result:=Result+Tail;
      Exit;
    end;
    Result:=Result+Copy(Tail,1,MatchPos-1)+NewPattern;
    // first position behind the match, valid for Tail and Haystack alike
    SkipTo:=MatchPos+Length(Needle);
    Tail:=Copy(Tail,SkipTo,Length(Tail)-SkipTo+1);
    if not ReplaceAll then
    begin
      Result:=Result+Tail;
      Exit;
    end;
    Haystack:=Copy(Haystack,SkipTo,Length(Haystack)-SkipTo+1);
  end;
end;
296
297
// Lowercase Unicode Tables which match UTF-16 but also UTF-32
// Each table is indexed by the code points of the range given in its name
// and its bounds. The tables are filled at run time by InitUnicodeTables:
// where the initialisation is visible, an entry holds the lowercase code
// point for its index, or the index itself when no mapping is assigned.
// NOTE(review): the initialisation of UnicodeLower2126_2183 and
// UnicodeLower2C60_2CE2 is not visible in this part of the file - confirm
// that they follow the same scheme.
var
  UnicodeLower00C0_00DE: array[$00C0..$00DE] of word;
  UnicodeLower0100_024E: array[$0100..$024E] of word;
  UnicodeLower0386_03AB: array[$0386..$03AB] of word;
  UnicodeLower03D8_042F: array[$03D8..$042F] of word;
  UnicodeLower0460_0512: array[$0460..$0512] of word;
  UnicodeLower1E00_1FFC: array[$1E00..$1FFC] of word;
  UnicodeLower2126_2183: array[$2126..$2183] of word;
  UnicodeLower2C60_2CE2: array[$2C60..$2CE2] of word;
309 procedure InitUnicodeTables;
310 var
311 i: Integer;
312 begin
313 for i:=Low(UnicodeLower00C0_00DE) to High(UnicodeLower00C0_00DE) do
314 UnicodeLower00C0_00DE[i]:=i+32;
315 UnicodeLower00C0_00DE[$00D7]:=$00D7;
316
317 for i:=Low(UnicodeLower0100_024E) to High(UnicodeLower0100_024E) do
318 UnicodeLower0100_024E[i]:=i;
319 UnicodeLower0100_024E[$0100]:=$0101;
320 UnicodeLower0100_024E[$0102]:=$0103;
321 UnicodeLower0100_024E[$0104]:=$0105;
322 UnicodeLower0100_024E[$0106]:=$0107;
323 UnicodeLower0100_024E[$0108]:=$0109;
324 UnicodeLower0100_024E[$010A]:=$010B;
325 UnicodeLower0100_024E[$010C]:=$010D;
326 UnicodeLower0100_024E[$010E]:=$010F;
327 UnicodeLower0100_024E[$0110]:=$0111;
328 UnicodeLower0100_024E[$0112]:=$0113;
329 UnicodeLower0100_024E[$0114]:=$0115;
330 UnicodeLower0100_024E[$0116]:=$0117;
331 UnicodeLower0100_024E[$0118]:=$0119;
332 UnicodeLower0100_024E[$011A]:=$011B;
333 UnicodeLower0100_024E[$011C]:=$011D;
334 UnicodeLower0100_024E[$011E]:=$011F;
335 UnicodeLower0100_024E[$0120]:=$0121;
336 UnicodeLower0100_024E[$0122]:=$0123;
337 UnicodeLower0100_024E[$0124]:=$0125;
338 UnicodeLower0100_024E[$0126]:=$0127;
339 UnicodeLower0100_024E[$0128]:=$0129;
340 UnicodeLower0100_024E[$012A]:=$012B;
341 UnicodeLower0100_024E[$012C]:=$012D;
342 UnicodeLower0100_024E[$012E]:=$012F;
343 UnicodeLower0100_024E[$0130]:=$0069;
344 UnicodeLower0100_024E[$0132]:=$0133;
345 UnicodeLower0100_024E[$0134]:=$0135;
346 UnicodeLower0100_024E[$0136]:=$0137;
347 UnicodeLower0100_024E[$0139]:=$013A;
348 UnicodeLower0100_024E[$013B]:=$013C;
349 UnicodeLower0100_024E[$013D]:=$013E;
350 UnicodeLower0100_024E[$013F]:=$0140;
351 UnicodeLower0100_024E[$0141]:=$0142;
352 UnicodeLower0100_024E[$0143]:=$0144;
353 UnicodeLower0100_024E[$0145]:=$0146;
354 UnicodeLower0100_024E[$0147]:=$0148;
355 UnicodeLower0100_024E[$014A]:=$014B;
356 UnicodeLower0100_024E[$014C]:=$014D;
357 UnicodeLower0100_024E[$014E]:=$014F;
358 UnicodeLower0100_024E[$0150]:=$0151;
359 UnicodeLower0100_024E[$0152]:=$0153;
360 UnicodeLower0100_024E[$0154]:=$0155;
361 UnicodeLower0100_024E[$0156]:=$0157;
362 UnicodeLower0100_024E[$0158]:=$0159;
363 UnicodeLower0100_024E[$015A]:=$015B;
364 UnicodeLower0100_024E[$015C]:=$015D;
365 UnicodeLower0100_024E[$015E]:=$015F;
366 UnicodeLower0100_024E[$0160]:=$0161;
367 UnicodeLower0100_024E[$0162]:=$0163;
368 UnicodeLower0100_024E[$0164]:=$0165;
369 UnicodeLower0100_024E[$0166]:=$0167;
370 UnicodeLower0100_024E[$0168]:=$0169;
371 UnicodeLower0100_024E[$016A]:=$016B;
372 UnicodeLower0100_024E[$016C]:=$016D;
373 UnicodeLower0100_024E[$016E]:=$016F;
374 UnicodeLower0100_024E[$0170]:=$0171;
375 UnicodeLower0100_024E[$0172]:=$0173;
376 UnicodeLower0100_024E[$0174]:=$0175;
377 UnicodeLower0100_024E[$0176]:=$0177;
378 UnicodeLower0100_024E[$0178]:=$00FF;
379 UnicodeLower0100_024E[$0179]:=$017A;
380 UnicodeLower0100_024E[$017B]:=$017C;
381 UnicodeLower0100_024E[$017D]:=$017E;
382 UnicodeLower0100_024E[$0181]:=$0253;
383 UnicodeLower0100_024E[$0182]:=$0183;
384 UnicodeLower0100_024E[$0184]:=$0185;
385 UnicodeLower0100_024E[$0186]:=$0254;
386 UnicodeLower0100_024E[$0187]:=$0188;
387 UnicodeLower0100_024E[$0189]:=$0256;
388 UnicodeLower0100_024E[$018A]:=$0257;
389 UnicodeLower0100_024E[$018B]:=$018C;
390 UnicodeLower0100_024E[$018E]:=$01DD;
391 UnicodeLower0100_024E[$018F]:=$0259;
392 UnicodeLower0100_024E[$0190]:=$025B;
393 UnicodeLower0100_024E[$0191]:=$0192;
394 UnicodeLower0100_024E[$0193]:=$0260;
395 UnicodeLower0100_024E[$0194]:=$0263;
396 UnicodeLower0100_024E[$0196]:=$0269;
397 UnicodeLower0100_024E[$0197]:=$0268;
398 UnicodeLower0100_024E[$0198]:=$0199;
399 UnicodeLower0100_024E[$019C]:=$026F;
400 UnicodeLower0100_024E[$019D]:=$0272;
401 UnicodeLower0100_024E[$019F]:=$0275;
402 UnicodeLower0100_024E[$01A0]:=$01A1;
403 UnicodeLower0100_024E[$01A2]:=$01A3;
404 UnicodeLower0100_024E[$01A4]:=$01A5;
405 UnicodeLower0100_024E[$01A6]:=$0280;
406 UnicodeLower0100_024E[$01A7]:=$01A8;
407 UnicodeLower0100_024E[$01A9]:=$0283;
408 UnicodeLower0100_024E[$01AC]:=$01AD;
409 UnicodeLower0100_024E[$01AE]:=$0288;
410 UnicodeLower0100_024E[$01AF]:=$01B0;
411 UnicodeLower0100_024E[$01B1]:=$028A;
412 UnicodeLower0100_024E[$01B2]:=$028B;
413 UnicodeLower0100_024E[$01B3]:=$01B4;
414 UnicodeLower0100_024E[$01B5]:=$01B6;
415 UnicodeLower0100_024E[$01B7]:=$0292;
416 UnicodeLower0100_024E[$01B8]:=$01B9;
417 UnicodeLower0100_024E[$01BC]:=$01BD;
418 UnicodeLower0100_024E[$01C4]:=$01C6;
419 UnicodeLower0100_024E[$01C5]:=$01C6;
420 UnicodeLower0100_024E[$01C7]:=$01C9;
421 UnicodeLower0100_024E[$01C8]:=$01C9;
422 UnicodeLower0100_024E[$01CA]:=$01CC;
423 UnicodeLower0100_024E[$01CB]:=$01CC;
424 UnicodeLower0100_024E[$01CD]:=$01CE;
425 UnicodeLower0100_024E[$01CF]:=$01D0;
426 UnicodeLower0100_024E[$01D1]:=$01D2;
427 UnicodeLower0100_024E[$01D3]:=$01D4;
428 UnicodeLower0100_024E[$01D5]:=$01D6;
429 UnicodeLower0100_024E[$01D7]:=$01D8;
430 UnicodeLower0100_024E[$01D9]:=$01DA;
431 UnicodeLower0100_024E[$01DB]:=$01DC;
432 UnicodeLower0100_024E[$01DE]:=$01DF;
433 UnicodeLower0100_024E[$01E0]:=$01E1;
434 UnicodeLower0100_024E[$01E2]:=$01E3;
435 UnicodeLower0100_024E[$01E4]:=$01E5;
436 UnicodeLower0100_024E[$01E6]:=$01E7;
437 UnicodeLower0100_024E[$01E8]:=$01E9;
438 UnicodeLower0100_024E[$01EA]:=$01EB;
439 UnicodeLower0100_024E[$01EC]:=$01ED;
440 UnicodeLower0100_024E[$01EE]:=$01EF;
441 UnicodeLower0100_024E[$01F1]:=$01F3;
442 UnicodeLower0100_024E[$01F2]:=$01F3;
443 UnicodeLower0100_024E[$01F4]:=$01F5;
444 UnicodeLower0100_024E[$01F6]:=$0195;
445 UnicodeLower0100_024E[$01F7]:=$01BF;
446 UnicodeLower0100_024E[$01F8]:=$01F9;
447 UnicodeLower0100_024E[$01FA]:=$01FB;
448 UnicodeLower0100_024E[$01FC]:=$01FD;
449 UnicodeLower0100_024E[$01FE]:=$01FF;
450 UnicodeLower0100_024E[$0200]:=$0201;
451 UnicodeLower0100_024E[$0202]:=$0203;
452 UnicodeLower0100_024E[$0204]:=$0205;
453 UnicodeLower0100_024E[$0206]:=$0207;
454 UnicodeLower0100_024E[$0208]:=$0209;
455 UnicodeLower0100_024E[$020A]:=$020B;
456 UnicodeLower0100_024E[$020C]:=$020D;
457 UnicodeLower0100_024E[$020E]:=$020F;
458 UnicodeLower0100_024E[$0210]:=$0211;
459 UnicodeLower0100_024E[$0212]:=$0213;
460 UnicodeLower0100_024E[$0214]:=$0215;
461 UnicodeLower0100_024E[$0216]:=$0217;
462 UnicodeLower0100_024E[$0218]:=$0219;
463 UnicodeLower0100_024E[$021A]:=$021B;
464 UnicodeLower0100_024E[$021C]:=$021D;
465 UnicodeLower0100_024E[$021E]:=$021F;
466 UnicodeLower0100_024E[$0220]:=$019E;
467 UnicodeLower0100_024E[$0222]:=$0223;
468 UnicodeLower0100_024E[$0224]:=$0225;
469 UnicodeLower0100_024E[$0226]:=$0227;
470 UnicodeLower0100_024E[$0228]:=$0229;
471 UnicodeLower0100_024E[$022A]:=$022B;
472 UnicodeLower0100_024E[$022C]:=$022D;
473 UnicodeLower0100_024E[$022E]:=$022F;
474 UnicodeLower0100_024E[$0230]:=$0231;
475 UnicodeLower0100_024E[$0232]:=$0233;
476 UnicodeLower0100_024E[$023A]:=$2C65;
477 UnicodeLower0100_024E[$023B]:=$023C;
478 UnicodeLower0100_024E[$023D]:=$019A;
479 UnicodeLower0100_024E[$023E]:=$2C66;
480 UnicodeLower0100_024E[$0241]:=$0242;
481 UnicodeLower0100_024E[$0243]:=$0180;
482 UnicodeLower0100_024E[$0244]:=$0289;
483 UnicodeLower0100_024E[$0245]:=$028C;
484 UnicodeLower0100_024E[$0246]:=$0247;
485 UnicodeLower0100_024E[$0248]:=$0249;
486 UnicodeLower0100_024E[$024A]:=$024B;
487 UnicodeLower0100_024E[$024C]:=$024D;
488 UnicodeLower0100_024E[$024E]:=$024F;
489
490 for i:=Low(UnicodeLower0386_03AB) to High(UnicodeLower0386_03AB) do
491 UnicodeLower0386_03AB[i]:=i;
492 UnicodeLower0386_03AB[$0386]:=$03AC;
493 UnicodeLower0386_03AB[$0388]:=$03AD;
494 UnicodeLower0386_03AB[$0389]:=$03AE;
495 UnicodeLower0386_03AB[$038A]:=$03AF;
496 UnicodeLower0386_03AB[$038C]:=$03CC;
497 UnicodeLower0386_03AB[$038E]:=$03CD;
498 UnicodeLower0386_03AB[$038F]:=$03CE;
499 UnicodeLower0386_03AB[$0391]:=$03B1;
500 UnicodeLower0386_03AB[$0392]:=$03B2;
501 UnicodeLower0386_03AB[$0393]:=$03B3;
502 UnicodeLower0386_03AB[$0394]:=$03B4;
503 UnicodeLower0386_03AB[$0395]:=$03B5;
504 UnicodeLower0386_03AB[$0396]:=$03B6;
505 UnicodeLower0386_03AB[$0397]:=$03B7;
506 UnicodeLower0386_03AB[$0398]:=$03B8;
507 UnicodeLower0386_03AB[$0399]:=$03B9;
508 UnicodeLower0386_03AB[$039A]:=$03BA;
509 UnicodeLower0386_03AB[$039B]:=$03BB;
510 UnicodeLower0386_03AB[$039C]:=$03BC;
511 UnicodeLower0386_03AB[$039D]:=$03BD;
512 UnicodeLower0386_03AB[$039E]:=$03BE;
513 UnicodeLower0386_03AB[$039F]:=$03BF;
514 UnicodeLower0386_03AB[$03A0]:=$03C0;
515 UnicodeLower0386_03AB[$03A1]:=$03C1;
516 UnicodeLower0386_03AB[$03A3]:=$03C3;
517 UnicodeLower0386_03AB[$03A4]:=$03C4;
518 UnicodeLower0386_03AB[$03A5]:=$03C5;
519 UnicodeLower0386_03AB[$03A6]:=$03C6;
520 UnicodeLower0386_03AB[$03A7]:=$03C7;
521 UnicodeLower0386_03AB[$03A8]:=$03C8;
522 UnicodeLower0386_03AB[$03A9]:=$03C9;
523 UnicodeLower0386_03AB[$03AA]:=$03CA;
524 UnicodeLower0386_03AB[$03AB]:=$03CB;
525
526 for i:=Low(UnicodeLower03D8_042F) to High(UnicodeLower03D8_042F) do
527 UnicodeLower03D8_042F[i]:=i;
528 UnicodeLower03D8_042F[$03D8]:=$03D9;
529 UnicodeLower03D8_042F[$03DA]:=$03DB;
530 UnicodeLower03D8_042F[$03DC]:=$03DD;
531 UnicodeLower03D8_042F[$03DE]:=$03DF;
532 UnicodeLower03D8_042F[$03E0]:=$03E1;
533 UnicodeLower03D8_042F[$03E2]:=$03E3;
534 UnicodeLower03D8_042F[$03E4]:=$03E5;
535 UnicodeLower03D8_042F[$03E6]:=$03E7;
536 UnicodeLower03D8_042F[$03E8]:=$03E9;
537 UnicodeLower03D8_042F[$03EA]:=$03EB;
538 UnicodeLower03D8_042F[$03EC]:=$03ED;
539 UnicodeLower03D8_042F[$03EE]:=$03EF;
540 UnicodeLower03D8_042F[$03F4]:=$03B8;
541 UnicodeLower03D8_042F[$03F7]:=$03F8;
542 UnicodeLower03D8_042F[$03F9]:=$03F2;
543 UnicodeLower03D8_042F[$03FA]:=$03FB;
544 UnicodeLower03D8_042F[$03FD]:=$037B;
545 UnicodeLower03D8_042F[$03FE]:=$037C;
546 UnicodeLower03D8_042F[$03FF]:=$037D;
547 UnicodeLower03D8_042F[$0400]:=$0450;
548 UnicodeLower03D8_042F[$0401]:=$0451;
549 UnicodeLower03D8_042F[$0402]:=$0452;
550 UnicodeLower03D8_042F[$0403]:=$0453;
551 UnicodeLower03D8_042F[$0404]:=$0454;
552 UnicodeLower03D8_042F[$0405]:=$0455;
553 UnicodeLower03D8_042F[$0406]:=$0456;
554 UnicodeLower03D8_042F[$0407]:=$0457;
555 UnicodeLower03D8_042F[$0408]:=$0458;
556 UnicodeLower03D8_042F[$0409]:=$0459;
557 UnicodeLower03D8_042F[$040A]:=$045A;
558 UnicodeLower03D8_042F[$040B]:=$045B;
559 UnicodeLower03D8_042F[$040C]:=$045C;
560 UnicodeLower03D8_042F[$040D]:=$045D;
561 UnicodeLower03D8_042F[$040E]:=$045E;
562 UnicodeLower03D8_042F[$040F]:=$045F;
563 UnicodeLower03D8_042F[$0410]:=$0430;
564 UnicodeLower03D8_042F[$0411]:=$0431;
565 UnicodeLower03D8_042F[$0412]:=$0432;
566 UnicodeLower03D8_042F[$0413]:=$0433;
567 UnicodeLower03D8_042F[$0414]:=$0434;
568 UnicodeLower03D8_042F[$0415]:=$0435;
569 UnicodeLower03D8_042F[$0416]:=$0436;
570 UnicodeLower03D8_042F[$0417]:=$0437;
571 UnicodeLower03D8_042F[$0418]:=$0438;
572 UnicodeLower03D8_042F[$0419]:=$0439;
573 UnicodeLower03D8_042F[$041A]:=$043A;
574 UnicodeLower03D8_042F[$041B]:=$043B;
575 UnicodeLower03D8_042F[$041C]:=$043C;
576 UnicodeLower03D8_042F[$041D]:=$043D;
577 UnicodeLower03D8_042F[$041E]:=$043E;
578 UnicodeLower03D8_042F[$041F]:=$043F;
579 UnicodeLower03D8_042F[$0420]:=$0440;
580 UnicodeLower03D8_042F[$0421]:=$0441;
581 UnicodeLower03D8_042F[$0422]:=$0442;
582 UnicodeLower03D8_042F[$0423]:=$0443;
583 UnicodeLower03D8_042F[$0424]:=$0444;
584 UnicodeLower03D8_042F[$0425]:=$0445;
585 UnicodeLower03D8_042F[$0426]:=$0446;
586 UnicodeLower03D8_042F[$0427]:=$0447;
587 UnicodeLower03D8_042F[$0428]:=$0448;
588 UnicodeLower03D8_042F[$0429]:=$0449;
589 UnicodeLower03D8_042F[$042A]:=$044A;
590 UnicodeLower03D8_042F[$042B]:=$044B;
591 UnicodeLower03D8_042F[$042C]:=$044C;
592 UnicodeLower03D8_042F[$042D]:=$044D;
593 UnicodeLower03D8_042F[$042E]:=$044E;
594 UnicodeLower03D8_042F[$042F]:=$044F;
595
596 for i:=Low(UnicodeLower0460_0512) to High(UnicodeLower0460_0512) do
597 UnicodeLower0460_0512[i]:=i;
598 UnicodeLower0460_0512[$0460]:=$0461;
599 UnicodeLower0460_0512[$0462]:=$0463;
600 UnicodeLower0460_0512[$0464]:=$0465;
601 UnicodeLower0460_0512[$0466]:=$0467;
602 UnicodeLower0460_0512[$0468]:=$0469;
603 UnicodeLower0460_0512[$046A]:=$046B;
604 UnicodeLower0460_0512[$046C]:=$046D;
605 UnicodeLower0460_0512[$046E]:=$046F;
606 UnicodeLower0460_0512[$0470]:=$0471;
607 UnicodeLower0460_0512[$0472]:=$0473;
608 UnicodeLower0460_0512[$0474]:=$0475;
609 UnicodeLower0460_0512[$0476]:=$0477;
610 UnicodeLower0460_0512[$0478]:=$0479;
611 UnicodeLower0460_0512[$047A]:=$047B;
612 UnicodeLower0460_0512[$047C]:=$047D;
613 UnicodeLower0460_0512[$047E]:=$047F;
614 UnicodeLower0460_0512[$0480]:=$0481;
615 UnicodeLower0460_0512[$048A]:=$048B;
616 UnicodeLower0460_0512[$048C]:=$048D;
617 UnicodeLower0460_0512[$048E]:=$048F;
618 UnicodeLower0460_0512[$0490]:=$0491;
619 UnicodeLower0460_0512[$0492]:=$0493;
620 UnicodeLower0460_0512[$0494]:=$0495;
621 UnicodeLower0460_0512[$0496]:=$0497;
622 UnicodeLower0460_0512[$0498]:=$0499;
623 UnicodeLower0460_0512[$049A]:=$049B;
624 UnicodeLower0460_0512[$049C]:=$049D;
625 UnicodeLower0460_0512[$049E]:=$049F;
626 UnicodeLower0460_0512[$04A0]:=$04A1;
627 UnicodeLower0460_0512[$04A2]:=$04A3;
628 UnicodeLower0460_0512[$04A4]:=$04A5;
629 UnicodeLower0460_0512[$04A6]:=$04A7;
630 UnicodeLower0460_0512[$04A8]:=$04A9;
631 UnicodeLower0460_0512[$04AA]:=$04AB;
632 UnicodeLower0460_0512[$04AC]:=$04AD;
633 UnicodeLower0460_0512[$04AE]:=$04AF;
634 UnicodeLower0460_0512[$04B0]:=$04B1;
635 UnicodeLower0460_0512[$04B2]:=$04B3;
636 UnicodeLower0460_0512[$04B4]:=$04B5;
637 UnicodeLower0460_0512[$04B6]:=$04B7;
638 UnicodeLower0460_0512[$04B8]:=$04B9;
639 UnicodeLower0460_0512[$04BA]:=$04BB;
640 UnicodeLower0460_0512[$04BC]:=$04BD;
641 UnicodeLower0460_0512[$04BE]:=$04BF;
642 UnicodeLower0460_0512[$04C0]:=$04CF;
643 UnicodeLower0460_0512[$04C1]:=$04C2;
644 UnicodeLower0460_0512[$04C3]:=$04C4;
645 UnicodeLower0460_0512[$04C5]:=$04C6;
646 UnicodeLower0460_0512[$04C7]:=$04C8;
647 UnicodeLower0460_0512[$04C9]:=$04CA;
648 UnicodeLower0460_0512[$04CB]:=$04CC;
649 UnicodeLower0460_0512[$04CD]:=$04CE;
650 UnicodeLower0460_0512[$04D0]:=$04D1;
651 UnicodeLower0460_0512[$04D2]:=$04D3;
652 UnicodeLower0460_0512[$04D4]:=$04D5;
653 UnicodeLower0460_0512[$04D6]:=$04D7;
654 UnicodeLower0460_0512[$04D8]:=$04D9;
655 UnicodeLower0460_0512[$04DA]:=$04DB;
656 UnicodeLower0460_0512[$04DC]:=$04DD;
657 UnicodeLower0460_0512[$04DE]:=$04DF;
658 UnicodeLower0460_0512[$04E0]:=$04E1;
659 UnicodeLower0460_0512[$04E2]:=$04E3;
660 UnicodeLower0460_0512[$04E4]:=$04E5;
661 UnicodeLower0460_0512[$04E6]:=$04E7;
662 UnicodeLower0460_0512[$04E8]:=$04E9;
663 UnicodeLower0460_0512[$04EA]:=$04EB;
664 UnicodeLower0460_0512[$04EC]:=$04ED;
665 UnicodeLower0460_0512[$04EE]:=$04EF;
666 UnicodeLower0460_0512[$04F0]:=$04F1;
667 UnicodeLower0460_0512[$04F2]:=$04F3;
668 UnicodeLower0460_0512[$04F4]:=$04F5;
669 UnicodeLower0460_0512[$04F6]:=$04F7;
670 UnicodeLower0460_0512[$04F8]:=$04F9;
671 UnicodeLower0460_0512[$04FA]:=$04FB;
672 UnicodeLower0460_0512[$04FC]:=$04FD;
673 UnicodeLower0460_0512[$04FE]:=$04FF;
674 UnicodeLower0460_0512[$0500]:=$0501;
675 UnicodeLower0460_0512[$0502]:=$0503;
676 UnicodeLower0460_0512[$0504]:=$0505;
677 UnicodeLower0460_0512[$0506]:=$0507;
678 UnicodeLower0460_0512[$0508]:=$0509;
679 UnicodeLower0460_0512[$050A]:=$050B;
680 UnicodeLower0460_0512[$050C]:=$050D;
681 UnicodeLower0460_0512[$050E]:=$050F;
682 UnicodeLower0460_0512[$0510]:=$0511;
683 UnicodeLower0460_0512[$0512]:=$0513;
684
685 for i:=Low(UnicodeLower1E00_1FFC) to High(UnicodeLower1E00_1FFC) do
686 UnicodeLower1E00_1FFC[i]:=i;
687 UnicodeLower1E00_1FFC[$1E00]:=$1E01;
688 UnicodeLower1E00_1FFC[$1E02]:=$1E03;
689 UnicodeLower1E00_1FFC[$1E04]:=$1E05;
690 UnicodeLower1E00_1FFC[$1E06]:=$1E07;
691 UnicodeLower1E00_1FFC[$1E08]:=$1E09;
692 UnicodeLower1E00_1FFC[$1E0A]:=$1E0B;
693 UnicodeLower1E00_1FFC[$1E0C]:=$1E0D;
694 UnicodeLower1E00_1FFC[$1E0E]:=$1E0F;
695 UnicodeLower1E00_1FFC[$1E10]:=$1E11;
696 UnicodeLower1E00_1FFC[$1E12]:=$1E13;
697 UnicodeLower1E00_1FFC[$1E14]:=$1E15;
698 UnicodeLower1E00_1FFC[$1E16]:=$1E17;
699 UnicodeLower1E00_1FFC[$1E18]:=$1E19;
700 UnicodeLower1E00_1FFC[$1E1A]:=$1E1B;
701 UnicodeLower1E00_1FFC[$1E1C]:=$1E1D;
702 UnicodeLower1E00_1FFC[$1E1E]:=$1E1F;
703 UnicodeLower1E00_1FFC[$1E20]:=$1E21;
704 UnicodeLower1E00_1FFC[$1E22]:=$1E23;
705 UnicodeLower1E00_1FFC[$1E24]:=$1E25;
706 UnicodeLower1E00_1FFC[$1E26]:=$1E27;
707 UnicodeLower1E00_1FFC[$1E28]:=$1E29;
708 UnicodeLower1E00_1FFC[$1E2A]:=$1E2B;
709 UnicodeLower1E00_1FFC[$1E2C]:=$1E2D;
710 UnicodeLower1E00_1FFC[$1E2E]:=$1E2F;
711 UnicodeLower1E00_1FFC[$1E30]:=$1E31;
712 UnicodeLower1E00_1FFC[$1E32]:=$1E33;
713 UnicodeLower1E00_1FFC[$1E34]:=$1E35;
714 UnicodeLower1E00_1FFC[$1E36]:=$1E37;
715 UnicodeLower1E00_1FFC[$1E38]:=$1E39;
716 UnicodeLower1E00_1FFC[$1E3A]:=$1E3B;
717 UnicodeLower1E00_1FFC[$1E3C]:=$1E3D;
718 UnicodeLower1E00_1FFC[$1E3E]:=$1E3F;
719 UnicodeLower1E00_1FFC[$1E40]:=$1E41;
720 UnicodeLower1E00_1FFC[$1E42]:=$1E43;
721 UnicodeLower1E00_1FFC[$1E44]:=$1E45;
722 UnicodeLower1E00_1FFC[$1E46]:=$1E47;
723 UnicodeLower1E00_1FFC[$1E48]:=$1E49;
724 UnicodeLower1E00_1FFC[$1E4A]:=$1E4B;
725 UnicodeLower1E00_1FFC[$1E4C]:=$1E4D;
726 UnicodeLower1E00_1FFC[$1E4E]:=$1E4F;
727 UnicodeLower1E00_1FFC[$1E50]:=$1E51;
728 UnicodeLower1E00_1FFC[$1E52]:=$1E53;
729 UnicodeLower1E00_1FFC[$1E54]:=$1E55;
730 UnicodeLower1E00_1FFC[$1E56]:=$1E57;
731 UnicodeLower1E00_1FFC[$1E58]:=$1E59;
732 UnicodeLower1E00_1FFC[$1E5A]:=$1E5B;
733 UnicodeLower1E00_1FFC[$1E5C]:=$1E5D;
734 UnicodeLower1E00_1FFC[$1E5E]:=$1E5F;
735 UnicodeLower1E00_1FFC[$1E60]:=$1E61;
736 UnicodeLower1E00_1FFC[$1E62]:=$1E63;
737 UnicodeLower1E00_1FFC[$1E64]:=$1E65;
738 UnicodeLower1E00_1FFC[$1E66]:=$1E67;
739 UnicodeLower1E00_1FFC[$1E68]:=$1E69;
740 UnicodeLower1E00_1FFC[$1E6A]:=$1E6B;
741 UnicodeLower1E00_1FFC[$1E6C]:=$1E6D;
742 UnicodeLower1E00_1FFC[$1E6E]:=$1E6F;
743 UnicodeLower1E00_1FFC[$1E70]:=$1E71;
744 UnicodeLower1E00_1FFC[$1E72]:=$1E73;
745 UnicodeLower1E00_1FFC[$1E74]:=$1E75;
746 UnicodeLower1E00_1FFC[$1E76]:=$1E77;
747 UnicodeLower1E00_1FFC[$1E78]:=$1E79;
748 UnicodeLower1E00_1FFC[$1E7A]:=$1E7B;
749 UnicodeLower1E00_1FFC[$1E7C]:=$1E7D;
750 UnicodeLower1E00_1FFC[$1E7E]:=$1E7F;
751 UnicodeLower1E00_1FFC[$1E80]:=$1E81;
752 UnicodeLower1E00_1FFC[$1E82]:=$1E83;
753 UnicodeLower1E00_1FFC[$1E84]:=$1E85;
754 UnicodeLower1E00_1FFC[$1E86]:=$1E87;
755 UnicodeLower1E00_1FFC[$1E88]:=$1E89;
756 UnicodeLower1E00_1FFC[$1E8A]:=$1E8B;
757 UnicodeLower1E00_1FFC[$1E8C]:=$1E8D;
758 UnicodeLower1E00_1FFC[$1E8E]:=$1E8F;
759 UnicodeLower1E00_1FFC[$1E90]:=$1E91;
760 UnicodeLower1E00_1FFC[$1E92]:=$1E93;
761 UnicodeLower1E00_1FFC[$1E94]:=$1E95;
762 UnicodeLower1E00_1FFC[$1EA0]:=$1EA1;
763 UnicodeLower1E00_1FFC[$1EA2]:=$1EA3;
764 UnicodeLower1E00_1FFC[$1EA4]:=$1EA5;
765 UnicodeLower1E00_1FFC[$1EA6]:=$1EA7;
766 UnicodeLower1E00_1FFC[$1EA8]:=$1EA9;
767 UnicodeLower1E00_1FFC[$1EAA]:=$1EAB;
768 UnicodeLower1E00_1FFC[$1EAC]:=$1EAD;
769 UnicodeLower1E00_1FFC[$1EAE]:=$1EAF;
770 UnicodeLower1E00_1FFC[$1EB0]:=$1EB1;
771 UnicodeLower1E00_1FFC[$1EB2]:=$1EB3;
772 UnicodeLower1E00_1FFC[$1EB4]:=$1EB5;
773 UnicodeLower1E00_1FFC[$1EB6]:=$1EB7;
774 UnicodeLower1E00_1FFC[$1EB8]:=$1EB9;
775 UnicodeLower1E00_1FFC[$1EBA]:=$1EBB;
776 UnicodeLower1E00_1FFC[$1EBC]:=$1EBD;
777 UnicodeLower1E00_1FFC[$1EBE]:=$1EBF;
778 UnicodeLower1E00_1FFC[$1EC0]:=$1EC1;
779 UnicodeLower1E00_1FFC[$1EC2]:=$1EC3;
780 UnicodeLower1E00_1FFC[$1EC4]:=$1EC5;
781 UnicodeLower1E00_1FFC[$1EC6]:=$1EC7;
782 UnicodeLower1E00_1FFC[$1EC8]:=$1EC9;
783 UnicodeLower1E00_1FFC[$1ECA]:=$1ECB;
784 UnicodeLower1E00_1FFC[$1ECC]:=$1ECD;
785 UnicodeLower1E00_1FFC[$1ECE]:=$1ECF;
786 UnicodeLower1E00_1FFC[$1ED0]:=$1ED1;
787 UnicodeLower1E00_1FFC[$1ED2]:=$1ED3;
788 UnicodeLower1E00_1FFC[$1ED4]:=$1ED5;
789 UnicodeLower1E00_1FFC[$1ED6]:=$1ED7;
790 UnicodeLower1E00_1FFC[$1ED8]:=$1ED9;
791 UnicodeLower1E00_1FFC[$1EDA]:=$1EDB;
792 UnicodeLower1E00_1FFC[$1EDC]:=$1EDD;
793 UnicodeLower1E00_1FFC[$1EDE]:=$1EDF;
794 UnicodeLower1E00_1FFC[$1EE0]:=$1EE1;
795 UnicodeLower1E00_1FFC[$1EE2]:=$1EE3;
796 UnicodeLower1E00_1FFC[$1EE4]:=$1EE5;
797 UnicodeLower1E00_1FFC[$1EE6]:=$1EE7;
798 UnicodeLower1E00_1FFC[$1EE8]:=$1EE9;
799 UnicodeLower1E00_1FFC[$1EEA]:=$1EEB;
800 UnicodeLower1E00_1FFC[$1EEC]:=$1EED;
801 UnicodeLower1E00_1FFC[$1EEE]:=$1EEF;
802 UnicodeLower1E00_1FFC[$1EF0]:=$1EF1;
803 UnicodeLower1E00_1FFC[$1EF2]:=$1EF3;
804 UnicodeLower1E00_1FFC[$1EF4]:=$1EF5;
805 UnicodeLower1E00_1FFC[$1EF6]:=$1EF7;
806 UnicodeLower1E00_1FFC[$1EF8]:=$1EF9;
807 UnicodeLower1E00_1FFC[$1F08]:=$1F00;
808 UnicodeLower1E00_1FFC[$1F09]:=$1F01;
809 UnicodeLower1E00_1FFC[$1F0A]:=$1F02;
810 UnicodeLower1E00_1FFC[$1F0B]:=$1F03;
811 UnicodeLower1E00_1FFC[$1F0C]:=$1F04;
812 UnicodeLower1E00_1FFC[$1F0D]:=$1F05;
813 UnicodeLower1E00_1FFC[$1F0E]:=$1F06;
814 UnicodeLower1E00_1FFC[$1F0F]:=$1F07;
815 UnicodeLower1E00_1FFC[$1F18]:=$1F10;
816 UnicodeLower1E00_1FFC[$1F19]:=$1F11;
817 UnicodeLower1E00_1FFC[$1F1A]:=$1F12;
818 UnicodeLower1E00_1FFC[$1F1B]:=$1F13;
819 UnicodeLower1E00_1FFC[$1F1C]:=$1F14;
820 UnicodeLower1E00_1FFC[$1F1D]:=$1F15;
821 UnicodeLower1E00_1FFC[$1F28]:=$1F20;
822 UnicodeLower1E00_1FFC[$1F29]:=$1F21;
823 UnicodeLower1E00_1FFC[$1F2A]:=$1F22;
824 UnicodeLower1E00_1FFC[$1F2B]:=$1F23;
825 UnicodeLower1E00_1FFC[$1F2C]:=$1F24;
826 UnicodeLower1E00_1FFC[$1F2D]:=$1F25;
827 UnicodeLower1E00_1FFC[$1F2E]:=$1F26;
828 UnicodeLower1E00_1FFC[$1F2F]:=$1F27;
829 UnicodeLower1E00_1FFC[$1F38]:=$1F30;
830 UnicodeLower1E00_1FFC[$1F39]:=$1F31;
831 UnicodeLower1E00_1FFC[$1F3A]:=$1F32;
832 UnicodeLower1E00_1FFC[$1F3B]:=$1F33;
833 UnicodeLower1E00_1FFC[$1F3C]:=$1F34;
834 UnicodeLower1E00_1FFC[$1F3D]:=$1F35;
835 UnicodeLower1E00_1FFC[$1F3E]:=$1F36;
836 UnicodeLower1E00_1FFC[$1F3F]:=$1F37;
837 UnicodeLower1E00_1FFC[$1F48]:=$1F40;
838 UnicodeLower1E00_1FFC[$1F49]:=$1F41;
839 UnicodeLower1E00_1FFC[$1F4A]:=$1F42;
840 UnicodeLower1E00_1FFC[$1F4B]:=$1F43;
841 UnicodeLower1E00_1FFC[$1F4C]:=$1F44;
842 UnicodeLower1E00_1FFC[$1F4D]:=$1F45;
843 UnicodeLower1E00_1FFC[$1F59]:=$1F51;
844 UnicodeLower1E00_1FFC[$1F5B]:=$1F53;
845 UnicodeLower1E00_1FFC[$1F5D]:=$1F55;
846 UnicodeLower1E00_1FFC[$1F5F]:=$1F57;
847 UnicodeLower1E00_1FFC[$1F68]:=$1F60;
848 UnicodeLower1E00_1FFC[$1F69]:=$1F61;
849 UnicodeLower1E00_1FFC[$1F6A]:=$1F62;
850 UnicodeLower1E00_1FFC[$1F6B]:=$1F63;
851 UnicodeLower1E00_1FFC[$1F6C]:=$1F64;
852 UnicodeLower1E00_1FFC[$1F6D]:=$1F65;
853 UnicodeLower1E00_1FFC[$1F6E]:=$1F66;
854 UnicodeLower1E00_1FFC[$1F6F]:=$1F67;
855 UnicodeLower1E00_1FFC[$1F88]:=$1F80;
856 UnicodeLower1E00_1FFC[$1F89]:=$1F81;
857 UnicodeLower1E00_1FFC[$1F8A]:=$1F82;
858 UnicodeLower1E00_1FFC[$1F8B]:=$1F83;
859 UnicodeLower1E00_1FFC[$1F8C]:=$1F84;
860 UnicodeLower1E00_1FFC[$1F8D]:=$1F85;
861 UnicodeLower1E00_1FFC[$1F8E]:=$1F86;
862 UnicodeLower1E00_1FFC[$1F8F]:=$1F87;
863 UnicodeLower1E00_1FFC[$1F98]:=$1F90;
864 UnicodeLower1E00_1FFC[$1F99]:=$1F91;
865 UnicodeLower1E00_1FFC[$1F9A]:=$1F92;
866 UnicodeLower1E00_1FFC[$1F9B]:=$1F93;
867 UnicodeLower1E00_1FFC[$1F9C]:=$1F94;
868 UnicodeLower1E00_1FFC[$1F9D]:=$1F95;
869 UnicodeLower1E00_1FFC[$1F9E]:=$1F96;
870 UnicodeLower1E00_1FFC[$1F9F]:=$1F97;
871 UnicodeLower1E00_1FFC[$1FA8]:=$1FA0;
872 UnicodeLower1E00_1FFC[$1FA9]:=$1FA1;
873 UnicodeLower1E00_1FFC[$1FAA]:=$1FA2;
874 UnicodeLower1E00_1FFC[$1FAB]:=$1FA3;
875 UnicodeLower1E00_1FFC[$1FAC]:=$1FA4;
876 UnicodeLower1E00_1FFC[$1FAD]:=$1FA5;
877 UnicodeLower1E00_1FFC[$1FAE]:=$1FA6;
878 UnicodeLower1E00_1FFC[$1FAF]:=$1FA7;
879 UnicodeLower1E00_1FFC[$1FB8]:=$1FB0;
880 UnicodeLower1E00_1FFC[$1FB9]:=$1FB1;
881 UnicodeLower1E00_1FFC[$1FBA]:=$1F70;
882 UnicodeLower1E00_1FFC[$1FBB]:=$1F71;
883 UnicodeLower1E00_1FFC[$1FBC]:=$1FB3;
884 UnicodeLower1E00_1FFC[$1FC8]:=$1F72;
885 UnicodeLower1E00_1FFC[$1FC9]:=$1F73;
886 UnicodeLower1E00_1FFC[$1FCA]:=$1F74;
887 UnicodeLower1E00_1FFC[$1FCB]:=$1F75;
888 UnicodeLower1E00_1FFC[$1FCC]:=$1FC3;
889 UnicodeLower1E00_1FFC[$1FD8]:=$1FD0;
890 UnicodeLower1E00_1FFC[$1FD9]:=$1FD1;
891 UnicodeLower1E00_1FFC[$1FDA]:=$1F76;
892 UnicodeLower1E00_1FFC[$1FDB]:=$1F77;
893 UnicodeLower1E00_1FFC[$1FE8]:=$1FE0;
894 UnicodeLower1E00_1FFC[$1FE9]:=$1FE1;
895 UnicodeLower1E00_1FFC[$1FEA]:=$1F7A;
896 UnicodeLower1E00_1FFC[$1FEB]:=$1F7B;
897 UnicodeLower1E00_1FFC[$1FEC]:=$1FE5;
898 UnicodeLower1E00_1FFC[$1FF8]:=$1F78;
899 UnicodeLower1E00_1FFC[$1FF9]:=$1F79;
900 UnicodeLower1E00_1FFC[$1FFA]:=$1F7C;
901 UnicodeLower1E00_1FFC[$1FFB]:=$1F7D;
902 UnicodeLower1E00_1FFC[$1FFC]:=$1FF3;
903
904 for i:=Low(UnicodeLower2126_2183) to High(UnicodeLower2126_2183) do
905 UnicodeLower2126_2183[i]:=i;
906 UnicodeLower2126_2183[$2126]:=$03C9;
907 UnicodeLower2126_2183[$212A]:=$006B;
908 UnicodeLower2126_2183[$212B]:=$00E5;
909 UnicodeLower2126_2183[$2132]:=$214E;
910 UnicodeLower2126_2183[$2160]:=$2170;
911 UnicodeLower2126_2183[$2161]:=$2171;
912 UnicodeLower2126_2183[$2162]:=$2172;
913 UnicodeLower2126_2183[$2163]:=$2173;
914 UnicodeLower2126_2183[$2164]:=$2174;
915 UnicodeLower2126_2183[$2165]:=$2175;
916 UnicodeLower2126_2183[$2166]:=$2176;
917 UnicodeLower2126_2183[$2167]:=$2177;
918 UnicodeLower2126_2183[$2168]:=$2178;
919 UnicodeLower2126_2183[$2169]:=$2179;
920 UnicodeLower2126_2183[$216A]:=$217A;
921 UnicodeLower2126_2183[$216B]:=$217B;
922 UnicodeLower2126_2183[$216C]:=$217C;
923 UnicodeLower2126_2183[$216D]:=$217D;
924 UnicodeLower2126_2183[$216E]:=$217E;
925 UnicodeLower2126_2183[$216F]:=$217F;
926 UnicodeLower2126_2183[$2183]:=$2184;
927
928 for i:=Low(UnicodeLower2C60_2CE2) to High(UnicodeLower2C60_2CE2) do
929 UnicodeLower2C60_2CE2[i]:=i;
930 UnicodeLower2C60_2CE2[$2C60]:=$2C61;
931 UnicodeLower2C60_2CE2[$2C62]:=$026B;
932 UnicodeLower2C60_2CE2[$2C63]:=$1D7D;
933 UnicodeLower2C60_2CE2[$2C64]:=$027D;
934 UnicodeLower2C60_2CE2[$2C67]:=$2C68;
935 UnicodeLower2C60_2CE2[$2C69]:=$2C6A;
936 UnicodeLower2C60_2CE2[$2C6B]:=$2C6C;
937 UnicodeLower2C60_2CE2[$2C75]:=$2C76;
938 UnicodeLower2C60_2CE2[$2C80]:=$2C81;
939 UnicodeLower2C60_2CE2[$2C82]:=$2C83;
940 UnicodeLower2C60_2CE2[$2C84]:=$2C85;
941 UnicodeLower2C60_2CE2[$2C86]:=$2C87;
942 UnicodeLower2C60_2CE2[$2C88]:=$2C89;
943 UnicodeLower2C60_2CE2[$2C8A]:=$2C8B;
944 UnicodeLower2C60_2CE2[$2C8C]:=$2C8D;
945 UnicodeLower2C60_2CE2[$2C8E]:=$2C8F;
946 UnicodeLower2C60_2CE2[$2C90]:=$2C91;
947 UnicodeLower2C60_2CE2[$2C92]:=$2C93;
948 UnicodeLower2C60_2CE2[$2C94]:=$2C95;
949 UnicodeLower2C60_2CE2[$2C96]:=$2C97;
950 UnicodeLower2C60_2CE2[$2C98]:=$2C99;
951 UnicodeLower2C60_2CE2[$2C9A]:=$2C9B;
952 UnicodeLower2C60_2CE2[$2C9C]:=$2C9D;
953 UnicodeLower2C60_2CE2[$2C9E]:=$2C9F;
954 UnicodeLower2C60_2CE2[$2CA0]:=$2CA1;
955 UnicodeLower2C60_2CE2[$2CA2]:=$2CA3;
956 UnicodeLower2C60_2CE2[$2CA4]:=$2CA5;
957 UnicodeLower2C60_2CE2[$2CA6]:=$2CA7;
958 UnicodeLower2C60_2CE2[$2CA8]:=$2CA9;
959 UnicodeLower2C60_2CE2[$2CAA]:=$2CAB;
960 UnicodeLower2C60_2CE2[$2CAC]:=$2CAD;
961 UnicodeLower2C60_2CE2[$2CAE]:=$2CAF;
962 UnicodeLower2C60_2CE2[$2CB0]:=$2CB1;
963 UnicodeLower2C60_2CE2[$2CB2]:=$2CB3;
964 UnicodeLower2C60_2CE2[$2CB4]:=$2CB5;
965 UnicodeLower2C60_2CE2[$2CB6]:=$2CB7;
966 UnicodeLower2C60_2CE2[$2CB8]:=$2CB9;
967 UnicodeLower2C60_2CE2[$2CBA]:=$2CBB;
968 UnicodeLower2C60_2CE2[$2CBC]:=$2CBD;
969 UnicodeLower2C60_2CE2[$2CBE]:=$2CBF;
970 UnicodeLower2C60_2CE2[$2CC0]:=$2CC1;
971 UnicodeLower2C60_2CE2[$2CC2]:=$2CC3;
972 UnicodeLower2C60_2CE2[$2CC4]:=$2CC5;
973 UnicodeLower2C60_2CE2[$2CC6]:=$2CC7;
974 UnicodeLower2C60_2CE2[$2CC8]:=$2CC9;
975 UnicodeLower2C60_2CE2[$2CCA]:=$2CCB;
976 UnicodeLower2C60_2CE2[$2CCC]:=$2CCD;
977 UnicodeLower2C60_2CE2[$2CCE]:=$2CCF;
978 UnicodeLower2C60_2CE2[$2CD0]:=$2CD1;
979 UnicodeLower2C60_2CE2[$2CD2]:=$2CD3;
980 UnicodeLower2C60_2CE2[$2CD4]:=$2CD5;
981 UnicodeLower2C60_2CE2[$2CD6]:=$2CD7;
982 UnicodeLower2C60_2CE2[$2CD8]:=$2CD9;
983 UnicodeLower2C60_2CE2[$2CDA]:=$2CDB;
984 UnicodeLower2C60_2CE2[$2CDC]:=$2CDD;
985 UnicodeLower2C60_2CE2[$2CDE]:=$2CDF;
986 UnicodeLower2C60_2CE2[$2CE0]:=$2CE1;
987 UnicodeLower2C60_2CE2[$2CE2]:=$2CE3;
988 end;
989
// Returns the lowercase counterpart of the Unicode code point u.
// Code points without a known lowercase mapping are returned unchanged.
//
// Below $00C0 only the ASCII capitals 'A'..'Z' ($0041..$005A) are mapped.
// Above that, a range is either looked up in one of the UnicodeLower* tables
// (indexed directly by the code point) or shifted by a constant offset.
function UnicodeLowercase(u: cardinal): cardinal;
begin
  if u<$00C0 then begin
    // most common
    // The upper bound must be 'Z' ($005A): a bound of 'a' ($0061) would also
    // shift '[', '\', ']', '^', '_', '`' and 'a' itself by 32.
    if (u>=$0041) and (u<=$005A) then
      Result:=u+32
    else
      Result:=u;
  end else
    case u of
      $00C0..$00DE: Result:=UnicodeLower00C0_00DE[u];
      $0100..$024E: Result:=UnicodeLower0100_024E[u];
      $0386..$03AB: Result:=UnicodeLower0386_03AB[u];
      $03D8..$042F: Result:=UnicodeLower03D8_042F[u];
      $0460..$0512: Result:=UnicodeLower0460_0512[u];
      $0531..$0556: Result:=u+48;    // Armenian capitals -> $0561..$0586
      $10A0..$10C5: Result:=u+7264;  // Georgian capitals -> $2D00..$2D25
      $1E00..$1FFC: Result:=UnicodeLower1E00_1FFC[u];
      $2126..$2183: Result:=UnicodeLower2126_2183[u];
      $24B6..$24CF: Result:=u+26;    // circled Latin capitals -> $24D0..$24E9
      $2C00..$2C2E: Result:=u+48;    // Glagolitic capitals -> $2C30..$2C5E
      $2C60..$2CE2: Result:=UnicodeLower2C60_2CE2[u];
      $FF21..$FF3A: Result:=u+32;    // fullwidth Latin capitals -> $FF41..$FF5A
    else
      Result:=u;
    end;
end;
1016
1017 {$IFDEF FPC}
// Lowercases the UTF-8 string s when the result may have a different byte
// length than the input.
//
// Works in two passes over s:
//   1. compute the byte length of the lowercased string,
//   2. allocate Result with that length and fill it.
// Bytes #192..#240 are treated as the start of a multi byte sequence and are
// decoded, mapped with UnicodeLowercase and re-encoded. ASCII 'A'..'Z' are
// lowercased in place (same length). Every other byte is copied unchanged.
function UTF8LowercaseDynLength(const s: string): string;
var
  Buf: shortstring;   // scratch space, only used to measure an encoded char
  SrcPos: PtrInt;
  DstPos: PtrInt;
  CharLen: integer;
  OldCode: LongWord;
  NewCode: LongWord;
begin
  // first compute needed length
  SrcPos:=1;
  DstPos:=1;
  while SrcPos<=length(s) do begin
    case s[SrcPos] of
      #192..#240:
        begin
          OldCode:=UTF8CodepointToUnicode(@s[SrcPos],CharLen);
          NewCode:=UnicodeLowercase(OldCode);
          if NewCode=OldCode then begin
            inc(DstPos,CharLen);
          end else begin
            // the encoded bytes are discarded, only their count matters here
            inc(DstPos,UnicodeToUTF8(NewCode,@Buf[1]));
          end;
          inc(SrcPos,CharLen);
        end;
    else
      // single byte (including 'A'..'Z'): occupies one byte in the result
      inc(SrcPos);
      inc(DstPos);
    end;
  end;
  SetLength(Result,DstPos-1);
  if Result='' then exit;
  // create the new string
  SrcPos:=1;
  DstPos:=1;
  while SrcPos<=length(s) do begin
    case s[SrcPos] of
      'A'..'Z':
        begin
          // ASCII capitals must be lowercased here as well, otherwise they
          // survive whenever UTF8LowerCaseViaTables falls back to this routine
          Result[DstPos]:=chr(ord(s[SrcPos])+32);
          inc(SrcPos);
          inc(DstPos);
        end;
      #192..#240:
        begin
          OldCode:=UTF8CodepointToUnicode(@s[SrcPos],CharLen);
          NewCode:=UnicodeLowercase(OldCode);
          if NewCode=OldCode then begin
            // unchanged character: copy the original bytes verbatim
            System.Move(s[SrcPos],Result[DstPos],CharLen);
            inc(DstPos,CharLen);
          end else begin
            // changed character: encode directly into the result
            inc(DstPos,UnicodeToUTF8(NewCode,@Result[DstPos]));
          end;
          inc(SrcPos,CharLen);
        end;
    else
      Result[DstPos]:=s[SrcPos];
      inc(SrcPos);
      inc(DstPos);
    end;
  end;
end;
1074
// Returns the lowercase form of the UTF-8 string s.
//
// Fast path: Result starts as a reference to s and is only made unique when
// the first character that needs changing is found; characters are then
// replaced in place. This only works while the lowercase character has the
// same byte length as the original. As soon as a length change is detected
// the work done so far is dropped and UTF8LowercaseDynLength(s) is used.
function UTF8LowerCaseViaTables(const s: string): string;
var
  i: PtrInt;
  CharLen: integer;
  OldCode: LongWord;
  NewCode: LongWord;
  NewCharLen: integer;
  Changed: Boolean;
  p: PChar;
  Buf: shortstring;   // scratch space for the re-encoded character
begin
  Result:=s;
  if Result='' then exit;
  Changed:=false;
  p:=PChar(Result);
  repeat
    case p^ of
      #0:
        // a #0 at offset length(Result) is the terminator, any other #0 is
        // ordinary string content and is skipped
        if p-PChar(Result)=length(Result) then
          exit
        else
          inc(p);
      'A'..'Z': // First ASCII chars
        begin
          if not Changed then begin
            // UniqueString may move the data, so re-derive p from its index
            i:=p-PChar(Result)+1;
            UniqueString(Result);
            Changed:=true;
            p:=@Result[i];
          end;
          p^:=chr(ord(p^)+32);
          inc(p);
        end;

      #192..#240: // Now chars with multiple bytes
        begin
          OldCode:=UTF8CodepointToUnicode(p,CharLen);
          NewCode:=UnicodeLowercase(OldCode);
          if NewCode<>OldCode then begin
            // Encode into the scratch buffer first: writing straight to p
            // would overwrite the following bytes (or the terminating #0)
            // whenever the lowercase form is longer than the original.
            NewCharLen:=UnicodeToUTF8(NewCode,@Buf[1]);
            if CharLen<>NewCharLen then begin
              // string size changed => use slower function
              Result:=UTF8LowercaseDynLength(s);
              exit;
            end;
            if not Changed then begin
              i:=p-PChar(Result)+1;
              UniqueString(Result);
              Changed:=true;
              p:=@Result[i];
            end;
            // same length: replace the character in place
            System.Move(Buf[1],p^,NewCharLen);
          end;
          inc(p,CharLen);
        end;
    else
      inc(p);
    end;
  until false;
end;
1133 {$ENDIF}
1134
// Unit start-up code: calls InitUnicodeTables once when the unit is
// initialized.
// NOTE(review): presumably this is what fills the UnicodeLower* lookup tables
// read by UnicodeLowercase - confirm against the InitUnicodeTables header.
initialization
InitUnicodeTables;
end.
1138
1139