1{%MainUnit lazutf8.pas} 2 3{$IF DEFINED(UTF8_RTL) AND NOT DEFINED(WINCE)} 4 {$DEFINE ArgsWAsUTF8} 5{$ENDIF} 6 7var 8 //Function prototypes 9 _ParamStrUtf8: Function(Param: Integer): string; 10 11var 12 ArgsW: Array of WideString; 13 ArgsWCount: Integer; // length(ArgsW)+1 14 {$IFDEF ArgsWAsUTF8} 15 ArgsUTF8: Array of String; // the ArgsW array as UTF8 16 OldArgV: PPChar = nil; 17 {$IFEND} 18 19//************ START "Stubs" that just call Ansi or Wide implementation 20 21function ParamStrUTF8(Param: Integer): string; 22begin 23 Result := _ParamStrUtf8(Param); 24end; 25 26//************ END "Stubs" that just call Ansi or Wide implementation 27 28 29//*************** START Non WideString implementations 30{$ifndef wince} 31function ParamStrUtf8Ansi(Param: Integer): String; 32begin 33 Result:=SysToUTF8(ObjPas.ParamStr(Param)); 34end; 35{$endif wince} 36 37//*************** END Non WideString impementations 38 39 40 41 42//*************** START WideString impementations 43 44 45{$IFDEF ArgsWAsUTF8} 46procedure SetupArgvAsUtf8; 47var 48 i: Integer; 49begin 50 SetLength(ArgsUTF8,length(ArgsW)); 51 OldArgV:=argv; 52 GetMem(argv,SizeOf(Pointer)*length(ArgsW)); 53 for i:=0 to length(ArgsW)-1 do 54 begin 55 ArgsUTF8[i]:=ArgsW{%H-}[i]; 56 argv[i]:=PChar(ArgsUTF8[i]); 57 end; 58end; 59{$endif} 60 61procedure SetupCommandlineParametersWide; 62var 63 ArgLen, Start, CmdLen, i, j: SizeInt; 64 Quote : Boolean; 65 Buf: array[0..259] of WChar; // need MAX_PATH bytes, not 256! 66 PCmdLineW: PWideChar; 67 CmdLineW: WideString; 68 69 procedure AllocArg(Idx, Len:longint); 70 begin 71 if (Idx >= ArgsWCount) then 72 SetLength(ArgsW, Idx + 1); 73 SetLength(ArgsW[Idx], Len); 74 end; 75 76begin 77 { create commandline, it starts with the executed filename which is argv[0] } 78 { Win32 passes the command NOT via the args, but via getmodulefilename} 79 ArgsWCount := 0; 80 ArgLen := GetModuleFileNameW(0, @buf[0], sizeof(buf)); 81 82 //writeln('ArgLen = ',Arglen); 83 84 buf[ArgLen] := #0; // be safe, no terminating 0 on XP 85 allocarg(0,arglen); 86 move(buf[0],ArgsW[0][1],arglen * SizeOf(WChar)); 87 88 //writeln('ArgsW[0] = ',ArgsW[0]); 89 90 PCmdLineW := nil; 91 { Setup cmdline variable } 92 PCmdLineW := GetCommandLineW; 93 CmdLen := StrLen(PCmdLineW); 94 95 //writeln('StrLen(PCmdLineW) = ',CmdLen); 96 97 SetLength(CmdLineW, CmdLen); 98 Move(PCmdLineW^, CmdLineW[1], CmdLen * SizeOf(WChar)); 99 100 101 //debugln(CmdLineW); 102 //for i := 1 to CmdLen do DbgOut(DbgS(i mod 10)); debugln; 103 104 i := 1; 105 while (i <= CmdLen) do 106 begin 107 //debugln('Next'); 108 //DbgOut('i=',DbgS(i),' CmdLineW[',DbgS(i),']=');if i<=CmdLen then debugln(CmdLineW[i]) else debugln('#0'); 109 //skip leading spaces 110 while (i <= CmdLen) and (CmdLineW[i] <= #32) do Inc(i); 111 //DbgOut('After skipping spaces: i=',DbgS(i),' CmdLineW[',DbgS(i),']=');if i<=CmdLen then debugln(CmdLineW[i]) else debugln('#0'); 112 if (i > CmdLen) then Break; 113 Quote := False; 114 Start := i; 115 ArgLen := 0; 116 while (i <= CmdLen) do 117 begin //find next commandline parameter 118 case CmdLineW[i] of 119 #1..#32: 120 begin 121 if Quote then 122 begin 123 //debugln('i=',DbgS(i),': Space in Quote'); 124 Inc(ArgLen) 125 end 126 else 127 begin 128 //debugln('i=',DbgS(i),': Space in NOT Quote'); 129 Break; 130 end; 131 end; 132 '"': 133 begin 134 if (i < CmdLen) and (CmdLineW[i+1] <> '"') then 135 begin 136 //debugln('i=',DbgS(i),': Quote := not Quote'); 137 Quote := not Quote 138 end 139 else 140 begin 141 //debugln('i=',DbgS(i),': Skip Quote'); 142 Inc(i); 143 end; 144 end; 145 else Inc(ArgLen); 146 end;//case 147 Inc(i); 148 end; //find next commandline parameter 149 150 //debugln('ArgWCount=',DbgS(ArgsWCount),' Start=',DbgS(start),' ArgLen=',DbgS(arglen),' i=',DbgS(i)); 151 152 //we already have (a better) ArgW[0] 153 if (ArgsWCount > 0) then 154 begin //Process commandline parameter 155 AllocArg(ArgsWCount, ArgLen); 156 Quote := False; 157 i := Start; 158 j := 1; 159 while (i <= CmdLen) do 160 begin 161 case CmdLineW[i] of 162 #1..#32: 163 begin 164 if Quote then 165 begin 166 //if j > ArgLen then debugln('Error whitespace: j > ArgLen: j=',DbgS(j),' ArgLen=',DbgS(arglen)); 167 ArgsW[ArgsWCount][j] := CmdLineW[i]; 168 Inc(j); 169 end 170 else 171 Break; 172 end; 173 '"': 174 begin 175 if (i < CmdLen) and (CmdLineW[i+1] <> '"') then 176 Quote := not Quote 177 else 178 Inc(i); 179 end; 180 else 181 begin 182 //if j > ArgLen then debugln('Error Quote: j > ArgLen: j=',DbgS(j),' ArgLen=',DbgS(arglen)); 183 ArgsW[ArgsWCount][j] := CmdLineW[i]; 184 Inc(j); 185 end; 186 end; 187 Inc(i); 188 end; 189 190 //debugln('ArgsW[',DbgS(ArgsWCount),'] = ',ArgsW[ArgsWCount]); 191 end; // Process commandline parameter 192 Inc(ArgsWCount); 193 194 end; 195 Dec(ArgsWCount); 196 //Note: 197 //On WinCe Argsv is a static function, so we cannot change it. 198 //This might change in the future if Argsv on WinCE will be declared as a function variable 199 {$IFDEF ArgsWAsUTF8} 200 if DefaultSystemCodePage=CP_UTF8 then 201 SetupArgvAsUtf8; 202 {$IFEND} 203end; 204 205function ParamStrUtf8Wide(Param: Integer): String; 206begin 207 if ArgsWCount <> ParamCount then 208 begin 209 //DebugLn('Error: ParamCount <> ArgsWCount!'); 210 Result := SysToUtf8(ObjPas.ParamStr(Param)); 211 end 212 else 213 begin 214 if (Param <= ArgsWCount) then 215 {$IFDEF ACP_RTL} 216 Result := String(UnicodeString(ArgsW[Param])) 217 {$ELSE} 218 Result := Utf8Encode(ArgsW[Param]) 219 {$ENDIF ACP_RTL} 220 else 221 Result := ''; 222 end; 223end; 224 225{$IFNDEF WINCE} 226function GetGetEnvironmentVariableCountWide: integer; 227var 228 hp,p : PWideChar; 229begin 230 Result:=0; 231 p:=GetEnvironmentStringsW; 232 if p=nil then exit; 233 hp:=p; 234 while hp^<>#0 do 235 begin 236 Inc(Result); 237 hp:=hp+strlen(hp)+1; 238 end; 239 FreeEnvironmentStringsW(p); 240end; 241 242 243function GetEnvironmentStringWide(Index: Integer): UnicodeString; 244var 245 hp,p : PWideChar; 246begin 247 Result:=''; 248 p:=GetEnvironmentStringsW; 249 if p=nil then exit; 250 hp:=p; 251 while (hp^<>#0) and (Index>1) do 252 begin 253 Dec(Index); 254 hp:=hp+strlen(hp)+1; 255 end; 256 if (hp^<>#0) then 257 Result:=hp; 258 FreeEnvironmentStringsW(p); 259end; 260{$ENDIF WINCE} 261 262function GetEnvironmentVariableWide(const EnvVar: string): UnicodeString; 263{$IF FPC_FULLVERSION>=30000} 264begin 265 Result:=GetEnvironmentVariable(UTF8ToUTF16(EnvVar)); 266end; 267{$ELSE} 268var 269 s, upperenv : Unicodestring; 270 i : longint; 271 hp,p : pwidechar; 272begin 273 Result:=''; 274 p:=GetEnvironmentStringsW; 275 hp:=p; 276 upperenv:=uppercase(envvar); 277 while hp^<>#0 do 278 begin 279 s:=hp; 280 i:=pos('=',s); 281 if uppercase(copy(s,1,i-1))=upperenv then 282 begin 283 Result:=copy(s,i+1,length(s)-i); 284 break; 285 end; 286 { next string entry} 287 hp:=hp+strlen(hp)+1; 288 end; 289 FreeEnvironmentStringsW(p); 290end; 291{$ENDIF} 292 293 294//*************** END WideString impementations 295 296{$ifdef WinCE} 297function ConsoleToUTF8(const s: string): string;// converts console encoding to UTF8 298begin 299 Result := SysToUTF8(s); 300end; 301{$else} 302function ConsoleToUTF8(const s: string): string;// converts console encoding to UTF8 303var 304 Dst: PChar; 305begin 306 Dst := AllocMem((Length(s) + 1) * SizeOf(Char)); 307 if OemToChar(PChar(s), Dst) then 308 Result := StrPas(Dst) 309 else 310 Result := s; 311 FreeMem(Dst); 312 Result := WinCPToUTF8(Result); 313end; 314{$endif not wince} 315 316{$ifdef WinCe} 317function UTF8ToConsole(const s: string): string; // converts UTF8 to console string (used by Write, WriteLn) 318begin 319 Result := UTF8ToSys(s); 320end; 321{$else} 322function UTF8ToConsole(const s: string): string; // converts UTF8 to console string (used by Write, WriteLn) 323var 324 Dst: PChar; 325begin 326 Result := UTF8ToWinCP(s); 327 Dst := AllocMem((Length(Result) + 1) * SizeOf(Char)); 328 if CharToOEM(PChar(Result), Dst) then 329 Result := StrPas(Dst); 330 FreeMem(Dst); 331 SetCodePage(RawByteString(Result), CP_OEMCP, False); 332end; 333{$endif not WinCE} 334 335{$ifdef WinCE} 336function WinCPToUTF8(const s: string): string; inline; 337begin 338 Result := SysToUtf8(s); 339end; 340{$else} 341// for all Windows supporting 8bit codepages (e.g. not WinCE) 342function WinCPToUTF8(const s: string): string; 343// result has codepage CP_ACP 344var 345 UTF16WordCnt: SizeInt; 346 UTF16Str: UnicodeString; 347begin 348 Result:=s; 349 if IsASCII(Result) then begin 350 {$ifdef FPC_HAS_CPSTRING} 351 // prevent codepage conversion magic 352 SetCodePage(RawByteString(Result), CP_ACP, False); 353 {$endif} 354 exit; 355 end; 356 UTF16WordCnt:=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, Pointer(s), length(s), nil, 0); 357 // this will null-terminate 358 if UTF16WordCnt>0 then 359 begin 360 setlength(UTF16Str, UTF16WordCnt); 361 MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, Pointer(s), length(s), @UTF16Str[1], UTF16WordCnt); 362 Result:=UTF8Encode(UTF16Str); 363 {$ifdef FPC_HAS_CPSTRING} 364 // prevent codepage conversion magic 365 SetCodePage(RawByteString(Result), CP_ACP, False); 366 {$endif} 367 end; 368end; 369{$endif not wince} 370 371{$ifdef WinCe} 372function UTF8ToWinCP(const s: string): string; inline; 373begin 374 Result := Utf8ToSys(s); 375end; 376{$else} 377function UTF8ToWinCP(const s: string): string; 378// result has codepage CP_ACP 379var 380 src: UnicodeString; 381 len: LongInt; 382begin 383 Result:=s; 384 if IsASCII(Result) then begin 385 {$ifdef FPC_HAS_CPSTRING} 386 // prevent codepage conversion magic 387 SetCodePage(RawByteString(Result), CP_ACP, False); 388 {$endif} 389 exit; 390 end; 391 src:=UTF8Decode(s); 392 if src='' then 393 exit; 394 len:=WideCharToMultiByte(CP_ACP,0,PUnicodeChar(src),length(src),nil,0,nil,nil); 395 SetLength(Result,len); 396 if len>0 then begin 397 WideCharToMultiByte(CP_ACP,0,PUnicodeChar(src),length(src),@Result[1],length(Result),nil,nil); 398 {$ifdef FPC_HAS_CPSTRING} 399 // prevent codepage conversion magic 400 SetCodePage(RawByteString(Result), CP_ACP, False); 401 {$endif} 402 end; 403end; 404{$endif not wince} 405 406{$ifdef debugparamstrutf8} 407procedure ParamStrUtf8Error; 408var 409 i: Integer; 410begin 411 writeln('Error in Windows WideString implementation of ParamStrUtf8'); 412 writeln('Using SysToUtf8(ParamsStr(Param)) as fallback'); 413 writeln('ParamCount = ',ParamCount,', ArgsWCount = ',ArgsWCount); 414 for i := 0 to ParamCount do writeln('ParamStr(',i,') = "',ParamStr(i),'"'); 415 writeln; 416 for i := 0 to ArgsWCount do writeln('ParamStrUtf8(',i,') = "',ArgsW[i],'"'); 417end; 418{$endif} 419 420function GetLocaleStr(aLocaleID, aLCType: Longint; const Def: string): String; 421var 422 L: Integer; 423 Buf: array[0..255] of WideChar; 424begin 425 L := GetLocaleInfoW(aLocaleID, aLCType, Buf, SizeOf(Buf)); 426 if L > 0 then 427 begin 428 Result:=''; 429 widestringmanager.Wide2AnsiMoveProc(PWideChar(@Buf[0]),Result,CP_UTF8,L-1); 430 end 431 else 432 Result := Def; 433end; 434 435function GetLocaleCharUTF8(aLocaleID, aLCType: Longint; Def: Char): Char; 436var 437 Buf: array[0..3] of WideChar; // sdate allows 4 chars (3+ending #0) 438 GLI, I: LongInt; 439 WRes: WideChar; 440begin 441 //Use Widestring Api so it works on WinCE as well 442 GLI := GetLocaleInfoW(aLocaleID, aLCType, Buf, Length(Buf)); // GLI is char count with the ending #0 char 443 if GLI > 2 then 444 begin // more than 1 char -> try to find first non-space character 445 for I := 0 to GLI-2 do 446 begin 447 WRes := Buf[I]; 448 case Buf[I] of 449 #32, #$00A0, #$2002, #$2003, #$2009, #$202F: begin end;// go over spaces 450 else 451 Break; // stop at non-space 452 end; 453 end; 454 end else 455 if GLI = 2 then // 1 char 456 WRes := Buf[0] 457 else 458 WRes := Def; 459 460 case WRes of 461 #0..#127: Result := WRes;// ASCII - OK 462 #$00A0: Result := ' '; // non breakable space 463 #$00B7: Result := '.'; // middle stop 464 #$02D9: Result := ''''; // dot above, italian handwriting 465 #$066B: Result := ','; // arabic decimal separator, persian thousand separator 466 #$066C: Result := ''''; // arabic thousand separator 467 #$2002: Result := ' '; // long space 468 #$2003: Result := ' '; // long space 469 #$2009: Result := ' '; // thin space 470 #$202F: Result := ' '; // narrow non breakable space 471 #$2014: Result := '-'; // persian decimal mark 472 #$2396: Result := ''''; // codepoint 9110 decimal separator 473 { Utf8 Utf16 474 C2 A0 -> 00A0 475 C2 B7 -> 00B7 476 CB 99 -> 02D9 477 D9 AB -> 066B 478 D9 AC -> 066C 479 E2 80 82 -> 2002 480 E2 80 83 -> 2003 481 E2 80 89 -> 2009 482 E2 80 AF -> 202F 483 E2 80 94 -> 2014 484 E2 8E 96 -> 2396 485 } 486 else // unicode character -> we need default ASCII char 487 Result := Def; 488 end; //case 489end; 490 491procedure GetLocaleFormatSettingsUTF8(LCID: Integer; var aFormatSettings: TFormatSettings); 492var 493 HF : Shortstring; 494 LID : Windows.LCID; 495 I,Day : longint; 496begin 497 LID := LCID; 498 with aFormatSettings do 499 begin 500 { Date stuff } 501 for I := 1 to 12 do 502 begin 503 ShortMonthNames[I]:=GetLocaleStr(LID,LOCALE_SABBREVMONTHNAME1+I-1,ShortMonthNames[i]); 504 LongMonthNames[I]:=GetLocaleStr(LID,LOCALE_SMONTHNAME1+I-1,LongMonthNames[i]); 505 end; 506 for I := 1 to 7 do 507 begin 508 Day := (I + 5) mod 7; 509 ShortDayNames[I]:=GetLocaleStr(LID,LOCALE_SABBREVDAYNAME1+Day,ShortDayNames[i]); 510 LongDayNames[I]:=GetLocaleStr(LID,LOCALE_SDAYNAME1+Day,LongDayNames[i]); 511 end; 512 DateSeparator := GetLocaleCharUTF8(LID, LOCALE_SDATE, '/'); 513 ShortDateFormat := GetLocaleStr(LID, LOCALE_SSHORTDATE, 'm/d/yy'); 514 LongDateFormat := GetLocaleStr(LID, LOCALE_SLONGDATE, 'mmmm d, yyyy'); 515 { Time stuff } 516 TimeSeparator := GetLocaleCharUTF8(LID, LOCALE_STIME, ':'); 517 TimeAMString := GetLocaleStr(LID, LOCALE_S1159, 'AM'); 518 TimePMString := GetLocaleStr(LID, LOCALE_S2359, 'PM'); 519 if StrToIntDef(GetLocaleStr(LID, LOCALE_ITLZERO, '0'), 0) = 0 then 520 HF:='h' 521 else 522 HF:='hh'; 523 // No support for 12 hour stuff at the moment... 524 ShortTimeFormat := HF+':nn'; 525 LongTimeFormat := HF + ':nn:ss'; 526 { Currency stuff } 527 CurrencyString:=GetLocaleStr(LID, LOCALE_SCURRENCY, ''); 528 CurrencyFormat:=StrToIntDef(GetLocaleStr(LID, LOCALE_ICURRENCY, '0'), 0); 529 NegCurrFormat:=StrToIntDef(GetLocaleStr(LID, LOCALE_INEGCURR, '0'), 0); 530 { Number stuff } 531 ThousandSeparator:=GetLocaleCharUTF8(LID, LOCALE_STHOUSAND, ','); 532 DecimalSeparator:=GetLocaleCharUTF8(LID, LOCALE_SDECIMAL, '.'); 533 CurrencyDecimals:=StrToIntDef(GetLocaleStr(LID, LOCALE_ICURRDIGITS, '0'), 0); 534 ListSeparator := GetLocaleCharUTF8(LID, LOCALE_SLIST, ','); 535 end; 536end; 537 538procedure GetFormatSettingsUTF8; 539begin 540 {$ifndef wince} 541 GetLocaleFormatSettingsUTF8(GetThreadLocale, FormatSettings); 542 {$else} 543 GetLocaleFormatSettingsUTF8(GetUserDefaultLCID, FormatSettings); 544 {$endif} 545end; 546 547{$IFDEF UTF8_RTL} 548function UTF8StrCompAnsiString(S1, S2: PChar): PtrInt; 549begin 550 Result:=UTF8CompareStrP(S1,S2); 551end; 552 553function UTF8StrICompAnsiString(S1, S2: PChar): PtrInt; 554var 555 U1, U2: String; 556begin 557 U1:=StrPas(S1); 558 U2:=StrPas(S2); 559 Result:=UTF8CompareText(U1,U2); 560end; 561 562function UTF8StrLCompAnsiString(S1, S2: PChar; Count: PtrUInt): PtrInt; 563begin 564 Result:=UTF8CompareStr(S1,Count,S2,Count); 565end; 566 567function UTF8StrLICompAnsiString(S1, S2: PChar; Count: PtrUInt): PtrInt; 568var 569 U1, U2: String; 570begin 571 if Count>0 then begin 572 SetLength(U1,Count); 573 Move(S1^,PByte(U1)^,Count); 574 SetLength(U2,Count); 575 Move(S2^,PByte(U2)^,Count); 576 Result:=UTF8CompareText(U1,U2); 577 end else 578 Result:=0; 579end; 580{$ENDIF} 581 582procedure InitLazUtf8; 583begin 584 {$ifndef WinCE} 585 if Win32MajorVersion <= 4 then 586 begin 587 _ParamStrUtf8 := @ParamStrUtf8Ansi; 588 end 589 else 590 {$endif} 591 begin 592 try 593 ArgsWCount := -1; 594 _ParamStrUtf8 := @ParamStrUtf8Wide; 595 SetupCommandlineParametersWide; 596 {$ifdef debugparamstrutf8} 597 if ParamCount <> ArgsWCount then ParamStrUtf8Error; 598 {$endif} 599 Except 600 begin 601 ArgsWCount := -1; 602 {$ifdef debugparamstrutf8} 603 ParamStrUtf8Error; 604 {$endif} 605 end; 606 end; 607 end; 608 {$IFDEF UTF8_RTL} 609 GetFormatSettingsUTF8; 610 widestringmanager.UpperAnsiStringProc:=@UTF8UpperString; 611 widestringmanager.LowerAnsiStringProc:=@UTF8LowerString; 612 widestringmanager.CompareStrAnsiStringProc:=@UTF8CompareStr; 613 widestringmanager.CompareTextAnsiStringProc:=@UTF8CompareText; 614 widestringmanager.StrCompAnsiStringProc:=@UTF8StrCompAnsiString; 615 widestringmanager.StrICompAnsiStringProc:=@UTF8StrICompAnsiString; 616 widestringmanager.StrLCompAnsiStringProc:=@UTF8StrLCompAnsiString; 617 widestringmanager.StrLICompAnsiStringProc:=@UTF8StrLICompAnsiString; 618 // Does anyone need these two? 619 //widestringmanager.StrLowerAnsiStringProc; 620 //widestringmanager.StrUpperAnsiStringProc; 621 {$IFEND} 622end; 623 624procedure FinalizeLazUTF8; 625{$IFDEF ArgsWAsUTF8} 626var 627 p: PPChar; 628{$ENDIF} 629begin 630 {$IFDEF ArgsWAsUTF8} 631 // restore argv and free memory 632 if OldArgV<>nil then 633 begin 634 p:=argv; 635 argv:=OldArgV; 636 Freemem(p); 637 end; 638 {$IFEND} 639end; 640