1{%MainUnit lazutf8.pas}
2
3{$IF DEFINED(UTF8_RTL) AND NOT DEFINED(WINCE)}
4  {$DEFINE ArgsWAsUTF8}
5{$ENDIF}
6
7var
8  //Function prototypes
9  _ParamStrUtf8: Function(Param: Integer): string;
10
11var
12  ArgsW: Array of WideString;
13  ArgsWCount: Integer; // length(ArgsW)+1
14  {$IFDEF ArgsWAsUTF8}
15  ArgsUTF8: Array of String; // the ArgsW array as UTF8
16  OldArgV: PPChar = nil;
17  {$IFEND}
18
19//************ START "Stubs" that just call Ansi or Wide implementation
20
21function ParamStrUTF8(Param: Integer): string;
22begin
23  Result := _ParamStrUtf8(Param);
24end;
25
26//************ END "Stubs" that just call Ansi or Wide implementation
27
28
29//*************** START Non WideString implementations
30{$ifndef wince}
31function ParamStrUtf8Ansi(Param: Integer): String;
32begin
33  Result:=SysToUTF8(ObjPas.ParamStr(Param));
34end;
35{$endif wince}
36
37//*************** END Non WideString impementations
38
39
40
41
42//*************** START WideString impementations
43
44
45{$IFDEF ArgsWAsUTF8}
46procedure SetupArgvAsUtf8;
47var
48  i: Integer;
49begin
50  SetLength(ArgsUTF8,length(ArgsW));
51  OldArgV:=argv;
52  GetMem(argv,SizeOf(Pointer)*length(ArgsW));
53  for i:=0 to length(ArgsW)-1 do
54  begin
55    ArgsUTF8[i]:=ArgsW{%H-}[i];
56    argv[i]:=PChar(ArgsUTF8[i]);
57  end;
58end;
59{$endif}
60
61procedure SetupCommandlineParametersWide;
62var
63  ArgLen, Start, CmdLen, i, j: SizeInt;
64  Quote   : Boolean;
65  Buf: array[0..259] of WChar;  // need MAX_PATH bytes, not 256!
66  PCmdLineW: PWideChar;
67  CmdLineW: WideString;
68
69  procedure AllocArg(Idx, Len:longint);
70  begin
71    if (Idx >= ArgsWCount) then
72      SetLength(ArgsW, Idx + 1);
73    SetLength(ArgsW[Idx], Len);
74  end;
75
76begin
77  { create commandline, it starts with the executed filename which is argv[0] }
78  { Win32 passes the command NOT via the args, but via getmodulefilename}
79  ArgsWCount := 0;
80  ArgLen := GetModuleFileNameW(0, @buf[0], sizeof(buf));
81
82  //writeln('ArgLen = ',Arglen);
83
84  buf[ArgLen] := #0; // be safe, no terminating 0 on XP
85  allocarg(0,arglen);
86  move(buf[0],ArgsW[0][1],arglen * SizeOf(WChar));
87
88  //writeln('ArgsW[0] = ',ArgsW[0]);
89
90  PCmdLineW := nil;
91  { Setup cmdline variable }
92  PCmdLineW := GetCommandLineW;
93  CmdLen := StrLen(PCmdLineW);
94
95  //writeln('StrLen(PCmdLineW) = ',CmdLen);
96
97  SetLength(CmdLineW, CmdLen);
98  Move(PCmdLineW^, CmdLineW[1], CmdLen * SizeOf(WChar));
99
100
101  //debugln(CmdLineW);
102  //for i := 1 to CmdLen do DbgOut(DbgS(i mod 10)); debugln;
103
104  i := 1;
105  while (i <= CmdLen) do
106  begin
107    //debugln('Next');
108    //DbgOut('i=',DbgS(i),' CmdLineW[',DbgS(i),']=');if i<=CmdLen then debugln(CmdLineW[i]) else debugln('#0');
109    //skip leading spaces
110    while (i <= CmdLen) and (CmdLineW[i] <= #32) do Inc(i);
111    //DbgOut('After skipping spaces: i=',DbgS(i),' CmdLineW[',DbgS(i),']=');if i<=CmdLen then debugln(CmdLineW[i]) else debugln('#0');
112    if (i > CmdLen) then Break;
113    Quote := False;
114    Start := i;
115    ArgLen := 0;
116    while (i <= CmdLen) do
117    begin //find next commandline parameter
118      case CmdLineW[i] of
119        #1..#32:
120        begin
121          if Quote then
122          begin
123            //debugln('i=',DbgS(i),': Space in Quote');
124            Inc(ArgLen)
125          end
126          else
127          begin
128            //debugln('i=',DbgS(i),': Space in NOT Quote');
129            Break;
130          end;
131        end;
132        '"':
133        begin
134          if (i < CmdLen) and (CmdLineW[i+1] <> '"') then
135          begin
136            //debugln('i=',DbgS(i),': Quote := not Quote');
137            Quote := not Quote
138          end
139          else
140          begin
141            //debugln('i=',DbgS(i),': Skip Quote');
142            Inc(i);
143          end;
144        end;
145        else Inc(ArgLen);
146      end;//case
147      Inc(i);
148    end; //find next commandline parameter
149
150    //debugln('ArgWCount=',DbgS(ArgsWCount),' Start=',DbgS(start),' ArgLen=',DbgS(arglen),' i=',DbgS(i));
151
152    //we already have (a better) ArgW[0]
153    if (ArgsWCount > 0) then
154    begin //Process commandline parameter
155      AllocArg(ArgsWCount, ArgLen);
156      Quote := False;
157      i := Start;
158      j := 1;
159      while (i <= CmdLen) do
160      begin
161        case CmdLineW[i] of
162          #1..#32:
163          begin
164            if Quote then
165            begin
166              //if j > ArgLen then debugln('Error whitespace: j > ArgLen: j=',DbgS(j),' ArgLen=',DbgS(arglen));
167              ArgsW[ArgsWCount][j] := CmdLineW[i];
168              Inc(j);
169            end
170            else
171              Break;
172          end;
173          '"':
174          begin
175            if (i < CmdLen) and (CmdLineW[i+1] <> '"') then
176              Quote := not Quote
177            else
178              Inc(i);
179          end;
180          else
181          begin
182            //if j > ArgLen then debugln('Error Quote: j > ArgLen: j=',DbgS(j),' ArgLen=',DbgS(arglen));
183            ArgsW[ArgsWCount][j] := CmdLineW[i];
184            Inc(j);
185          end;
186        end;
187        Inc(i);
188      end;
189
190      //debugln('ArgsW[',DbgS(ArgsWCount),'] = ',ArgsW[ArgsWCount]);
191    end; // Process commandline parameter
192    Inc(ArgsWCount);
193
194  end;
195  Dec(ArgsWCount);
196  //Note:
197  //On WinCe Argsv is a static function, so we cannot change it.
198  //This might change in the future if Argsv on WinCE will be declared as a function variable
199  {$IFDEF ArgsWAsUTF8}
200  if DefaultSystemCodePage=CP_UTF8 then
201    SetupArgvAsUtf8;
202  {$IFEND}
203end;
204
205function ParamStrUtf8Wide(Param: Integer): String;
206begin
207  if ArgsWCount <> ParamCount then
208  begin
209    //DebugLn('Error: ParamCount <> ArgsWCount!');
210    Result := SysToUtf8(ObjPas.ParamStr(Param));
211  end
212  else
213  begin
214    if (Param <= ArgsWCount) then
215      {$IFDEF ACP_RTL}
216      Result := String(UnicodeString(ArgsW[Param]))
217      {$ELSE}
218      Result := Utf8Encode(ArgsW[Param])
219      {$ENDIF ACP_RTL}
220    else
221      Result := '';
222  end;
223end;
224
225{$IFNDEF WINCE}
226function GetGetEnvironmentVariableCountWide: integer;
227var
228  hp,p : PWideChar;
229begin
230  Result:=0;
231  p:=GetEnvironmentStringsW;
232  if p=nil then exit;
233  hp:=p;
234  while hp^<>#0 do
235  begin
236    Inc(Result);
237    hp:=hp+strlen(hp)+1;
238  end;
239  FreeEnvironmentStringsW(p);
240end;
241
242
243function GetEnvironmentStringWide(Index: Integer): UnicodeString;
244var
245  hp,p : PWideChar;
246begin
247  Result:='';
248  p:=GetEnvironmentStringsW;
249  if p=nil then exit;
250  hp:=p;
251  while (hp^<>#0) and (Index>1) do
252  begin
253    Dec(Index);
254    hp:=hp+strlen(hp)+1;
255  end;
256  if (hp^<>#0) then
257    Result:=hp;
258  FreeEnvironmentStringsW(p);
259end;
260{$ENDIF WINCE}
261
262function GetEnvironmentVariableWide(const EnvVar: string): UnicodeString;
263{$IF FPC_FULLVERSION>=30000}
264begin
265  Result:=GetEnvironmentVariable(UTF8ToUTF16(EnvVar));
266end;
267{$ELSE}
268var
269  s, upperenv : Unicodestring;
270  i : longint;
271  hp,p : pwidechar;
272begin
273  Result:='';
274  p:=GetEnvironmentStringsW;
275  hp:=p;
276  upperenv:=uppercase(envvar);
277  while hp^<>#0 do
278  begin
279    s:=hp;
280    i:=pos('=',s);
281    if uppercase(copy(s,1,i-1))=upperenv then
282    begin
283      Result:=copy(s,i+1,length(s)-i);
284      break;
285    end;
286    { next string entry}
287    hp:=hp+strlen(hp)+1;
288  end;
289  FreeEnvironmentStringsW(p);
290end;
291{$ENDIF}
292
293
294//*************** END WideString impementations
295
296{$ifdef WinCE}
297function ConsoleToUTF8(const s: string): string;// converts console encoding to UTF8
298begin
299  Result := SysToUTF8(s);
300end;
301{$else}
302function ConsoleToUTF8(const s: string): string;// converts console encoding to UTF8
303var
304  Dst: PChar;
305begin
306  Dst := AllocMem((Length(s) + 1) * SizeOf(Char));
307  if OemToChar(PChar(s), Dst) then
308    Result := StrPas(Dst)
309  else
310    Result := s;
311  FreeMem(Dst);
312  Result := WinCPToUTF8(Result);
313end;
314{$endif not wince}
315
316{$ifdef WinCe}
317function UTF8ToConsole(const s: string): string; // converts UTF8 to console string (used by Write, WriteLn)
318begin
319  Result := UTF8ToSys(s);
320end;
321{$else}
322function UTF8ToConsole(const s: string): string; // converts UTF8 to console string (used by Write, WriteLn)
323var
324  Dst: PChar;
325begin
326  Result := UTF8ToWinCP(s);
327  Dst := AllocMem((Length(Result) + 1) * SizeOf(Char));
328  if CharToOEM(PChar(Result), Dst) then
329    Result := StrPas(Dst);
330  FreeMem(Dst);
331  SetCodePage(RawByteString(Result), CP_OEMCP, False);
332end;
333{$endif not WinCE}
334
335{$ifdef WinCE}
336function WinCPToUTF8(const s: string): string; inline;
337begin
338  Result := SysToUtf8(s);
339end;
340{$else}
341// for all Windows supporting 8bit codepages (e.g. not WinCE)
342function WinCPToUTF8(const s: string): string;
343// result has codepage CP_ACP
344var
345  UTF16WordCnt: SizeInt;
346  UTF16Str: UnicodeString;
347begin
348  Result:=s;
349  if IsASCII(Result) then begin
350    {$ifdef FPC_HAS_CPSTRING}
351    // prevent codepage conversion magic
352    SetCodePage(RawByteString(Result), CP_ACP, False);
353    {$endif}
354    exit;
355  end;
356  UTF16WordCnt:=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, Pointer(s), length(s), nil, 0);
357  // this will null-terminate
358  if UTF16WordCnt>0 then
359  begin
360    setlength(UTF16Str, UTF16WordCnt);
361    MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, Pointer(s), length(s), @UTF16Str[1], UTF16WordCnt);
362    Result:=UTF8Encode(UTF16Str);
363    {$ifdef FPC_HAS_CPSTRING}
364    // prevent codepage conversion magic
365    SetCodePage(RawByteString(Result), CP_ACP, False);
366    {$endif}
367  end;
368end;
369{$endif not wince}
370
371{$ifdef WinCe}
372function UTF8ToWinCP(const s: string): string; inline;
373begin
374  Result := Utf8ToSys(s);
375end;
376{$else}
377function UTF8ToWinCP(const s: string): string;
378// result has codepage CP_ACP
379var
380  src: UnicodeString;
381  len: LongInt;
382begin
383  Result:=s;
384  if IsASCII(Result) then begin
385    {$ifdef FPC_HAS_CPSTRING}
386    // prevent codepage conversion magic
387    SetCodePage(RawByteString(Result), CP_ACP, False);
388    {$endif}
389    exit;
390  end;
391  src:=UTF8Decode(s);
392  if src='' then
393    exit;
394  len:=WideCharToMultiByte(CP_ACP,0,PUnicodeChar(src),length(src),nil,0,nil,nil);
395  SetLength(Result,len);
396  if len>0 then begin
397    WideCharToMultiByte(CP_ACP,0,PUnicodeChar(src),length(src),@Result[1],length(Result),nil,nil);
398    {$ifdef FPC_HAS_CPSTRING}
399    // prevent codepage conversion magic
400    SetCodePage(RawByteString(Result), CP_ACP, False);
401    {$endif}
402  end;
403end;
404{$endif not wince}
405
406{$ifdef debugparamstrutf8}
407procedure ParamStrUtf8Error;
408var
409  i: Integer;
410begin
411  writeln('Error in Windows WideString implementation of ParamStrUtf8');
412  writeln('Using SysToUtf8(ParamsStr(Param)) as fallback');
413  writeln('ParamCount = ',ParamCount,', ArgsWCount = ',ArgsWCount);
414  for i := 0 to ParamCount do writeln('ParamStr(',i,') = "',ParamStr(i),'"');
415  writeln;
416  for i := 0 to ArgsWCount do writeln('ParamStrUtf8(',i,') = "',ArgsW[i],'"');
417end;
418{$endif}
419
420function GetLocaleStr(aLocaleID, aLCType: Longint; const Def: string): String;
421var
422  L: Integer;
423  Buf: array[0..255] of WideChar;
424begin
425  L := GetLocaleInfoW(aLocaleID, aLCType, Buf, SizeOf(Buf));
426  if L > 0 then
427  begin
428    Result:='';
429    widestringmanager.Wide2AnsiMoveProc(PWideChar(@Buf[0]),Result,CP_UTF8,L-1);
430  end
431  else
432    Result := Def;
433end;
434
435function GetLocaleCharUTF8(aLocaleID, aLCType: Longint; Def: Char): Char;
436var
437  Buf: array[0..3] of WideChar; // sdate allows 4 chars (3+ending #0)
438  GLI, I: LongInt;
439  WRes: WideChar;
440begin
441  //Use Widestring Api so it works on WinCE as well
442  GLI := GetLocaleInfoW(aLocaleID, aLCType, Buf, Length(Buf)); // GLI is char count with the ending #0 char
443  if GLI > 2 then
444  begin // more than 1 char -> try to find first non-space character
445    for I := 0 to GLI-2 do
446    begin
447      WRes := Buf[I];
448      case Buf[I] of
449        #32, #$00A0, #$2002, #$2003, #$2009, #$202F: begin end;// go over spaces
450      else
451        Break; // stop at non-space
452      end;
453    end;
454  end else
455  if GLI = 2 then // 1 char
456    WRes := Buf[0]
457  else
458    WRes := Def;
459
460  case WRes of
461    #0..#127: Result := WRes;// ASCII - OK
462    #$00A0: Result := ' ';   // non breakable space
463    #$00B7: Result := '.';   // middle stop
464    #$02D9: Result := '''';  // dot above, italian handwriting
465    #$066B: Result := ',';   // arabic decimal separator, persian thousand separator
466    #$066C: Result := '''';  // arabic thousand separator
467    #$2002: Result := ' ';   // long space
468    #$2003: Result := ' ';   // long space
469    #$2009: Result := ' ';   // thin space
470    #$202F: Result := ' ';   // narrow non breakable space
471    #$2014: Result := '-';   // persian decimal mark
472    #$2396: Result := '''';  // codepoint 9110 decimal separator
473    { Utf8        Utf16
474      C2 A0    -> 00A0
475      C2 B7    -> 00B7
476      CB 99    -> 02D9
477      D9 AB    -> 066B
478      D9 AC    -> 066C
479      E2 80 82 -> 2002
480      E2 80 83 -> 2003
481      E2 80 89 -> 2009
482      E2 80 AF -> 202F
483      E2 80 94 -> 2014
484      E2 8E 96 -> 2396
485    }
486  else // unicode character -> we need default ASCII char
487    Result := Def;
488  end;  //case
489end;
490
491procedure GetLocaleFormatSettingsUTF8(LCID: Integer; var aFormatSettings: TFormatSettings);
492var
493  HF  : Shortstring;
494  LID : Windows.LCID;
495  I,Day : longint;
496begin
497  LID := LCID;
498  with aFormatSettings do
499  begin
500    { Date stuff }
501    for I := 1 to 12 do
502      begin
503      ShortMonthNames[I]:=GetLocaleStr(LID,LOCALE_SABBREVMONTHNAME1+I-1,ShortMonthNames[i]);
504      LongMonthNames[I]:=GetLocaleStr(LID,LOCALE_SMONTHNAME1+I-1,LongMonthNames[i]);
505      end;
506    for I := 1 to 7 do
507      begin
508      Day := (I + 5) mod 7;
509      ShortDayNames[I]:=GetLocaleStr(LID,LOCALE_SABBREVDAYNAME1+Day,ShortDayNames[i]);
510      LongDayNames[I]:=GetLocaleStr(LID,LOCALE_SDAYNAME1+Day,LongDayNames[i]);
511      end;
512    DateSeparator := GetLocaleCharUTF8(LID, LOCALE_SDATE, '/');
513    ShortDateFormat := GetLocaleStr(LID, LOCALE_SSHORTDATE, 'm/d/yy');
514    LongDateFormat := GetLocaleStr(LID, LOCALE_SLONGDATE, 'mmmm d, yyyy');
515    { Time stuff }
516    TimeSeparator := GetLocaleCharUTF8(LID, LOCALE_STIME, ':');
517    TimeAMString := GetLocaleStr(LID, LOCALE_S1159, 'AM');
518    TimePMString := GetLocaleStr(LID, LOCALE_S2359, 'PM');
519    if StrToIntDef(GetLocaleStr(LID, LOCALE_ITLZERO, '0'), 0) = 0 then
520      HF:='h'
521    else
522      HF:='hh';
523    // No support for 12 hour stuff at the moment...
524    ShortTimeFormat := HF+':nn';
525    LongTimeFormat := HF + ':nn:ss';
526    { Currency stuff }
527    CurrencyString:=GetLocaleStr(LID, LOCALE_SCURRENCY, '');
528    CurrencyFormat:=StrToIntDef(GetLocaleStr(LID, LOCALE_ICURRENCY, '0'), 0);
529    NegCurrFormat:=StrToIntDef(GetLocaleStr(LID, LOCALE_INEGCURR, '0'), 0);
530    { Number stuff }
531    ThousandSeparator:=GetLocaleCharUTF8(LID, LOCALE_STHOUSAND, ',');
532    DecimalSeparator:=GetLocaleCharUTF8(LID, LOCALE_SDECIMAL, '.');
533    CurrencyDecimals:=StrToIntDef(GetLocaleStr(LID, LOCALE_ICURRDIGITS, '0'), 0);
534    ListSeparator := GetLocaleCharUTF8(LID, LOCALE_SLIST, ',');
535  end;
536end;
537
538procedure GetFormatSettingsUTF8;
539begin
540  {$ifndef wince}
541  GetLocaleFormatSettingsUTF8(GetThreadLocale, FormatSettings);
542  {$else}
543  GetLocaleFormatSettingsUTF8(GetUserDefaultLCID, FormatSettings);
544  {$endif}
545end;
546
547{$IFDEF UTF8_RTL}
548function UTF8StrCompAnsiString(S1, S2: PChar): PtrInt;
549begin
550  Result:=UTF8CompareStrP(S1,S2);
551end;
552
553function UTF8StrICompAnsiString(S1, S2: PChar): PtrInt;
554var
555  U1, U2: String;
556begin
557  U1:=StrPas(S1);
558  U2:=StrPas(S2);
559  Result:=UTF8CompareText(U1,U2);
560end;
561
562function UTF8StrLCompAnsiString(S1, S2: PChar; Count: PtrUInt): PtrInt;
563begin
564  Result:=UTF8CompareStr(S1,Count,S2,Count);
565end;
566
567function UTF8StrLICompAnsiString(S1, S2: PChar; Count: PtrUInt): PtrInt;
568var
569  U1, U2: String;
570begin
571  if Count>0 then begin
572    SetLength(U1,Count);
573    Move(S1^,PByte(U1)^,Count);
574    SetLength(U2,Count);
575    Move(S2^,PByte(U2)^,Count);
576    Result:=UTF8CompareText(U1,U2);
577  end else
578    Result:=0;
579end;
580{$ENDIF}
581
582procedure InitLazUtf8;
583begin
584  {$ifndef WinCE}
585  if Win32MajorVersion <= 4 then
586  begin
587    _ParamStrUtf8 := @ParamStrUtf8Ansi;
588  end
589  else
590  {$endif}
591  begin
592    try
593      ArgsWCount := -1;
594      _ParamStrUtf8 := @ParamStrUtf8Wide;
595      SetupCommandlineParametersWide;
596      {$ifdef debugparamstrutf8}
597      if ParamCount <> ArgsWCount then ParamStrUtf8Error;
598      {$endif}
599    Except
600      begin
601        ArgsWCount := -1;
602        {$ifdef debugparamstrutf8}
603        ParamStrUtf8Error;
604        {$endif}
605      end;
606    end;
607  end;
608  {$IFDEF UTF8_RTL}
609  GetFormatSettingsUTF8;
610  widestringmanager.UpperAnsiStringProc:=@UTF8UpperString;
611  widestringmanager.LowerAnsiStringProc:=@UTF8LowerString;
612  widestringmanager.CompareStrAnsiStringProc:=@UTF8CompareStr;
613  widestringmanager.CompareTextAnsiStringProc:=@UTF8CompareText;
614  widestringmanager.StrCompAnsiStringProc:=@UTF8StrCompAnsiString;
615  widestringmanager.StrICompAnsiStringProc:=@UTF8StrICompAnsiString;
616  widestringmanager.StrLCompAnsiStringProc:=@UTF8StrLCompAnsiString;
617  widestringmanager.StrLICompAnsiStringProc:=@UTF8StrLICompAnsiString;
618  // Does anyone need these two?
619  //widestringmanager.StrLowerAnsiStringProc;
620  //widestringmanager.StrUpperAnsiStringProc;
621  {$IFEND}
622end;
623
624procedure FinalizeLazUTF8;
625{$IFDEF ArgsWAsUTF8}
626var
627  p: PPChar;
628{$ENDIF}
629begin
630  {$IFDEF ArgsWAsUTF8}
631  // restore argv and free memory
632  if OldArgV<>nil then
633  begin
634    p:=argv;
635    argv:=OldArgV;
636    Freemem(p);
637  end;
638  {$IFEND}
639end;
640