1unit pas2jsutils;
2{
3    This file is part of the Free Component Library (FCL)
4    Copyright (c) 2018  Mattias Gaertner  mattias@freepascal.org
5
6    Pascal to Javascript converter class.
7
8    See the file COPYING.FPC, included in this distribution,
9    for details about the copyright.
10
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14
15 **********************************************************************
16
17  Abstract:
18    Utility routines that do not need a filesystem or OS functionality.
19    Filesystem-specific things should go to pas2jsfileutils instead.
20}
21{$mode objfpc}{$H+}
22
23interface
24
25uses
26  Classes, SysUtils;
27
28function ChompPathDelim(const Path: string): string;
29function GetNextDelimitedItem(const List: string; Delimiter: char;
30                              var Position: integer): string;
31type
32   TChangeStamp = SizeInt;
33
34const
35  InvalidChangeStamp = low(TChangeStamp);
36
37Function IncreaseChangeStamp(Stamp: TChangeStamp) : TChangeStamp;
38const
39  EncodingUTF8 = 'UTF-8';
40  EncodingSystem = 'System';
41
42function NormalizeEncoding(const Encoding: string): string;
43function IsASCII(const s: string): boolean; inline;
44{$IFDEF FPC_HAS_CPSTRING}
45const
46  UTF8BOM = #$EF#$BB#$BF;
47function UTF8CharacterStrictLength(P: PChar): integer;
48
49function UTF8ToUTF16(const s: string): UnicodeString;
50function UTF16ToUTF8(const s: UnicodeString): string;
51
52{$ENDIF FPC_HAS_CPSTRING}
53
54function IsNonUTF8System: boolean;// true if system encoding is not UTF-8
55{$IFDEF Windows}
56// AConsole - If false, it is the general system encoding,
57//            if true, it is the console encoding
58function GetWindowsEncoding(AConsole: Boolean = False): string;
59{$ENDIF}
60{$IF defined(Unix) and not defined(Darwin)}
61function GetUnixEncoding: string;
62{$ENDIF}
63
64Function NonUTF8System: boolean;
65function GetDefaultTextEncoding: string;
66
67procedure SplitCmdLineParams(const Params: string; ParamList: TStrings;
68                             ReadBackslash: boolean = false);
69
70implementation
71
72{$IFDEF Windows}
73uses Windows;
74{$ENDIF}
75
76Var
77  {$IFDEF Unix}
78  {$IFNDEF Darwin}
79  Lang: string = '';
80  {$ENDIF}
81  {$ENDIF}
82  EncodingValid: boolean = false;
83  DefaultTextEncoding: string = EncodingSystem;
84  gNonUTF8System : Boolean = {$IFDEF FPC_HAS_CPSTRING}false{$ELSE}true{$ENDIF};
85
86Function NonUTF8System: boolean;
87
88begin
89  Result:=gNonUTF8System;
90end;
91
92function GetNextDelimitedItem(const List: string; Delimiter: char;
93  var Position: integer): string;
94var
95  StartPos: Integer;
96begin
97  StartPos:=Position;
98  while (Position<=length(List)) and (List[Position]<>Delimiter) do
99    inc(Position);
100  Result:=copy(List,StartPos,Position-StartPos);
101  if Position<=length(List) then inc(Position); // skip Delimiter
102end;
103
104function IncreaseChangeStamp(Stamp: TChangeStamp): TChangeStamp;
105begin
106  if Stamp<High(TChangeStamp) then
107    Result:=Stamp+1
108  else
109    Result:=InvalidChangeStamp+1;
110end;
111
112function ChompPathDelim(const Path: string): string;
113var
114  Len, MinLen: Integer;
115begin
116  Result:=Path;
117  if Path = '' then
118    exit;
119  Len:=length(Result);
120  if (Result[1] in AllowDirectorySeparators) then
121  begin
122    MinLen := 1;
123    {$IFDEF HasUNCPaths}
124    if (Len >= 2) and (Result[2] in AllowDirectorySeparators) then
125      MinLen := 2; // keep UNC '\\', chomp 'a\' to 'a'
126    {$ENDIF}
127    {$IFDEF Pas2js}
128    if (Len >= 2) and (Result[2]=Result[1]) and (PathDelim='\') then
129      MinLen := 2; // keep UNC '\\', chomp 'a\' to 'a'
130    {$ENDIF}
131  end
132  else begin
133    MinLen := 0;
134    {$IFdef MSWindows}
135    if (Len >= 3) and (Result[1] in ['a'..'z', 'A'..'Z'])  and
136       (Result[2] = ':') and (Result[3] in AllowDirectorySeparators)
137    then
138      MinLen := 3;
139    {$ENDIF}
140    {$IFdef Pas2js}
141    if (PathDelim='\')
142        and (Len >= 3) and (Result[1] in ['a'..'z', 'A'..'Z'])
143        and (Result[2] = ':') and (Result[3] in AllowDirectorySeparators)
144    then
145      MinLen := 3;
146    {$ENDIF}
147  end;
148
149  while (Len > MinLen) and (Result[Len] in AllowDirectorySeparators) do dec(Len);
150  if Len<length(Result) then
151    SetLength(Result,Len);
152end;
153
154function NormalizeEncoding(const Encoding: string): string;
155var
156  i: Integer;
157begin
158  Result:=LowerCase(Encoding);
159  for i:=length(Result) downto 1 do
160    if Result[i]='-' then Delete(Result,i,1);
161end;
162
163{$IFDEF WINDOWS}
164function GetWindowsEncoding(AConsole: Boolean = False): string;
165var
166  cp : UINT;
167{$IFDEF WinCE}
168// CP_UTF8 is missing in the windows unit of the Windows CE RTL
169const
170  CP_UTF8 = 65001;
171{$ENDIF}
172begin
173  if AConsole then cp := GetOEMCP
174  else cp := GetACP;
175
176  case cp of
177    CP_UTF8: Result := EncodingUTF8;
178  else
179    Result:='cp'+IntToStr(cp);
180  end;
181end;
182{$ENDIF}
183
184function IsASCII(const s: string): boolean; inline;
185{$IFDEF Pas2js}
186var
187  i: Integer;
188begin
189  for i:=1 to length(s) do
190    if s[i]>#127 then exit(false);
191  Result:=true;
192end;
193{$ELSE}
194var
195  p: PChar;
196begin
197  if s='' then exit(true);
198  p:=PChar(s);
199  repeat
200    case p^ of
201    #0: if p-PChar(s)=length(s) then exit(true);
202    #128..#255: exit(false);
203    end;
204    inc(p);
205  until false;
206end;
207{$ENDIF}
208
209{$IFDEF FPC_HAS_CPSTRING}
210function UTF8CharacterStrictLength(P: PChar): integer;
211begin
212  if p=nil then exit(0);
213  if ord(p^)<%10000000 then
214  begin
215    // regular single byte character
216    exit(1);
217  end
218  else if ord(p^)<%11000000 then
219  begin
220    // invalid single byte character
221    exit(0);
222  end
223  else if ((ord(p^) and %11100000) = %11000000) then
224  begin
225    // should be 2 byte character
226    if (ord(p[1]) and %11000000) = %10000000 then
227      exit(2)
228    else
229      exit(0);
230  end
231  else if ((ord(p^) and %11110000) = %11100000) then
232  begin
233    // should be 3 byte character
234    if ((ord(p[1]) and %11000000) = %10000000)
235    and ((ord(p[2]) and %11000000) = %10000000) then
236      exit(3)
237    else
238      exit(0);
239  end
240  else if ((ord(p^) and %11111000) = %11110000) then
241  begin
242    // should be 4 byte character
243    if ((ord(p[1]) and %11000000) = %10000000)
244    and ((ord(p[2]) and %11000000) = %10000000)
245    and ((ord(p[3]) and %11000000) = %10000000) then
246      exit(4)
247    else
248      exit(0);
249  end else
250    exit(0);
251end;
252
253function UTF8ToUTF16(const s: string): UnicodeString;
254begin
255  Result:=UTF8Decode(s);
256end;
257
258function UTF16ToUTF8(const s: UnicodeString): string;
259begin
260  if s='' then exit('');
261  Result:=UTF8Encode(s);
262  // prevent UTF8 codepage appear in the strings - we don't need codepage
263  // conversion magic
264  SetCodePage(RawByteString(Result), CP_ACP, False);
265end;
266{$ENDIF}
267
268function IsNonUTF8System: boolean;
269begin
270  Result:=NonUTF8System;
271end;
272
273{$IFDEF UNIX}
274{$IFNDEF Darwin}
275function GetUnixEncoding: string;
276var
277  i: integer;
278begin
279  Result:=EncodingSystem;
280  i:=pos('.',Lang);
281  if (i>0) and (i<=length(Lang)) then
282    Result:=copy(Lang,i+1,length(Lang)-i);
283end;
284{$ENDIF}
285{$ENDIF}
286
287function GetDefaultTextEncoding: string;
288
289
290begin
291  if EncodingValid then
292  begin
293    Result:=DefaultTextEncoding;
294    exit;
295  end;
296
297  {$IFDEF Pas2js}
298  Result:=EncodingUTF8;
299  {$ELSE}
300    {$IFDEF Windows}
301    Result:=GetWindowsEncoding;
302    {$ELSE}
303      {$IFDEF Darwin}
304      Result:=EncodingUTF8;
305      {$ELSE}
306      // unix
307      Lang := GetEnvironmentVariable('LC_ALL');
308      if Lang='' then
309      begin
310        Lang := GetEnvironmentVariable('LC_MESSAGES');
311        if Lang='' then
312          Lang := GetEnvironmentVariable('LANG');
313      end;
314      Result:=GetUnixEncoding;
315      {$ENDIF}
316    {$ENDIF}
317  {$ENDIF}
318  Result:=NormalizeEncoding(Result);
319
320  DefaultTextEncoding:=Result;
321  EncodingValid:=true;
322end;
323
324procedure InternalInit;
325begin
326  {$IFDEF FPC_HAS_CPSTRING}
327  SetMultiByteConversionCodePage(CP_UTF8);
328  // SetMultiByteFileSystemCodePage(CP_UTF8); not needed, this is the default under Windows
329  SetMultiByteRTLFileSystemCodePage(CP_UTF8);
330
331  GetDefaultTextEncoding;
332  {$IFDEF Windows}
333  gNonUTF8System:=true;
334  {$ELSE}
335  gNonUTF8System:=SysUtils.CompareText(DefaultTextEncoding,'UTF8')<>0;
336  {$ENDIF}
337  {$ENDIF}
338end;
339procedure SplitCmdLineParams(const Params: string; ParamList: TStrings;
340                             ReadBackslash: boolean = false);
341// split spaces, quotes are parsed as single parameter
342// if ReadBackslash=true then \" is replaced to " and not treated as quote
343// #0 is always end
344type
345  TMode = (mNormal,mApostrophe,mQuote);
346var
347  p: Integer;
348  Mode: TMode;
349  Param: String;
350begin
351  p:=1;
352  while p<=length(Params) do
353  begin
354    // skip whitespace
355    while (p<=length(Params)) and (Params[p] in [' ',#9,#10,#13]) do inc(p);
356    if (p>length(Params)) or (Params[p]=#0) then
357      break;
358    // read param
359    Param:='';
360    Mode:=mNormal;
361    while p<=length(Params) do
362    begin
363      case Params[p] of
364      #0:
365        break;
366      '\':
367        begin
368          inc(p);
369          if ReadBackslash then
370            begin
371            // treat next character as normal character
372            if (p>length(Params)) or (Params[p]=#0) then
373              break;
374            if ord(Params[p])<128 then
375            begin
376              Param+=Params[p];
377              inc(p);
378            end else begin
379              // next character is already a normal character
380            end;
381          end else begin
382            // treat backslash as normal character
383            Param+='\';
384          end;
385        end;
386      '''':
387        begin
388          inc(p);
389          case Mode of
390          mNormal:
391            Mode:=mApostrophe;
392          mApostrophe:
393            Mode:=mNormal;
394          mQuote:
395            Param+='''';
396          end;
397        end;
398      '"':
399        begin
400          inc(p);
401          case Mode of
402          mNormal:
403            Mode:=mQuote;
404          mApostrophe:
405            Param+='"';
406          mQuote:
407            Mode:=mNormal;
408          end;
409        end;
410      ' ',#9,#10,#13:
411        begin
412          if Mode=mNormal then break;
413          Param+=Params[p];
414          inc(p);
415        end;
416      else
417        Param+=Params[p];
418        inc(p);
419      end;
420    end;
421    //writeln('SplitCmdLineParams Param=#'+Param+'#');
422    ParamList.Add(Param);
423  end;
424end;
425
426
427initialization
428  InternalInit;
429end.
430
431