1unit pas2jsutils; 2{ 3 This file is part of the Free Component Library (FCL) 4 Copyright (c) 2018 Mattias Gaertner mattias@freepascal.org 5 6 Pascal to Javascript converter class. 7 8 See the file COPYING.FPC, included in this distribution, 9 for details about the copyright. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 15 ********************************************************************** 16 17 Abstract: 18 Utility routines that do not need a filesystem or OS functionality. 19 Filesystem-specific things should go to pas2jsfileutils instead. 20} 21{$mode objfpc}{$H+} 22 23interface 24 25uses 26 Classes, SysUtils; 27 28function ChompPathDelim(const Path: string): string; 29function GetNextDelimitedItem(const List: string; Delimiter: char; 30 var Position: integer): string; 31type 32 TChangeStamp = SizeInt; 33 34const 35 InvalidChangeStamp = low(TChangeStamp); 36 37Function IncreaseChangeStamp(Stamp: TChangeStamp) : TChangeStamp; 38const 39 EncodingUTF8 = 'UTF-8'; 40 EncodingSystem = 'System'; 41 42function NormalizeEncoding(const Encoding: string): string; 43function IsASCII(const s: string): boolean; inline; 44{$IFDEF FPC_HAS_CPSTRING} 45const 46 UTF8BOM = #$EF#$BB#$BF; 47function UTF8CharacterStrictLength(P: PChar): integer; 48 49function UTF8ToUTF16(const s: string): UnicodeString; 50function UTF16ToUTF8(const s: UnicodeString): string; 51 52{$ENDIF FPC_HAS_CPSTRING} 53 54function IsNonUTF8System: boolean;// true if system encoding is not UTF-8 55{$IFDEF Windows} 56// AConsole - If false, it is the general system encoding, 57// if true, it is the console encoding 58function GetWindowsEncoding(AConsole: Boolean = False): string; 59{$ENDIF} 60{$IF defined(Unix) and not defined(Darwin)} 61function GetUnixEncoding: string; 62{$ENDIF} 63 64Function NonUTF8System: boolean; 65function GetDefaultTextEncoding: string; 66 67procedure SplitCmdLineParams(const Params: string; ParamList: TStrings; 68 ReadBackslash: boolean = false); 69 70implementation 71 72{$IFDEF Windows} 73uses Windows; 74{$ENDIF} 75 76Var 77 {$IFDEF Unix} 78 {$IFNDEF Darwin} 79 Lang: string = ''; 80 {$ENDIF} 81 {$ENDIF} 82 EncodingValid: boolean = false; 83 DefaultTextEncoding: string = EncodingSystem; 84 gNonUTF8System : Boolean = {$IFDEF FPC_HAS_CPSTRING}false{$ELSE}true{$ENDIF}; 85 86Function NonUTF8System: boolean; 87 88begin 89 Result:=gNonUTF8System; 90end; 91 92function GetNextDelimitedItem(const List: string; Delimiter: char; 93 var Position: integer): string; 94var 95 StartPos: Integer; 96begin 97 StartPos:=Position; 98 while (Position<=length(List)) and (List[Position]<>Delimiter) do 99 inc(Position); 100 Result:=copy(List,StartPos,Position-StartPos); 101 if Position<=length(List) then inc(Position); // skip Delimiter 102end; 103 104function IncreaseChangeStamp(Stamp: TChangeStamp): TChangeStamp; 105begin 106 if Stamp<High(TChangeStamp) then 107 Result:=Stamp+1 108 else 109 Result:=InvalidChangeStamp+1; 110end; 111 112function ChompPathDelim(const Path: string): string; 113var 114 Len, MinLen: Integer; 115begin 116 Result:=Path; 117 if Path = '' then 118 exit; 119 Len:=length(Result); 120 if (Result[1] in AllowDirectorySeparators) then 121 begin 122 MinLen := 1; 123 {$IFDEF HasUNCPaths} 124 if (Len >= 2) and (Result[2] in AllowDirectorySeparators) then 125 MinLen := 2; // keep UNC '\\', chomp 'a\' to 'a' 126 {$ENDIF} 127 {$IFDEF Pas2js} 128 if (Len >= 2) and (Result[2]=Result[1]) and (PathDelim='\') then 129 MinLen := 2; // keep UNC '\\', chomp 'a\' to 'a' 130 {$ENDIF} 131 end 132 else begin 133 MinLen := 0; 134 {$IFdef MSWindows} 135 if (Len >= 3) and (Result[1] in ['a'..'z', 'A'..'Z']) and 136 (Result[2] = ':') and (Result[3] in AllowDirectorySeparators) 137 then 138 MinLen := 3; 139 {$ENDIF} 140 {$IFdef Pas2js} 141 if (PathDelim='\') 142 and (Len >= 3) and (Result[1] in ['a'..'z', 'A'..'Z']) 143 and (Result[2] = ':') and (Result[3] in AllowDirectorySeparators) 144 then 145 MinLen := 3; 146 {$ENDIF} 147 end; 148 149 while (Len > MinLen) and (Result[Len] in AllowDirectorySeparators) do dec(Len); 150 if Len<length(Result) then 151 SetLength(Result,Len); 152end; 153 154function NormalizeEncoding(const Encoding: string): string; 155var 156 i: Integer; 157begin 158 Result:=LowerCase(Encoding); 159 for i:=length(Result) downto 1 do 160 if Result[i]='-' then Delete(Result,i,1); 161end; 162 163{$IFDEF WINDOWS} 164function GetWindowsEncoding(AConsole: Boolean = False): string; 165var 166 cp : UINT; 167{$IFDEF WinCE} 168// CP_UTF8 is missing in the windows unit of the Windows CE RTL 169const 170 CP_UTF8 = 65001; 171{$ENDIF} 172begin 173 if AConsole then cp := GetOEMCP 174 else cp := GetACP; 175 176 case cp of 177 CP_UTF8: Result := EncodingUTF8; 178 else 179 Result:='cp'+IntToStr(cp); 180 end; 181end; 182{$ENDIF} 183 184function IsASCII(const s: string): boolean; inline; 185{$IFDEF Pas2js} 186var 187 i: Integer; 188begin 189 for i:=1 to length(s) do 190 if s[i]>#127 then exit(false); 191 Result:=true; 192end; 193{$ELSE} 194var 195 p: PChar; 196begin 197 if s='' then exit(true); 198 p:=PChar(s); 199 repeat 200 case p^ of 201 #0: if p-PChar(s)=length(s) then exit(true); 202 #128..#255: exit(false); 203 end; 204 inc(p); 205 until false; 206end; 207{$ENDIF} 208 209{$IFDEF FPC_HAS_CPSTRING} 210function UTF8CharacterStrictLength(P: PChar): integer; 211begin 212 if p=nil then exit(0); 213 if ord(p^)<%10000000 then 214 begin 215 // regular single byte character 216 exit(1); 217 end 218 else if ord(p^)<%11000000 then 219 begin 220 // invalid single byte character 221 exit(0); 222 end 223 else if ((ord(p^) and %11100000) = %11000000) then 224 begin 225 // should be 2 byte character 226 if (ord(p[1]) and %11000000) = %10000000 then 227 exit(2) 228 else 229 exit(0); 230 end 231 else if ((ord(p^) and %11110000) = %11100000) then 232 begin 233 // should be 3 byte character 234 if ((ord(p[1]) and %11000000) = %10000000) 235 and ((ord(p[2]) and %11000000) = %10000000) then 236 exit(3) 237 else 238 exit(0); 239 end 240 else if ((ord(p^) and %11111000) = %11110000) then 241 begin 242 // should be 4 byte character 243 if ((ord(p[1]) and %11000000) = %10000000) 244 and ((ord(p[2]) and %11000000) = %10000000) 245 and ((ord(p[3]) and %11000000) = %10000000) then 246 exit(4) 247 else 248 exit(0); 249 end else 250 exit(0); 251end; 252 253function UTF8ToUTF16(const s: string): UnicodeString; 254begin 255 Result:=UTF8Decode(s); 256end; 257 258function UTF16ToUTF8(const s: UnicodeString): string; 259begin 260 if s='' then exit(''); 261 Result:=UTF8Encode(s); 262 // prevent UTF8 codepage appear in the strings - we don't need codepage 263 // conversion magic 264 SetCodePage(RawByteString(Result), CP_ACP, False); 265end; 266{$ENDIF} 267 268function IsNonUTF8System: boolean; 269begin 270 Result:=NonUTF8System; 271end; 272 273{$IFDEF UNIX} 274{$IFNDEF Darwin} 275function GetUnixEncoding: string; 276var 277 i: integer; 278begin 279 Result:=EncodingSystem; 280 i:=pos('.',Lang); 281 if (i>0) and (i<=length(Lang)) then 282 Result:=copy(Lang,i+1,length(Lang)-i); 283end; 284{$ENDIF} 285{$ENDIF} 286 287function GetDefaultTextEncoding: string; 288 289 290begin 291 if EncodingValid then 292 begin 293 Result:=DefaultTextEncoding; 294 exit; 295 end; 296 297 {$IFDEF Pas2js} 298 Result:=EncodingUTF8; 299 {$ELSE} 300 {$IFDEF Windows} 301 Result:=GetWindowsEncoding; 302 {$ELSE} 303 {$IFDEF Darwin} 304 Result:=EncodingUTF8; 305 {$ELSE} 306 // unix 307 Lang := GetEnvironmentVariable('LC_ALL'); 308 if Lang='' then 309 begin 310 Lang := GetEnvironmentVariable('LC_MESSAGES'); 311 if Lang='' then 312 Lang := GetEnvironmentVariable('LANG'); 313 end; 314 Result:=GetUnixEncoding; 315 {$ENDIF} 316 {$ENDIF} 317 {$ENDIF} 318 Result:=NormalizeEncoding(Result); 319 320 DefaultTextEncoding:=Result; 321 EncodingValid:=true; 322end; 323 324procedure InternalInit; 325begin 326 {$IFDEF FPC_HAS_CPSTRING} 327 SetMultiByteConversionCodePage(CP_UTF8); 328 // SetMultiByteFileSystemCodePage(CP_UTF8); not needed, this is the default under Windows 329 SetMultiByteRTLFileSystemCodePage(CP_UTF8); 330 331 GetDefaultTextEncoding; 332 {$IFDEF Windows} 333 gNonUTF8System:=true; 334 {$ELSE} 335 gNonUTF8System:=SysUtils.CompareText(DefaultTextEncoding,'UTF8')<>0; 336 {$ENDIF} 337 {$ENDIF} 338end; 339procedure SplitCmdLineParams(const Params: string; ParamList: TStrings; 340 ReadBackslash: boolean = false); 341// split spaces, quotes are parsed as single parameter 342// if ReadBackslash=true then \" is replaced to " and not treated as quote 343// #0 is always end 344type 345 TMode = (mNormal,mApostrophe,mQuote); 346var 347 p: Integer; 348 Mode: TMode; 349 Param: String; 350begin 351 p:=1; 352 while p<=length(Params) do 353 begin 354 // skip whitespace 355 while (p<=length(Params)) and (Params[p] in [' ',#9,#10,#13]) do inc(p); 356 if (p>length(Params)) or (Params[p]=#0) then 357 break; 358 // read param 359 Param:=''; 360 Mode:=mNormal; 361 while p<=length(Params) do 362 begin 363 case Params[p] of 364 #0: 365 break; 366 '\': 367 begin 368 inc(p); 369 if ReadBackslash then 370 begin 371 // treat next character as normal character 372 if (p>length(Params)) or (Params[p]=#0) then 373 break; 374 if ord(Params[p])<128 then 375 begin 376 Param+=Params[p]; 377 inc(p); 378 end else begin 379 // next character is already a normal character 380 end; 381 end else begin 382 // treat backslash as normal character 383 Param+='\'; 384 end; 385 end; 386 '''': 387 begin 388 inc(p); 389 case Mode of 390 mNormal: 391 Mode:=mApostrophe; 392 mApostrophe: 393 Mode:=mNormal; 394 mQuote: 395 Param+=''''; 396 end; 397 end; 398 '"': 399 begin 400 inc(p); 401 case Mode of 402 mNormal: 403 Mode:=mQuote; 404 mApostrophe: 405 Param+='"'; 406 mQuote: 407 Mode:=mNormal; 408 end; 409 end; 410 ' ',#9,#10,#13: 411 begin 412 if Mode=mNormal then break; 413 Param+=Params[p]; 414 inc(p); 415 end; 416 else 417 Param+=Params[p]; 418 inc(p); 419 end; 420 end; 421 //writeln('SplitCmdLineParams Param=#'+Param+'#'); 422 ParamList.Add(Param); 423 end; 424end; 425 426 427initialization 428 InternalInit; 429end. 430 431