1 {
2 Copyright (C) Alexey Torgashin, uvviewsoft.com
3 License: MPL 2.0 or LGPL
4 }
5 unit ATStringProc;
6 
7 {$mode objfpc}{$H+}
8 {$codepage utf8}
9 
10 interface
11 
12 uses
13   Classes, SysUtils, StrUtils,
14   LCLType, LCLIntf, Clipbrd,
15   ATSynEdit_Options,
16   ATSynEdit_UnicodeData,
17   ATSynEdit_RegExpr,
18   ATSynEdit_CharSizer;
19 
20 type
21   atString = UnicodeString;
22   atChar = WideChar;
23   PatChar = PWideChar;
24 
25 const
26   ATEditorCharXScale = 1024;
27 
28 type
29   TATEditorCharSize = record
30     //what is XScaled? it is the char-width, multiplied by ATEditorCharXScale and truncated.
31     //on win32/gtk/qt, XScaled is multiple of ATEditorCharXScale; but not on macOS.
32     //macOS has actually floating-number font width, e.g. 7.801 pixels of a single char in monospaced fonts
33     //(all ASCII chars have the same width, tested).
34     XScaled: Int64;
35     //macOS char-height is decimal-number.
36     Y: Int64;
37   end;
38 
39 type
40   TATLineChangeKind = (
41     cLineChangeEdited,
42     cLineChangeAdded,
43     cLineChangeDeleted,
44     cLineChangeDeletedAll
45     );
46 
47 type
48   TATIntArray = array of integer;
49   TATPointArray = array of TPoint;
50   TATInt64Array = array of Int64;
51 
52 const
53   //must be >= OptMaxLineLenForAccurateCharWidths
54   cMaxFixedArray = 1024;
55 
56 type
57   //must be with Int64 items, 32-bit is not enough for single line with len>40M
58   TATIntFixedArray = record
59     Data: packed array[0..cMaxFixedArray-1] of Int64;
60     Len: integer;
61   end;
62 
63   //must be with 'longint' items, it's for Dx offsets for rendering
64   TATInt32FixedArray = record
65     Data: packed array[0..cMaxFixedArray-1] of Longint;
66     Len: integer;
67   end;
68 
69 type
70   TATSimpleRange = record NFrom, NTo: integer; end;
71   TATSimpleRangeArray = array of TATSimpleRange;
72 
73 function IsStringWithUnicode(const S: string): boolean; inline;
74 function IsStringWithUnicode(const S: UnicodeString): boolean; inline;
75 
76 function SCharUpper(ch: WideChar): WideChar; inline;
77 function SCharLower(ch: WideChar): WideChar; inline;
78 
79 function SCaseTitle(const S, SNonWordChars: atString): atString;
80 function SCaseInvert(const S: atString): atString;
81 function SCaseSentence(const S, SNonWordChars: atString): atString;
82 
83 function StringOfCharW(ch: WideChar; Len: integer): UnicodeString;
84 
85 {$Z1}
86 type
87   TATLineEnds = (cEndNone, cEndWin, cEndUnix, cEndMac);
88 
89   TATLineState = (
90     cLineStateNone,
91     cLineStateChanged,
92     cLineStateAdded,
93     cLineStateSaved
94     );
95 
96 const
97   cLineEndStrings: array[TATLineEnds] of UnicodeString = ('', #13#10, #10, #13);
98   cLineEndNiceNames: array[TATLineEnds] of string = ('', 'CRLF', 'LF', 'CR');
99   cLineEndLength: array[TATLineEnds] of integer = (0, 2, 1, 1);
100 
101 const
102   BoolToPlusMinusOne: array[boolean] of integer = (-1, 1);
103 
104 var
105   EditorScalePercents: integer = 100;
106   EditorScaleFontPercents: integer = 0; //if 0, it follows previous variable
107 
108 function EditorScale(AValue: integer): integer; inline;
109 function EditorScaleFont(AValue: integer): integer;
110 
111 type
112   TATStringTabCalcEvent = function(Sender: TObject; ALineIndex, ACharIndex: integer): integer of object;
113   TATStringGetLenEvent = function(ALineIndex: integer): integer of object;
114 
115 type
116 
117   { TATStringTabHelper }
118 
119   TATStringTabHelper = class
120   private
121     //these arrays are local vars, placed here to alloc 2*4Kb not in stack
122     ListEnds: TATIntFixedArray;
123     ListMid: TATIntFixedArray;
124   public
125     TabSpaces: boolean;
126     TabSize: integer;
127     IndentSize: integer;
128     SenderObj: TObject;
129     OnCalcTabSize: TATStringTabCalcEvent;
130     OnCalcLineLen: TATStringGetLenEvent;
131     function CalcTabulationSize(ALineIndex, APos: integer): integer;
132     function TabsToSpaces(ALineIndex: integer; const S: atString): atString;
133     function TabsToSpaces_Length(ALineIndex: integer; const S: atString; AMaxLen: integer): integer;
134     function SpacesToTabs(ALineIndex: integer; const S: atString): atString;
135     function GetIndentExpanded(ALineIndex: integer; const S: atString): integer;
136     function CharPosToColumnPos(ALineIndex: integer; const S: atString; APos: integer): integer;
137     function ColumnPosToCharPos(ALineIndex: integer; const S: atString; AColumn: integer): integer;
138     function IndentUnindent(ALineIndex: integer; const Str: atString; ARight: boolean): atString;
139     procedure CalcCharOffsets(ALineIndex: integer; const S: atString; var AInfo: TATIntFixedArray; ACharsSkipped: integer = 0);
140     function CalcCharOffsetLast(ALineIndex: integer; const S: atString; ACharsSkipped: integer = 0): Int64;
141     function FindWordWrapOffset(ALineIndex: integer; const S: atString; AColumns: Int64;
142       const ANonWordChars: atString; AWrapIndented: boolean): integer;
143     function FindClickedPosition(ALineIndex: integer; const Str: atString;
144       constref ListOffsets: TATIntFixedArray;
145       APixelsFromLeft: Int64;
146       ACharSize: TATEditorCharSize;
147       AAllowVirtualPos: boolean;
148       out AEndOfLinePos: boolean): Int64;
149     procedure FindOutputSkipOffset(ALineIndex: integer; const S: atString;
150       AScrollPos: Int64; out ACharsSkipped: Int64; out ACellPercentsSkipped: Int64);
151   end;
152 
153 function IsCharEol(ch: widechar): boolean; inline;
154 function IsCharEol(ch: char): boolean; inline;
155 function IsCharWord(ch: widechar; const ANonWordChars: UnicodeString): boolean;
156 function IsCharWordA(ch: char): boolean; inline;
157 function IsCharWordInIdentifier(ch: widechar): boolean;
158 function IsCharDigit(ch: widechar): boolean; inline;
159 function IsCharDigit(ch: char): boolean; inline;
160 function IsCharSpace(ch: widechar): boolean; inline;
161 function IsCharSpace(ch: char): boolean; inline;
162 function IsCharSymbol(ch: widechar): boolean;
163 function IsCharHexDigit(ch: widechar): boolean; inline;
164 function IsCharHexDigit(ch: char): boolean; inline;
165 function HexDigitToInt(ch: char): integer;
166 
167 function IsCharSurrogateAny(ch: widechar): boolean; inline;
168 function IsCharSurrogateHigh(ch: widechar): boolean; inline;
169 function IsCharSurrogateLow(ch: widechar): boolean; inline;
170 
171 function IsStringSpaces(const S: atString): boolean; inline;
172 function IsStringSpaces(const S: atString; AFrom, ALen: integer): boolean;
173 
174 function SBeginsWith(const S, SubStr: UnicodeString): boolean;
175 function SBeginsWith(const S, SubStr: string): boolean;
176 function SBeginsWith(const S: UnicodeString; ch: WideChar): boolean; inline;
177 function SBeginsWith(const S: string; ch: char): boolean; inline;
178 function SEndsWith(const S, SubStr: UnicodeString): boolean;
179 function SEndsWith(const S, SubStr: string): boolean;
180 function SEndsWith(const S: UnicodeString; ch: WideChar): boolean; inline;
181 function SEndsWith(const S: string; ch: char): boolean; inline;
182 function SEndsWithEol(const S: string): boolean; inline;
183 function SEndsWithEol(const S: atString): boolean; inline;
184 
185 function STrimAll(const S: UnicodeString): UnicodeString;
186 function STrimLeft(const S: UnicodeString): UnicodeString;
187 function STrimRight(const S: UnicodeString): UnicodeString;
188 
189 function SGetIndentChars(const S: atString): integer;
190 function SGetIndentCharsToOpeningBracket(const S: atString): integer;
191 function SGetTrailingSpaceChars(const S: atString): integer;
192 function SGetNonSpaceLength(const S: atString): integer;
193 
194 function SStringHasEol(const S: atString): boolean; inline;
195 function SStringHasEol(const S: string): boolean; inline;
196 function SStringHasTab(const S: atString): boolean; inline;
197 function SStringHasTab(const S: string): boolean; inline;
198 //function SStringHasAsciiAndNoTabs(const S: atString): boolean;
199 //function SStringHasAsciiAndNoTabs(const S: string): boolean;
200 
201 function SRemoveNewlineChars(const S: atString): atString;
202 
203 function SGetItem(var S: string; const ch: Char = ','): string;
204 procedure SSwapEndianWide(var S: UnicodeString);
205 procedure SSwapEndianUCS4(var S: UCS4String); inline;
206 procedure SAddStringToHistory(const S: string; List: TStrings; MaxItems: integer);
207 
208 procedure TrimStringList(L: TStringList); inline;
209 
210 const
211   cDefaultNonWordChars: UnicodeString = '-+*=/\()[]{}<>"''.,:;~?!@#$%^&|`…';
212 
213 type
214   TATDecodeRec = record SFrom, STo: UnicodeString; end;
215 function SDecodeRecords(const S: UnicodeString; const Decode: array of TATDecodeRec): UnicodeString;
216 
217 procedure SReplaceAll(var S: string; const SFrom, STo: string); inline;
218 procedure SReplaceAllPercentChars(var S: string);
219 procedure SDeleteFrom(var s: string; const SFrom: string); inline;
220 procedure SDeleteFrom(var s: UnicodeString; const SFrom: UnicodeString); inline;
221 procedure SDeleteFromEol(var S: string);
222 procedure SDeleteFromEol(var S: UnicodeString);
223 
224 procedure SClipboardCopy(AText: string; AClipboardObj: TClipboard=nil);
225 function SFindCharCount(const S: string; ch: char): integer;
226 function SFindCharCount(const S: UnicodeString; ch: WideChar): integer;
227 function SFindRegexMatch(const Subject, Regex: UnicodeString; out MatchPos, MatchLen: integer): boolean;
228 function SFindRegexMatch(const Subject, Regex: UnicodeString; GroupIndex: integer; ModS, ModI, ModM: boolean): UnicodeString;
229 function SCountTextOccurrences(const SubStr, Str: UnicodeString): integer;
230 function SCountTextLines(const Str, StrBreak: UnicodeString): integer;
231 procedure SSplitByChar(const S: string; Sep: char; out S1, S2: string);
232 
233 
234 implementation
235 
236 uses
237   Dialogs, Math;
238 
239 function IsCharEol(ch: widechar): boolean;
240 begin
241   Result:= (ch=#10) or (ch=#13);
242 end;
243 
244 function IsCharEol(ch: char): boolean;
245 begin
246   Result:= (ch=#10) or (ch=#13);
247 end;
248 
249 function IsCharWord(ch: widechar; const ANonWordChars: UnicodeString): boolean;
250 begin
251   //to make '_' non-word char, specify it as _first_ in ANonWordChars
252   if ch='_' then
253     exit( (ANonWordChars='') or (ANonWordChars[1]<>'_') );
254 
255   //if it's unicode letter, return true, ignore option string
256   //if it's not letter, check for option (maybe char '$' is present there for PHP lexer)
257   // bit 7 in value: is word char
258   Result := CharCategoryArray[Ord(ch)] and 128 <> 0;
259   if not Result then
260   begin
261     if IsCharUnicodeSpace(ch) then
262       exit(false);
263 
264     if Pos(ch, ANonWordChars)=0 then
265       exit(true);
266   end;
267 end;
268 
269 function IsCharWordA(ch: char): boolean;
270 begin
271   // bit 7 in value: is word char
272   Result := CharCategoryArray[Ord(ch)] and 128 <> 0;
273 end;
274 
275 
276 function IsCharWordInIdentifier(ch: widechar): boolean;
277 begin
278   if Ord(ch)>Ord('z') then
279     exit(false);
280   case ch of
281     '0'..'9',
282     'a'..'z',
283     'A'..'Z',
284     '_':
285       Result:= true;
286     else
287       Result:= false;
288   end;
289 end;
290 
291 function IsCharDigit(ch: widechar): boolean;
292 begin
293   Result:= (ch>='0') and (ch<='9');
294 end;
295 
296 function IsCharDigit(ch: char): boolean;
297 begin
298   Result:= (ch>='0') and (ch<='9');
299 end;
300 
301 function IsCharHexDigit(ch: widechar): boolean;
302 begin
303   case ch of
304     '0'..'9',
305     'a'..'f',
306     'A'..'F':
307       Result:= true
308     else
309       Result:= false;
310   end;
311 end;
312 
313 function IsCharHexDigit(ch: char): boolean;
314 begin
315   case ch of
316     '0'..'9',
317     'a'..'f',
318     'A'..'F':
319       Result:= true
320     else
321       Result:= false;
322   end;
323 end;
324 
325 function IsCharSpace(ch: widechar): boolean;
326 begin
327   Result:= IsCharUnicodeSpace(ch);
328 end;
329 
330 function IsCharSpace(ch: char): boolean;
331 begin
332   case ch of
333     #9, //tab
334     ' ', //space
335     #$A0: //no-break space, NBSP, often used on macOS
336       Result:= true;
337     else
338       Result:= false;
339   end;
340 end;
341 
342 function IsCharSymbol(ch: widechar): boolean;
343 begin
344   Result:= Pos(ch, '.,;:''"/\-+*=()[]{}<>?!@#$%^&|~`')>0;
345 end;
346 
347 function IsCharSurrogateAny(ch: widechar): boolean;
348 begin
349   Result:= (ch>=#$D800) and (ch<=#$DFFF);
350 end;
351 
352 function IsCharSurrogateHigh(ch: widechar): boolean;
353 begin
354   Result:= (ch>=#$D800) and (ch<=#$DBFF);
355 end;
356 
357 function IsCharSurrogateLow(ch: widechar): boolean;
358 begin
359   Result:= (ch>=#$DC00) and (ch<=#$DFFF);
360 end;
361 
362 
363 function IsStringSpaces(const S: atString): boolean;
364 var
365   i: integer;
366 begin
367   for i:= 1 to Length(S) do
368     if not IsCharSpace(S[i]) then
369       exit(false);
370   Result:= true;
371 end;
372 
373 function IsStringSpaces(const S: atString; AFrom, ALen: integer): boolean;
374 var
375   i: integer;
376 begin
377   for i:= AFrom to Min(AFrom+ALen-1, Length(S)) do
378     if not IsCharSpace(S[i]) then
379       exit(false);
380   Result:= true;
381 end;
382 
383 {
384 function SStringHasUnicodeChars(const S: atString): boolean;
385 var
386   i, N: integer;
387 begin
388   for i:= 1 to Length(S) do
389   begin
390     N:= Ord(S[i]);
391     if (N<32) or (N>126) then exit(true);
392   end;
393   Result:= false;
394 end;
395 }
396 
397 procedure DoDebugOffsets(const Info: TATIntFixedArray);
398 var
399   i: integer;
400   s: string;
401 begin
402   s:= '';
403   for i:= 0 to Info.Len-1 do
404     s:= s+IntToStr(Info.Data[i])+'% ';
405   ShowMessage('Offsets'#10+s);
406 end;
407 
FindWordWrapOffsetnull408 function TATStringTabHelper.FindWordWrapOffset(ALineIndex: integer; const S: atString; AColumns: Int64;
409   const ANonWordChars: atString; AWrapIndented: boolean): integer;
410   //
411   //override IsCharWord to check also commas,dots,quotes
412   //to wrap them with wordchars
413   function _IsWord(ch: widechar): boolean; inline;
414   begin
415     if Pos(ch, ATEditorOptions.CommaCharsWrapWithWords)>0 then
416       Result:= true
417     else
418       Result:= IsCharWord(ch, ANonWordChars);
419   end;
420   //
421 var
422   N, NMin, NAvg: integer;
423   Offsets: TATIntFixedArray;
424 begin
425   if S='' then
426     Exit(0);
427   if AColumns<ATEditorOptions.MinWordWrapOffset then
428     Exit(AColumns);
429 
430   CalcCharOffsets(ALineIndex, S, Offsets);
431 
432   if Offsets.Data[Offsets.Len-1]<=AColumns*100 then
433     Exit(Length(S));
434 
435   //NAvg is average wrap offset, we use it if no correct offset found
436   N:= Min(Length(S), cMaxFixedArray)-1;
437   while (N>0) and (Offsets.Data[N]>(AColumns+1)*100) do Dec(N);
438   NAvg:= N;
439   if NAvg<ATEditorOptions.MinWordWrapOffset then
440     Exit(ATEditorOptions.MinWordWrapOffset);
441 
442   //find correct offset: not allowed at edge
443   //a) 2 wordchars,
444   //b) space as 2nd char (not nice look for Python src)
445   NMin:= SGetIndentChars(S)+1;
446   while (N>NMin) and
447     (IsCharSurrogateLow(S[N+1]) or
448      (_IsWord(S[N]) and _IsWord(S[N+1])) or
449      (AWrapIndented and IsCharSpace(S[N+1])))
450     do Dec(N);
451 
452   //use correct of avg offset
453   if N>NMin then
454     Result:= N
455   else
456     Result:= NAvg;
457 end;
458 
459 function SGetIndentChars(const S: atString): integer;
460 begin
461   Result:= 0;
462   while (Result<Length(S)) and IsCharSpace(S[Result+1]) do
463     Inc(Result);
464 end;
465 
466 function SGetTrailingSpaceChars(const S: atString): integer;
467 var
468   N: integer;
469 begin
470   Result:= 0;
471   N:= Length(S);
472   while (N>0) and IsCharSpace(S[N]) do
473   begin
474     Inc(Result);
475     Dec(N);
476   end;
477 end;
478 
479 
480 function SGetIndentCharsToOpeningBracket(const S: atString): integer;
481 var
482   n: integer;
483 begin
484   Result:= 0;
485   n:= Length(S);
486   //note RPos() don't work with UnicodeString
487   while (n>0) and (S[n]<>'(') do Dec(n);
488   if n>0 then
489     //test that found bracket is not closed
490     if PosEx(')', S, n)=0 then
491       Result:= n;
492 end;
493 
SGetNonSpaceLengthnull494 function SGetNonSpaceLength(const S: atString): integer;
495 begin
496   Result:= Length(S);
497   while (Result>0) and IsCharSpace(S[Result]) do Dec(Result);
498   if Result=0 then
499     Result:= Length(S);
500 end;
501 
502 procedure SSwapEndianWide(var S: UnicodeString);
503 var
504   i: integer;
505   P: PWord;
506 begin
507   if S='' then exit;
508   UniqueString(S);
509   for i:= 1 to Length(S) do
510   begin
511     P:= @S[i];
512     P^:= SwapEndian(P^);
513   end;
514 end;
515 
516 procedure SSwapEndianUCS4(var S: UCS4String);
517 var
518   i: integer;
519 begin
520   for i:= 0 to Length(S)-1 do
521     S[i]:= SwapEndian(S[i]);
522 end;
523 
TATStringTabHelper.CalcTabulationSizenull524 function TATStringTabHelper.CalcTabulationSize(ALineIndex, APos: integer): integer;
525 begin
526   if Assigned(OnCalcTabSize) then
527     Result:= OnCalcTabSize(SenderObj, ALineIndex, APos)
528   else
529   if Assigned(OnCalcLineLen) and (OnCalcLineLen(ALineIndex)>ATEditorOptions.MaxLineLenForAccurateCharWidths) then
530     Result:= 1
531   else
532   if APos>ATEditorOptions.MaxTabPositionToExpand then
533     Result:= 1
534   else
535     Result:= TabSize - (APos-1) mod TabSize;
536 end;
537 
538 
TATStringTabHelper.GetIndentExpandednull539 function TATStringTabHelper.GetIndentExpanded(ALineIndex: integer; const S: atString): integer;
540 var
541   ch: widechar;
542   i: integer;
543 begin
544   Result:= 0;
545   for i:= 1 to Length(S) do
546   begin
547     ch:= S[i];
548     if not IsCharSpace(ch) then exit;
549     if ch<>#9 then
550       Inc(Result)
551     else
552       Inc(Result, CalcTabulationSize(ALineIndex, Result+1));
553   end;
554 end;
555 
556 
TabsToSpacesnull557 function TATStringTabHelper.TabsToSpaces(ALineIndex: integer; const S: atString): atString;
558 var
559   N, NSize: integer;
560 begin
561   Result:= S;
562   N:= 0;
563   repeat
564     N:= PosEx(#9, Result, N+1);
565     if N=0 then Break;
566     NSize:= CalcTabulationSize(ALineIndex, N);
567     if NSize<2 then
568       Result[N]:= ' '
569     else
570     begin
571       Result[N]:= ' ';
572       Insert(StringOfCharW(' ', NSize-1), Result, N);
573     end;
574   until false;
575 end;
576 
TATStringTabHelper.TabsToSpaces_Lengthnull577 function TATStringTabHelper.TabsToSpaces_Length(ALineIndex: integer; const S: atString;
578   AMaxLen: integer): integer;
579 var
580   i: integer;
581 begin
582   Result:= 0;
583   if AMaxLen<0 then
584     AMaxLen:= Length(S);
585   for i:= 1 to AMaxLen do
586     if S[i]<>#9 then
587       Inc(Result)
588     else
589       Inc(Result, CalcTabulationSize(ALineIndex, Result+1));
590 end;
591 
592 
593 procedure TATStringTabHelper.CalcCharOffsets(ALineIndex: integer; const S: atString;
594   var AInfo: TATIntFixedArray; ACharsSkipped: integer);
595 var
596   NLen, NSize, NTabSize, NCharsSkipped: integer;
597   NScalePercents: integer;
598   //NPairSize: integer;
599   //StrPair: WideString;
600   ch: widechar;
601   i: integer;
602 begin
603   FillChar(AInfo, SizeOf(AInfo), 0);
604   NLen:= Min(Length(S), cMaxFixedArray);
605   AInfo.Len:= NLen;
606   if NLen=0 then Exit;
607 
608   NCharsSkipped:= ACharsSkipped;
609   //NPairSize:= 0;
610   //StrPair:= 'ab';
611 
612   if NLen>ATEditorOptions.MaxLineLenForAccurateCharWidths then
613   begin
614     for i:= 0 to NLen-1 do
615       AInfo.Data[i]:= (Int64(i)+1)*100;
616     exit;
617   end;
618 
619   for i:= 1 to NLen do
620   begin
621     ch:= S[i];
622     Inc(NCharsSkipped);
623 
624     {
625     ////if used GetStrWidth, then strange bug on Win32, Emoji wrap pos is not ok
626     if (NPairSize>0) and IsCharSurrogateLow(ch) then
627     begin
628       NScalePercents:= NPairSize div 2;
629       NPairSize:= 0;
630     end
631     else
632     if IsCharSurrogateHigh(ch) and (i<Length(S)) then
633     begin
634       StrPair[1]:= ch;
635       StrPair[2]:= S[i+1];
636       NPairSize:= GlobalCharSizer.GetStrWidth(StrPair);
637       NScalePercents:= NPairSize - NPairSize div 2;
638     end
639     }
640     if IsCharSurrogateAny(ch) then
641     begin
642       NScalePercents:= ATEditorOptions.EmojiWidthPercents div 2;
643     end
644     else
645     begin
646       NScalePercents:= GlobalCharSizer.GetCharWidth(ch);
647       //NPairSize:= 0;
648     end;
649 
650     if ch<>#9 then
651       NSize:= 1
652     else
653     begin
654       NTabSize:= CalcTabulationSize(ALineIndex, NCharsSkipped);
655       NSize:= NTabSize;
656       Inc(NCharsSkipped, NTabSize-1);
657     end;
658 
659     if i=1 then
660       AInfo.Data[i-1]:= Int64(NSize)*NScalePercents
661     else
662       AInfo.Data[i-1]:= AInfo.Data[i-2]+Int64(NSize)*NScalePercents;
663   end;
664 end;
665 
TATStringTabHelper.CalcCharOffsetLastnull666 function TATStringTabHelper.CalcCharOffsetLast(ALineIndex: integer; const S: atString;
667   ACharsSkipped: integer): Int64;
668 var
669   NLen, NSize, NTabSize, NCharsSkipped: integer;
670   NScalePercents: integer;
671   ch: WideChar;
672   i: integer;
673 begin
674   Result:= 0;
675   NLen:= Length(S);
676   if NLen=0 then Exit;
677 
678   if NLen>ATEditorOptions.MaxLineLenForAccurateCharWidths then
679     exit(NLen*100);
680 
681   NCharsSkipped:= ACharsSkipped;
682 
683   for i:= 1 to NLen do
684   begin
685     ch:= S[i];
686     Inc(NCharsSkipped);
687 
688     if IsCharSurrogateAny(ch) then
689     begin
690       NScalePercents:= ATEditorOptions.EmojiWidthPercents div 2;
691     end
692     else
693     begin
694       NScalePercents:= GlobalCharSizer.GetCharWidth(ch);
695     end;
696 
697     if ch<>#9 then
698       NSize:= 1
699     else
700     begin
701       NTabSize:= CalcTabulationSize(ALineIndex, NCharsSkipped);
702       NSize:= NTabSize;
703       Inc(NCharsSkipped, NTabSize-1);
704     end;
705 
706     Inc(Result, Int64(NSize)*NScalePercents);
707   end;
708 end;
709 
710 
FindClickedPositionnull711 function TATStringTabHelper.FindClickedPosition(ALineIndex: integer; const Str: atString;
712   constref ListOffsets: TATIntFixedArray;
713   APixelsFromLeft: Int64;
714   ACharSize: TATEditorCharSize;
715   AAllowVirtualPos: boolean; out AEndOfLinePos: boolean): Int64;
716 var
717   i: integer;
718 begin
719   AEndOfLinePos:= false;
720   if Str='' then
721   begin
722     Result:= 1;
723     if AAllowVirtualPos then
724       Inc(Result, APixelsFromLeft * ATEditorCharXScale div ACharSize.XScaled);
725     Exit;
726   end;
727 
728   ListEnds.Len:= ListOffsets.Len;
729   ListMid.Len:= ListOffsets.Len;
730 
731   //positions of each char end
732   for i:= 0 to ListOffsets.Len-1 do
733     ListEnds.Data[i]:= ListOffsets.Data[i]*ACharSize.XScaled div ATEditorCharXScale div 100;
734 
735   //positions of each char middle
736   for i:= 0 to ListOffsets.Len-1 do
737     if i=0 then
738       ListMid.Data[i]:= ListEnds.Data[i] div 2
739     else
740       ListMid.Data[i]:= (ListEnds.Data[i-1]+ListEnds.Data[i]) div 2;
741 
742   for i:= 0 to ListOffsets.Len-1 do
743     if APixelsFromLeft<ListMid.Data[i] then
744     begin
745       Result:= i+1;
746 
747       //don't get position inside utf16 surrogate pair
748       if (Result<=Length(Str)) and IsCharSurrogateLow(Str[Result]) then
749         Inc(Result);
750 
751       Exit
752     end;
753 
754   AEndOfLinePos:= true;
755 
756   Result:= ListEnds.Len + (APixelsFromLeft - ListEnds.Data[ListEnds.Len-1]) * ATEditorCharXScale div ACharSize.XScaled + 1;
757   ////this works
758   ////a) better if clicked after line end, far
759   ////b) bad if clicked exactly on line end (shifted to right by 1)
760   //Result:= ListEnds.Len + (APixelsFromLeft - ListEnds.Data[ListEnds.Len-1] - ACharSize div 2) div ACharSize + 2;
761 
762   if not AAllowVirtualPos then
763     Result:= Min(Result, Length(Str)+1);
764 end;
765 
766 procedure TATStringTabHelper.FindOutputSkipOffset(ALineIndex: integer; const S: atString;
767   AScrollPos: Int64; out ACharsSkipped: Int64; out ACellPercentsSkipped: Int64);
768 var
769   Offsets: TATIntFixedArray;
770 begin
771   ACharsSkipped:= 0;
772   ACellPercentsSkipped:= 0;
773   if (S='') or (AScrollPos=0) then Exit;
774 
775   CalcCharOffsets(ALineIndex, S, Offsets);
776 
777   while (ACharsSkipped<Offsets.Len) and
778     (Offsets.Data[ACharsSkipped] < AScrollPos*100) do
779     Inc(ACharsSkipped);
780 
781   if (ACharsSkipped>0) then
782     ACellPercentsSkipped:= Offsets.Data[ACharsSkipped-1];
783 end;
784 
785 function SGetItem(var S: string; const ch: Char = ','): string;
786 var
787   i: integer;
788 begin
789   i:= Pos(ch, S);
790   if i=0 then
791   begin
792     Result:= S;
793     S:= '';
794   end
795   else
796   begin
797     Result:= Copy(S, 1, i-1);
798     Delete(S, 1, i);
799   end;
800 end;
801 
802 
803 procedure TrimStringList(L: TStringList);
804 begin
805   //dont do "while", we need correct last empty lines
806   if (L.Count>0) and (L[L.Count-1]='') then
807     L.Delete(L.Count-1);
808 end;
809 
810 function SStringHasEol(const S: atString): boolean;
811 begin
812   Result:=
813     (Pos(#10, S)>0) or
814     (Pos(#13, S)>0);
815 end;
816 
817 function SStringHasEol(const S: string): boolean;
818 begin
819   Result:=
820     (Pos(#10, S)>0) or
821     (Pos(#13, S)>0);
822 end;
823 
SpacesToTabsnull824 function TATStringTabHelper.SpacesToTabs(ALineIndex: integer; const S: atString): atString;
825 begin
826   Result:= StringReplace(S, StringOfCharW(' ', TabSize), WideChar(9), [rfReplaceAll]);
827 end;
828 
CharPosToColumnPosnull829 function TATStringTabHelper.CharPosToColumnPos(ALineIndex: integer; const S: atString;
830   APos: integer): integer;
831 begin
832   if APos>Length(S) then
833     Result:= TabsToSpaces_Length(ALineIndex, S, -1) + APos-Length(S)
834   else
835     Result:= TabsToSpaces_Length(ALineIndex, S, APos);
836 end;
837 
ColumnPosToCharPosnull838 function TATStringTabHelper.ColumnPosToCharPos(ALineIndex: integer; const S: atString;
839   AColumn: integer): integer;
840 var
841   size, i: integer;
842 begin
843   if AColumn=0 then exit(AColumn);
844   if not SStringHasTab(S) then exit(AColumn);
845 
846   size:= 0;
847   for i:= 1 to Length(S) do
848   begin
849     if S[i]<>#9 then
850       Inc(size)
851     else
852       Inc(size, CalcTabulationSize(ALineIndex, size+1));
853     if size>=AColumn then
854       exit(i);
855   end;
856 
857   //column is too big, after line end
858   Result:= AColumn - size + Length(S);
859 end;
860 
861 function SStringHasTab(const S: atString): boolean;
862 begin
863   Result:= Pos(#9, S)>0;
864 end;
865 
866 function SStringHasTab(const S: string): boolean;
867 begin
868   Result:= Pos(#9, S)>0;
869 end;
870 
871 
872 (*
873 function SStringHasAsciiAndNoTabs(const S: atString): boolean;
874 var
875   code, i: integer;
876 begin
877   for i:= 1 to Length(S) do
878   begin
879     code:= Ord(S[i]);
880     if (code<32) or (code>=127) then
881       exit(false);
882   end;
883   Result:= true;
884 end;
885 
886 function SStringHasAsciiAndNoTabs(const S: string): boolean;
887 var
888   code, i: integer;
889 begin
890   for i:= 1 to Length(S) do
891   begin
892     code:= Ord(S[i]);
893     if (code<32) or (code>=127) then
894       exit(false);
895   end;
896   Result:= true;
897 end;
898 *)
899 
IndentUnindentnull900 function TATStringTabHelper.IndentUnindent(ALineIndex: integer; const Str: atString;
901   ARight: boolean): atString;
902 var
903   StrIndent, StrText: atString;
904   DecSpaces, N: integer;
905   DoTabs: boolean;
906 begin
907   Result:= Str;
908 
909   if IndentSize=0 then
910   begin
911     if TabSpaces then
912       StrIndent:= StringOfCharW(' ', TabSize)
913     else
914       StrIndent:= #9;
915     DecSpaces:= TabSize;
916   end
917   else
918   if IndentSize>0 then
919   begin
920     //use spaces
921     StrIndent:= StringOfCharW(' ', IndentSize);
922     DecSpaces:= IndentSize;
923   end
924   else
925   begin
926     //indent<0 - use tabs
927     StrIndent:= StringOfCharW(#9, Abs(IndentSize));
928     DecSpaces:= Abs(IndentSize)*TabSize;
929   end;
930 
931   if ARight then
932     Result:= StrIndent+Str
933   else
934   begin
935     N:= SGetIndentChars(Str);
936     StrIndent:= Copy(Str, 1, N);
937     StrText:= Copy(Str, N+1, MaxInt);
938     DoTabs:= SStringHasTab(StrIndent);
939 
940     StrIndent:= TabsToSpaces(ALineIndex, StrIndent);
941     if DecSpaces>Length(StrIndent) then
942       DecSpaces:= Length(StrIndent);
943     Delete(StrIndent, 1, DecSpaces);
944 
945     if DoTabs then
946       StrIndent:= SpacesToTabs(ALineIndex, StrIndent);
947     Result:= StrIndent+StrText;
948   end;
949 end;
950 
951 
952 function SRemoveNewlineChars(const S: atString): atString;
953 var
954   i: integer;
955 begin
956   Result:= S;
957   for i:= 1 to Length(Result) do
958     if IsCharEol(Result[i]) then
959       Result[i]:= ' ';
960 end;
961 
962 function STrimAll(const S: unicodestring): unicodestring;
963 var
964   Ofs, Len: sizeint;
965 begin
966   len := Length(S);
967   while (Len>0) and (IsCharSpace(S[Len])) do
968    dec(Len);
969   Ofs := 1;
970   while (Ofs<=Len) and (IsCharSpace(S[Ofs])) do
971     Inc(Ofs);
972   result := Copy(S, Ofs, 1 + Len - Ofs);
973 end;
974 
975 function STrimLeft(const S: unicodestring): unicodestring;
976 var
977   i,l:sizeint;
978 begin
979   l := length(s);
980   i := 1;
981   while (i<=l) and (IsCharSpace(s[i])) do
982     inc(i);
983   Result := copy(s, i, l);
984 end;
985 
986 function STrimRight(const S: unicodestring): unicodestring;
987 var
988   l:sizeint;
989 begin
990   l := length(s);
991   while (l>0) and (IsCharSpace(s[l])) do
992     dec(l);
993   result := copy(s,1,l);
994 end;
995 
996 function SBeginsWith(const S, SubStr: UnicodeString): boolean;
997 var
998   i: integer;
999 begin
1000   Result:= false;
1001   if S='' then exit;
1002   if SubStr='' then exit;
1003   if Length(SubStr)>Length(S) then exit;
1004   for i:= 1 to Length(SubStr) do
1005     if S[i]<>SubStr[i] then exit;
1006   Result:= true;
1007 end;
1008 
1009 function SBeginsWith(const S, SubStr: string): boolean;
1010 var
1011   i: integer;
1012 begin
1013   Result:= false;
1014   if S='' then exit;
1015   if SubStr='' then exit;
1016   if Length(SubStr)>Length(S) then exit;
1017   for i:= 1 to Length(SubStr) do
1018     if S[i]<>SubStr[i] then exit;
1019   Result:= true;
1020 end;
1021 
1022 function SEndsWith(const S, SubStr: UnicodeString): boolean;
1023 var
1024   i, Offset: integer;
1025 begin
1026   Result:= false;
1027   if S='' then exit;
1028   if SubStr='' then exit;
1029   Offset:= Length(S)-Length(SubStr);
1030   if Offset<0 then exit;
1031   for i:= 1 to Length(SubStr) do
1032     if S[i+Offset]<>SubStr[i] then exit;
1033   Result:= true;
1034 end;
1035 
1036 function SEndsWith(const S, SubStr: string): boolean;
1037 var
1038   i, Offset: integer;
1039 begin
1040   Result:= false;
1041   if S='' then exit;
1042   if SubStr='' then exit;
1043   Offset:= Length(S)-Length(SubStr);
1044   if Offset<0 then exit;
1045   for i:= 1 to Length(SubStr) do
1046     if S[i+Offset]<>SubStr[i] then exit;
1047   Result:= true;
1048 end;
1049 
1050 function SBeginsWith(const S: UnicodeString; ch: WideChar): boolean;
1051 begin
1052   Result:= (S<>'') and (S[1]=ch);
1053 end;
1054 
1055 function SBeginsWith(const S: string; ch: char): boolean;
1056 begin
1057   Result:= (S<>'') and (S[1]=ch);
1058 end;
1059 
1060 function SEndsWith(const S: UnicodeString; ch: WideChar): boolean;
1061 begin
1062   Result:= (S<>'') and (S[Length(S)]=ch);
1063 end;
1064 
1065 function SEndsWith(const S: string; ch: char): boolean;
1066 begin
1067   Result:= (S<>'') and (S[Length(S)]=ch);
1068 end;
1069 
1070 function SEndsWithEol(const S: string): boolean;
1071 begin
1072   Result:= (S<>'') and IsCharEol(S[Length(S)]);
1073 end;
1074 
1075 function SEndsWithEol(const S: atString): boolean;
1076 begin
1077   Result:= (S<>'') and IsCharEol(S[Length(S)]);
1078 end;
1079 
1080 function SCharUpper(ch: WideChar): WideChar;
1081 begin
1082   Result := CharUpperArray[Ord(Ch)];
1083 end;
1084 
1085 function SCharLower(ch: WideChar): WideChar;
1086 begin
1087   Result := CharLowerArray[Ord(Ch)];
1088 end;
1089 
1090 
1091 function SCaseTitle(const S, SNonWordChars: atString): atString;
1092 var
1093   i: integer;
1094 begin
1095   Result:= S;
1096   for i:= 1 to Length(Result) do
1097     if (i=1) or not IsCharWord(S[i-1], SNonWordChars) then
1098       Result[i]:= SCharUpper(Result[i])
1099     else
1100       Result[i]:= SCharLower(Result[i]);
1101 end;
1102 
1103 function SCaseInvert(const S: atString): atString;
1104 var
1105   ch, ch_up: WideChar;
1106   i: integer;
1107 begin
1108   Result:= S;
1109   for i:= 1 to Length(Result) do
1110   begin
1111     ch:= Result[i];
1112     ch_up:= SCharUpper(ch);
1113     if ch<>ch_up then
1114       Result[i]:= ch_up
1115     else
1116       Result[i]:= SCharLower(ch);
1117   end;
1118 end;
1119 
1120 function SCaseSentence(const S, SNonWordChars: atString): atString;
1121 var
1122   dot: boolean;
1123   ch: WideChar;
1124   i: Integer;
1125 begin
1126   Result:= S;
1127   dot:= True;
1128   for i:= 1 to Length(Result) do
1129   begin
1130     ch:= Result[i];
1131     if IsCharWord(ch, SNonWordChars) then
1132     begin
1133       if dot then
1134         Result[i]:= SCharUpper(ch)
1135       else
1136         Result[i]:= SCharLower(ch);
1137       dot:= False;
1138     end
1139     else
1140       if (ch = '.') or (ch = '!') or (ch = '?') then
1141         dot:= True;
1142   end;
1143 end;
1144 
1145 function StringOfCharW(ch: WideChar; Len: integer): UnicodeString;
1146 var
1147   i: integer;
1148 begin
1149   SetLength(Result, Len);
1150   for i:= 1 to Len do
1151     Result[i]:= ch;
1152 end;
1153 
1154 
1155 function SDecodeRecords(const S: UnicodeString; const Decode: array of TATDecodeRec): UnicodeString;
1156 var
1157   DoDecode: Boolean;
1158   i, iPart: integer;
1159 begin
1160   Result := '';
1161   i := 1;
1162   repeat
1163     if i > Length(S) then Break;
1164     DoDecode := False;
1165     for iPart := Low(Decode) to High(Decode) do
1166       with Decode[iPart] do
1167         if strlcomp(PChar(SFrom), @S[i], Length(SFrom)) = 0 then
1168         begin
1169           DoDecode := True;
1170           Result := Result + STo;
1171           Inc(i, Length(SFrom));
1172           Break
1173         end;
1174     if DoDecode then Continue;
1175     Result := Result + S[i];
1176     Inc(i);
1177   until False;
1178 end;
1179 
1180 
1181 procedure SReplaceAll(var S: string; const SFrom, STo: string);
1182 begin
1183   S:= StringReplace(S, SFrom, STo, [rfReplaceAll]);
1184 end;
1185 
1186 procedure SReplaceAllPercentChars(var S: string);
1187 var
1188   i: Integer;
1189 begin
1190   for i:= $20 to $2F do
1191     SReplaceAll(S, '%'+IntToHex(i, 2), Chr(i));
1192 
1193   i:= $7C;
1194   SReplaceAll(S, '%'+IntToHex(i, 2), Chr(i));
1195 end;
1196 
1197 procedure SDeleteFrom(var s: string; const SFrom: string);
1198 var
1199   n: integer;
1200 begin
1201   n:= Pos(SFrom, S);
1202   if n>0 then
1203     SetLength(S, n-1);
1204 end;
1205 
1206 procedure SDeleteFrom(var s: UnicodeString; const SFrom: UnicodeString);
1207 var
1208   n: integer;
1209 begin
1210   n:= Pos(SFrom, S);
1211   if n>0 then
1212     SetLength(S, n-1);
1213 end;
1214 
1215 procedure SDeleteFromEol(var S: string);
1216 var
1217   i: integer;
1218   ch: char;
1219 begin
1220   for i:= 1 to Length(S) do
1221   begin
1222     ch:= S[i];
1223     if (ch=#10) or (ch=#13) then
1224     begin
1225       SetLength(S, i-1);
1226       Exit;
1227     end;
1228   end;
1229 end;
1230 
1231 procedure SDeleteFromEol(var S: UnicodeString);
1232 var
1233   i: integer;
1234   ch: WideChar;
1235 begin
1236   for i:= 1 to Length(S) do
1237   begin
1238     ch:= S[i];
1239     if (ch=#10) or (ch=#13) then
1240     begin
1241       SetLength(S, i-1);
1242       Exit;
1243     end;
1244   end;
1245 end;
1246 
1247 procedure SAddStringToHistory(const S: string; List: TStrings; MaxItems: integer);
1248 var
1249   n: integer;
1250 begin
1251   if s<>'' then
1252   begin
1253     n:= List.IndexOf(s);
1254     if n>=0 then
1255       List.Delete(n);
1256     List.Insert(0, s);
1257   end;
1258 
1259   while List.Count>MaxItems do
1260     List.Delete(List.Count-1);
1261 end;
1262 
1263 procedure SClipboardCopy(AText: string; AClipboardObj: TClipboard=nil);
1264 begin
1265   if AText='' then exit;
1266   if AClipboardObj=nil then
1267     AClipboardObj:= Clipboard;
1268 
1269   {$IFDEF LCLGTK2}
1270   //Workaround for Lazarus bug #0021453. LCL adds trailing zero to clipboard in Clipboard.AsText.
1271   AClipboardObj.Clear;
1272   AClipboardObj.AddFormat(PredefinedClipboardFormat(pcfText), AText[1], Length(AText));
1273   {$ELSE}
1274   AClipboardObj.AsText:= AText;
1275   {$ENDIF}
1276 end;
1277 
1278 
1279 function SFindCharCount(const S: string; ch: char): integer;
1280 var
1281   i: integer;
1282 begin
1283   Result:= 0;
1284   for i:= 1 to Length(S) do
1285     if S[i]=ch then
1286       Inc(Result);
1287 end;
1288 
1289 function SFindCharCount(const S: UnicodeString; ch: WideChar): integer;
1290 var
1291   i: integer;
1292 begin
1293   Result:= 0;
1294   for i:= 1 to Length(S) do
1295     if S[i]=ch then
1296       Inc(Result);
1297 end;
1298 
1299 
1300 function EditorScale(AValue: integer): integer;
1301 begin
1302   Result:= AValue * EditorScalePercents div 100;
1303 end;
1304 
1305 function EditorScaleFont(AValue: integer): integer;
1306 begin
1307   if EditorScaleFontPercents=0 then
1308     Result:= EditorScale(AValue)
1309   else
1310     Result:= AValue * EditorScaleFontPercents div 100;
1311 end;
1312 
1313 
1314 function SFindRegexMatch(const Subject, Regex: UnicodeString; out MatchPos, MatchLen: integer): boolean;
1315 var
1316   Obj: TRegExpr;
1317 begin
1318   Result:= false;
1319   MatchPos:= 0;
1320   MatchLen:= 0;
1321 
1322   Obj:= TRegExpr.Create;
1323   try
1324     Obj.ModifierS:= false;
1325     Obj.ModifierM:= true;
1326     Obj.ModifierI:= false;
1327     Obj.Expression:= Regex;
1328 
1329     if Obj.Exec(Subject) then
1330     begin
1331       Result:= true;
1332       MatchPos:= Obj.MatchPos[0];
1333       MatchLen:= Obj.MatchLen[0];
1334     end;
1335   finally
1336     FreeAndNil(Obj);
1337   end;
1338 end;
1339 
1340 function SFindRegexMatch(const Subject, Regex: UnicodeString; GroupIndex: integer; ModS, ModI, ModM: boolean): UnicodeString;
1341 var
1342   Obj: TRegExpr;
1343 begin
1344   Result:= '';
1345   Obj:= TRegExpr.Create;
1346   try
1347     Obj.ModifierS:= ModS;
1348     Obj.ModifierM:= ModM;
1349     Obj.ModifierI:= ModI;
1350     Obj.Expression:= Regex;
1351     if Obj.Exec(Subject) then
1352       Result:= Obj.Match[GroupIndex];
1353   finally
1354     FreeAndNil(Obj);
1355   end;
1356 end;
1357 
1358 
1359 function SCountTextOccurrences(const SubStr, Str: UnicodeString): integer;
1360 var
1361   Offset: integer;
1362 begin
1363   Result:= 0;
1364   if (Str='') or (SubStr='') then exit;
1365   Offset:= PosEx(SubStr, Str, 1);
1366   while Offset<>0 do
1367   begin
1368     Inc(Result);
1369     Offset:= PosEx(SubStr, Str, Offset + Length(SubStr));
1370   end;
1371 end;
1372 
1373 function SCountTextLines(const Str, StrBreak: UnicodeString): integer;
1374 begin
1375   Result:= SCountTextOccurrences(StrBreak, Str)+1;
1376   // ignore trailing EOL
1377   if Length(Str)>=Length(StrBreak) then
1378     if Copy(Str, Length(Str)-Length(StrBreak)+1, Length(StrBreak))=StrBreak then
1379       Dec(Result);
1380 end;
1381 
1382 procedure SSplitByChar(const S: string; Sep: char; out S1, S2: string);
1383 var
1384   N: integer;
1385 begin
1386   N:= Pos(Sep, S);
1387   if N=0 then
1388   begin
1389     S1:= S;
1390     S2:= '';
1391   end
1392   else
1393   begin
1394     S1:= Copy(S, 1, N-1);
1395     S2:= Copy(S, N+1, Length(S));
1396   end;
1397 end;
1398 
1399 function IsStringWithUnicode(const S: string): boolean;
1400 var
1401   i: integer;
1402 begin
1403   for i:= 1 to Length(S) do
1404     if Ord(S[i])>=128 then
1405       exit(true);
1406   Result:= false;
1407 end;
1408 
1409 function IsStringWithUnicode(const S: UnicodeString): boolean;
1410 var
1411   i: integer;
1412 begin
1413   for i:= 1 to Length(S) do
1414     if Ord(S[i])>=128 then
1415       exit(true);
1416   Result:= false;
1417 end;
1418 
1419 function HexDigitToInt(ch: char): integer;
1420 begin
1421   case ch of
1422     '0'..'9':
1423       Result:= Ord(ch)-Ord('0');
1424     'a'..'f':
1425       Result:= Ord(ch)-Ord('a')+10;
1426     'A'..'F':
1427       Result:= Ord(ch)-Ord('A')+10;
1428     else
1429       Result:= 0;
1430   end;
1431 end;
1432 
1433 end.
1434 
1435