1{
2  Author: Mattias Gaertner
3
4 *****************************************************************************
5  See the file COPYING.modifiedLGPL.txt, included in this distribution,
6  for details about the license.
7 *****************************************************************************
8
9  Abstract:
10    A wordcompletion stores words and can createse a list of words gathered
11    from the recently added words and provided source texts.
12
13}
14unit WordCompletion;
15
16{$mode objfpc}{$H+}
17
18interface
19
20uses
21  Classes, SysUtils, Forms, Controls, SynEdit;
22
23type
24  TWordCompletionGetSource =
25    procedure(var Source:TStrings; var TopLine, BottomLine: Integer;
26      var IgnoreWordPos: TPoint; SourceIndex:integer) of object;
27
28  TWordCompletion = class
29  private
30    FWordBuffer:TStringList;// the recent used words list. the newest are at the end
31    FWordBufferCapacity:integer;
32    FOnGetSource:TWordCompletionGetSource;
33    function GetWordBufferCapacity:integer;
34    procedure SetWordBufferCapacity(NewCapacity: integer);
35    function CaseInsensitiveIndexOf(const AWord: string):integer;
36    function CaseSensitiveIndexOf(const AWord: string):integer;
37  protected
38    procedure DoGetSource(var Source:TStrings; var TopLine, BottomLine: Integer;
39      var IgnoreWordPos: TPoint; SourceIndex:integer); virtual;
40  public
41    constructor Create;
42    destructor Destroy; override;
43    procedure AddWord(const AWord:string);
44    property WordBufferCapacity:integer
45       read GetWordBufferCapacity write SetWordBufferCapacity;
46    procedure GetWordList(AWordList:TStrings; const Filter: String;
47      ContainsFilter, CaseSensitive:boolean; MaxResults:integer);
48    procedure CompletePrefix(const Prefix: string; var CompletedPrefix: string;
49       CaseSensitive:boolean);
50  public
51    property OnGetSource:TWordCompletionGetSource
52       read FOnGetSource write FOnGetSource;
53  end;
54
55implementation
56
57type
58  TCharType = (ctNone,ctWordBegin,ctWord);
59
60var
61  CharTable: array[char] of TCharType;
62
63procedure InitCharTable;
64var c:char;
65begin
66  for c:=low(char) to high(char) do
67    case c of
68      'a'..'z','A'..'Z','_':CharTable[c]:=ctWordBegin;
69      '0'..'9':CharTable[c]:=ctWord;
70    else CharTable[c]:=ctNone;
71    end;
72end;
73
74
75{ TWordCompletion }
76
77// fast pos functions used in GetWordList
78
79function MyPos(const SubStr, S: string; Offset, LastPos: SizeInt): SizeInt;
80var
81  i,MaxLen, SubLen : SizeInt;
82  SubFirst: Char;
83  pc: pchar;
84begin
85  Result:=0;
86  SubLen := Length(SubStr);
87  if (SubLen > 0) and (Offset > 0) and (Offset <= LastPos) then
88   begin
89    MaxLen := LastPos- SubLen;
90    SubFirst := SubStr[1];
91    i := IndexByte(S[Offset],LastPos - Offset + 1, Byte(SubFirst));
92    while (i >= 0) and ((i + sizeint(Offset) - 1) <= MaxLen) do
93    begin
94      pc := @S[i+SizeInt(Offset)];
95      //we know now that pc^ = SubFirst, because indexbyte returned a value > -1
96      if (CompareByte(Substr[1],pc^,SubLen) = 0) then
97        Exit(i + SizeInt(Offset));
98      //point Offset to next char in S
99      Offset := sizeint(i) + Offset + 1;
100      i := IndexByte(S[Offset],LastPos - Offset + 1, Byte(SubFirst));
101    end;
102  end;
103end;
104
105procedure TWordCompletion.GetWordList(AWordList: TStrings;
106  const Filter: String; ContainsFilter, CaseSensitive: boolean;
107  MaxResults: integer);
108var i, Line, x, FilterLen, MaxHash, LineLen: integer;
109  UpFilter, LineText, UpLineText, UpWordBuffer: string;
110  SourceText: TStrings;
111  HashList: ^integer;// index list. Every entry points to a word in the AWordList
112  SourceTextIndex, SourceTopLine, SourceBottomLine:integer;
113  LastCharType:TCharType;
114  IgnoreWordPos: TPoint;
115
116  procedure Add(const AWord:string);
117  // if AWord is not already in list then add it to AWordList
118  var a,Hash,HashTry:integer;
119    ALowWord:string;
120  begin
121    ALowWord:=lowercase(AWord);
122    Hash:=0;
123    a:=1;
124    while (a<=length(ALowWord)) and (a<20) do begin
125      inc(Hash,ord(ALowWord[a]) and $7f);
126      inc(a);
127    end;
128    Hash:=(Hash*137) mod MaxHash;
129    HashTry:=0;
130    while (HashTry<MaxHash) do begin
131      a:=HashList[(Hash+HashTry) mod MaxHash];
132      if a>=0 then begin
133        if (AWordList[a]=AWord) then
134          // word already in list -> do not add
135          exit;
136      end else begin
137        // word not in list -> add
138        HashList[(Hash+HashTry) mod MaxHash]:=AWordList.Add(AWord);
139        exit;
140      end;
141      inc(HashTry);
142    end;
143  end;
144
145  procedure AddIfMatch(const ALine, ALineUp:string; const AFirstPos, ALength: Integer);
146  var
147    AAdd: Boolean;
148  begin
149    if FilterLen=0 then
150      AAdd := True
151    else
152    begin
153      AAdd := False;
154      if CaseSensitive then begin
155        if ContainsFilter then
156          AAdd := MyPos(Filter, ALine, AFirstPos, AFirstPos+ALength-1)>0
157        else
158          AAdd := strlcomp(PChar(@ALine[AFirstPos]),PChar(Filter),FilterLen)=0;
159      end else
160      begin
161        if ContainsFilter then
162          AAdd := MyPos(UpFilter, ALineUp, AFirstPos, AFirstPos+ALength-1)>0
163        else
164          AAdd := strlcomp(PChar(@ALineUp[AFirstPos]),PChar(UpFilter),FilterLen)=0;
165      end;
166    end;
167    if AAdd then
168      Add(Copy(ALine, AFirstPos, ALength));
169  end;
170
171// TWordCompletion.GetWordList
172begin
173  AWordList.Clear;
174  if MaxResults<1 then MaxResults:=1;
175  MaxHash:=MaxResults*3;
176  GetMem(HashList,MaxHash*SizeOf(Integer));
177  try
178    for i:=0 to MaxHash-1 do HashList[i]:=-1;
179    FilterLen:=length(Filter);
180    AWordList.Capacity:=MaxResults;
181    UpFilter:=uppercase(Filter);
182    // first add all recently used words
183    i:=FWordBuffer.Count-1;
184    UpWordBuffer:='';
185    while (i>=0) and (AWordList.Count<MaxResults) do begin
186      if not CaseSensitive then
187        UpWordBuffer := UpperCase(FWordBuffer[i]);
188      AddIfMatch(FWordBuffer[i], UpWordBuffer, 1, Length(FWordBuffer[i]));
189      dec(i);
190    end;
191    if AWordList.Count>=MaxResults then exit;
192    // then search in all sources for more words that could fit
193    SourceTextIndex:=0;
194
195    SourceText:=nil;
196    SourceTopLine:=0;
197    SourceBottomLine:=-1;
198    IgnoreWordPos:=Point(-1,-1);
199    DoGetSource(SourceText,SourceTopLine,SourceBottomLine,IgnoreWordPos,SourceTextIndex);
200    UpLineText:='';
201    repeat
202      if SourceText<>nil then begin
203        Line:=SourceTopLine;
204        if SourceBottomLine<0 then
205          SourceBottomLine := SourceText.Count-1;
206        while (Line<=SourceBottomLine) do begin
207          LineText:=SourceText[line];
208          LineLen:=length(LineText);
209          if not CaseSensitive then
210            UpLineText:=uppercase(LineText);
211          x:=1;
212          LastCharType:=ctNone;
213          while (x<=LineLen) do begin
214            if (LastCharType=ctNone) and (CharTable[LineText[x]]=ctWordBegin)
215            then begin
216              // word found
217              i:=x;
218              repeat
219                inc(i);
220              until (i>LineLen) or (CharTable[LineText[i]]=ctNone);
221              if (i-x>=FilterLen) and not ((Line=IgnoreWordPos.Y) and (x<=IgnoreWordPos.X) and (IgnoreWordPos.X<=i)) then begin
222                AddIfMatch(LineText,UpLineText,x,i-x);
223                if AWordList.Count>=MaxResults then exit;
224              end;
225              x:=i;
226            end else
227              inc(x);
228            LastCharType:=CharTable[LineText[x-1]];
229          end;
230          inc(line);
231        end;
232      end;
233      inc(SourceTextIndex);
234      SourceText:=nil;
235      SourceTopLine:=0;
236      SourceBottomLine:=-1;
237      IgnoreWordPos:=Point(-1,-1);
238      DoGetSource(SourceText,SourceTopLine,SourceBottomLine,IgnoreWordPos,SourceTextIndex);
239    until SourceText=nil;
240  finally
241    FreeMem(HashList);
242  end;
243end;
244
245procedure TWordCompletion.CompletePrefix(const Prefix: string;
246  var CompletedPrefix: string; CaseSensitive:boolean);
247var
248  WordList: TStringList;
249  s: string;
250  SamePos: Integer;
251  MaxPos: Integer;
252  i: Integer;
253begin
254  CompletedPrefix:=Prefix;
255  WordList:=TStringList.Create;
256  try
257    // fetch all words with Prefix
258    GetWordList(WordList,Prefix,False,CaseSensitive,10000);
259    if WordList.Count=0 then exit;
260    // find the biggest prefix of all available words
261    CompletedPrefix:=WordList[0];
262    for i:=1 to WordList.Count-1 do begin
263      // stop, when it can't get shorter
264      if CompletedPrefix=Prefix then exit;
265      s:=WordList[i];
266      if length(s)<length(Prefix) then continue;
267      // count same
268      SamePos:=0;
269      MaxPos:=length(s);
270      if MaxPos>length(CompletedPrefix) then MaxPos:=length(CompletedPrefix);
271      while (SamePos<MaxPos) do begin
272        if CaseSensitive then begin
273          if s[SamePos+1]<>CompletedPrefix[SamePos+1] then
274            break;
275        end else begin
276          if upcase(s[SamePos+1])<>upcase(CompletedPrefix[SamePos+1]) then
277            break;
278        end;
279        inc(SamePos);
280      end;
281      if SamePos<length(Prefix) then continue;
282      if SamePos<length(CompletedPrefix) then
283        CompletedPrefix:=copy(CompletedPrefix,1,SamePos);
284    end;
285  finally
286    WordList.Free;
287  end;
288end;
289
290constructor TWordCompletion.Create;
291begin
292  inherited Create;
293  FWordBuffer:=TStringList.Create;
294  FWordBufferCapacity:=100;
295end;
296
297destructor TWordCompletion.Destroy;
298begin
299  FWordBuffer.Free;
300  inherited Destroy;
301end;
302
303procedure TWordCompletion.DoGetSource(var Source: TStrings; var TopLine,
304  BottomLine: Integer; var IgnoreWordPos: TPoint; SourceIndex: integer);
305begin
306  if Assigned(FOnGetSource) then
307    FOnGetSource(Source,TopLine,BottomLine,IgnoreWordPos,SourceIndex);
308end;
309
310function TWordCompletion.GetWordBufferCapacity:integer;
311begin
312  Result:=FWordBufferCapacity;
313end;
314
315procedure TWordCompletion.SetWordBufferCapacity(NewCapacity: integer);
316var TempWordBuffer:TStringList;
317  i:integer;
318begin
319  if NewCapacity<5 then NewCapacity:=5;
320  if NewCapacity<>FWordBufferCapacity then begin
321    FWordBufferCapacity:=NewCapacity;
322    if FWordBuffer.Count>NewCapacity then begin
323      TempWordBuffer:=TStringList.Create;
324      TempWordBuffer.Capacity:=NewCapacity;
325      i:=FWordBuffer.Count-NewCapacity;
326      while i<FWordBuffer.Count do begin
327        TempWordBuffer.Add(FWordBuffer[i]);
328        inc(i);
329      end;
330      FWordBuffer.Free;
331      FWordBuffer:=TempWordBuffer;
332    end;
333  end;
334end;
335
336procedure TWordCompletion.AddWord(const AWord:string);
337var OldIndex:integer;
338begin
339  OldIndex:=CaseSensitiveIndexOf(AWord);
340  if OldIndex>=0 then begin
341    // move word to the top
342    FWordBuffer.Move(OldIndex,FWordBuffer.Count-1);
343  end else begin
344    // add new word
345    if FWordBuffer.Count=FWordBufferCapacity then
346      FWordBuffer.Delete(0);
347    FWordBuffer.Add(AWord);
348  end;
349end;
350
351function TWordCompletion.CaseInsensitiveIndexOf(const AWord:string):integer;
352begin
353  Result:=FWordBuffer.Count-1;
354  while (Result>=0) and (CompareText(FWordBuffer[Result],AWord)<>0) do
355    dec(Result);
356end;
357
358function TWordCompletion.CaseSensitiveIndexOf(const AWord: string): integer;
359begin
360  Result:=FWordBuffer.Count-1;
361  while (Result>=0) and (FWordBuffer[Result]<>AWord) do
362    dec(Result);
363end;
364
365initialization
366  InitCharTable;
367
368end.
369
370