1 
2 /* Copyright (C) 2010-2019 by The D Language Foundation, All Rights Reserved
3  * http://www.digitalmars.com
4  * Distributed under the Boost Software License, Version 1.0.
5  * (See accompanying file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
6  * https://github.com/D-Programming-Language/dmd/blob/master/src/root/speller.c
7  */
8 
9 #include "dsystem.h"
10 #include "speller.h"
11 
12 const char idchars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
13 
14 /**************************************************
15  * combine a new result from the spell checker to
16  * find the one with the closest symbol with
17  * respect to the cost defined by the search function
18  * Input/Output:
19  *      p       best found spelling (NULL if none found yet)
20  *      cost    cost of p (INT_MAX if none found yet)
21  * Input:
22  *      np      new found spelling (NULL if none found)
23  *      ncost   cost of np if non-NULL
24  * Returns:
25  *      true    if the cost is less or equal 0
26  *      false   otherwise
27  */
combineSpellerResult(void * & p,int & cost,void * np,int ncost)28 bool combineSpellerResult(void*& p, int& cost, void* np, int ncost)
29 {
30     if (np && ncost < cost)
31     {
32         p = np;
33         cost = ncost;
34         if (cost <= 0)
35             return true;
36     }
37     return false;
38 }
39 
spellerY(const char * seed,size_t seedlen,fp_speller_t fp,void * fparg,const char * charset,size_t index,int * cost)40 void *spellerY(const char *seed, size_t seedlen, fp_speller_t fp, void *fparg,
41         const char *charset, size_t index, int* cost)
42 {
43     if (!seedlen)
44         return NULL;
45     assert(seed[seedlen] == 0);
46 
47     char tmp[30];
48     char *buf;
49     if (seedlen <= sizeof(tmp) - 2)
50         buf = tmp;
51     else
52     {
53         buf = (char *)alloca(seedlen + 2);    // leave space for extra char
54         if (!buf)
55             return NULL;                      // no matches
56     }
57 
58     memcpy(buf, seed, index);
59     *cost = INT_MAX;
60     void* p = NULL;
61     int ncost;
62 
63     /* Delete at seed[index] */
64     if (index < seedlen)
65     {
66         memcpy(buf + index, seed + index + 1, seedlen - index);
67         assert(buf[seedlen - 1] == 0);
68         void* np = (*fp)(fparg, buf, &ncost);
69         if (combineSpellerResult(p, *cost, np, ncost))
70             return p;
71     }
72 
73     if (charset && *charset)
74     {
75         /* Substitutions */
76         if (index < seedlen)
77         {
78             memcpy(buf, seed, seedlen + 1);
79             for (const char *s = charset; *s; s++)
80             {
81                 buf[index] = *s;
82 
83                 //printf("sub buf = '%s'\n", buf);
84                 void* np = (*fp)(fparg, buf, &ncost);
85                 if (combineSpellerResult(p, *cost, np, ncost))
86                     return p;
87             }
88             assert(buf[seedlen] == 0);
89         }
90 
91         /* Insertions */
92         memcpy (buf + index + 1, seed + index, seedlen + 1 - index);
93 
94         for (const char *s = charset; *s; s++)
95         {
96             buf[index] = *s;
97 
98             //printf("ins buf = '%s'\n", buf);
99             void* np = (*fp)(fparg, buf, &ncost);
100             if (combineSpellerResult(p, *cost, np, ncost))
101                 return p;
102         }
103         assert(buf[seedlen + 1] == 0);
104     }
105 
106     return p;                // return "best" result
107 }
108 
spellerX(const char * seed,size_t seedlen,fp_speller_t fp,void * fparg,const char * charset,int flag)109 void *spellerX(const char *seed, size_t seedlen, fp_speller_t fp, void *fparg,
110         const char *charset, int flag)
111 {
112     if (!seedlen)
113         return NULL;
114 
115     char tmp[30];
116     char *buf;
117     if (seedlen <= sizeof(tmp) - 2)
118         buf = tmp;
119     else
120     {
121         buf = (char *)alloca(seedlen + 2);    // leave space for extra char
122         if (!buf)
123             return NULL;                      // no matches
124     }
125     int cost = INT_MAX, ncost;
126     void *p = NULL, *np;
127 
128     /* Deletions */
129     memcpy(buf, seed + 1, seedlen);
130     for (size_t i = 0; i < seedlen; i++)
131     {
132         //printf("del buf = '%s'\n", buf);
133         if (flag)
134             np = spellerY(buf, seedlen - 1, fp, fparg, charset, i, &ncost);
135         else
136             np = (*fp)(fparg, buf, &ncost);
137         if (combineSpellerResult(p, cost, np, ncost))
138             return p;
139 
140         buf[i] = seed[i];
141     }
142 
143     /* Transpositions */
144     if (!flag)
145     {
146         memcpy(buf, seed, seedlen + 1);
147         for (size_t i = 0; i + 1 < seedlen; i++)
148         {
149             // swap [i] and [i + 1]
150             buf[i] = seed[i + 1];
151             buf[i + 1] = seed[i];
152 
153             //printf("tra buf = '%s'\n", buf);
154             if (combineSpellerResult(p, cost, (*fp)(fparg, buf, &ncost), ncost))
155                 return p;
156 
157             buf[i] = seed[i];
158         }
159     }
160 
161     if (charset && *charset)
162     {
163         /* Substitutions */
164         memcpy(buf, seed, seedlen + 1);
165         for (size_t i = 0; i < seedlen; i++)
166         {
167             for (const char *s = charset; *s; s++)
168             {
169                 buf[i] = *s;
170 
171                 //printf("sub buf = '%s'\n", buf);
172                 if (flag)
173                     np = spellerY(buf, seedlen, fp, fparg, charset, i + 1, &ncost);
174                 else
175                     np = (*fp)(fparg, buf, &ncost);
176                 if (combineSpellerResult(p, cost, np, ncost))
177                     return p;
178             }
179             buf[i] = seed[i];
180         }
181 
182         /* Insertions */
183         memcpy(buf + 1, seed, seedlen + 1);
184         for (size_t i = 0; i <= seedlen; i++)      // yes, do seedlen+1 iterations
185         {
186             for (const char *s = charset; *s; s++)
187             {
188                 buf[i] = *s;
189 
190                 //printf("ins buf = '%s'\n", buf);
191                 if (flag)
192                     np = spellerY(buf, seedlen + 1, fp, fparg, charset, i + 1, &ncost);
193                 else
194                     np = (*fp)(fparg, buf, &ncost);
195                 if (combineSpellerResult(p, cost, np, ncost))
196                     return p;
197             }
198             buf[i] = seed[i];   // going past end of seed[] is ok, as we hit the 0
199         }
200     }
201 
202     return p;                // return "best" result
203 }
204 
205 /**************************************************
206  * Looks for correct spelling.
207  * Currently only looks a 'distance' of one from the seed[].
208  * This does an exhaustive search, so can potentially be very slow.
209  * Input:
210  *      seed            wrongly spelled word
211  *      fp              search function
212  *      fparg           argument to search function
213  *      charset         character set
214  * Returns:
215  *      NULL            no correct spellings found
216  *      void*           value returned by fp() for first possible correct spelling
217  */
218 
speller(const char * seed,fp_speller_t fp,void * fparg,const char * charset)219 void *speller(const char *seed, fp_speller_t fp, void *fparg, const char *charset)
220 {
221     size_t seedlen = strlen(seed);
222     size_t maxdist = seedlen < 4 ? seedlen / 2 : 2;
223     for (size_t distance = 0; distance < maxdist; distance++)
224     {   void *p = spellerX(seed, seedlen, fp, fparg, charset, distance);
225         if (p)
226             return p;
227 //      if (seedlen > 10)
228 //          break;
229     }
230     return NULL;   // didn't find it
231 }
232