1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3 * good.c - see if a word or its root word
4 * is in the dictionary.
5 *
6 * Pace Willisson, 1983
7 *
8 * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 *
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All modifications to the source code must be clearly marked as
21 * such. Binary redistributions based on modified source code
22 * must be clearly marked as modified versions in the documentation
23 * and/or other materials provided with the distribution.
24 * 4. All advertising materials mentioning features or use of this software
25 * must display the following acknowledgment:
26 * This product includes software developed by Geoff Kuenning and
27 * other unpaid contributors.
28 * 5. The name of Geoff Kuenning may not be used to endorse or promote
29 * products derived from this software without specific prior
30 * written permission.
31 *
32 * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
33 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
36 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
38 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
39 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
40 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
41 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42 * SUCH DAMAGE.
43 */
44
45 /*
46 * $Log$
47 * Revision 1.4 2003/08/14 17:51:26 dom
48 * update license - exception clause should be Lesser GPL
49 *
50 * Revision 1.3 2003/07/28 20:40:25 dom
51 * fix up the license clause, further win32-registry proof some directory getting functions
52 *
53 * Revision 1.2 2003/07/16 22:52:37 dom
54 * LGPL + exception license
55 *
56 * Revision 1.1 2003/07/15 01:15:04 dom
57 * ispell enchant backend
58 *
59 * Revision 1.2 2003/01/29 05:50:11 hippietrail
60 *
61 * Fixed my mess in EncodingManager.
62 * Changed many C casts to C++ casts.
63 *
64 * Revision 1.1 2003/01/24 05:52:32 hippietrail
65 *
66 * Refactored ispell code. Old ispell global variables had been put into
67 * an allocated structure, a pointer to which was passed to many functions.
68 * I have now made all such functions and variables private members of the
69 * ISpellChecker class. It was C OO, now it's C++ OO.
70 *
71 * I've fixed the makefiles and tested compilation but am unable to test
72 * operation. Please back out my changes if they cause problems which
73 * are not obvious or easy to fix.
74 *
75 * Revision 1.6 2003/01/06 18:48:38 dom
76 * ispell cleanup, start of using new 'add' save features
77 *
78 * Revision 1.5 2002/09/19 05:31:15 hippietrail
79 *
80 * More Ispell cleanup. Conditional globals and DEREF macros are removed.
81 * K&R function declarations removed, converted to Doxygen style comments
82 * where possible. No code has been changed (I hope). Compiles for me but
83 * unable to test.
84 *
85 * Revision 1.4 2002/09/17 03:03:29 hippietrail
86 *
87 * After seeking permission on the developer list I've reformatted all the
88 * spelling source which seemed to have parts which used 2, 3, 4, and 8
89 * spaces for tabs. It should all look good with our standard 4-space
90 * tabs now.
91 * I've concentrated just on indentation in the actual code. More prettying
92 * could be done.
93 * * NO code changes were made *
94 *
95 * Revision 1.3 2002/09/13 17:20:12 mpritchett
96 * Fix more warnings for Linux build
97 *
98 * Revision 1.2 2001/05/12 16:05:42 thomasf
99 * Big pseudo changes to ispell to make it pass around a structure rather
100 * than rely on all sorts of globals willy nilly here and there. Also
101 * fixed our spelling class to work with accepting suggestions once more.
102 * This code is dirty, gross and ugly (not to mention still not supporting
103 * multiple hash sized just yet) but it works on my machine and will no
104 * doubt break other machines.
105 *
106 * Revision 1.1 2001/04/15 16:01:24 tomas_f
107 * moving to spell/xp
108 *
109 * Revision 1.5 2000/02/09 22:35:25 sterwill
110 * Clean up some warnings
111 *
112 * Revision 1.4 1998/12/29 14:55:32 eric
113 *
114 * I've doctored the ispell code pretty extensively here. It is now
115 * warning-free on Win32. It also *works* on Win32 now, since I
116 * replaced all the I/O calls with ANSI standard ones.
117 *
118 * Revision 1.3 1998/12/28 23:11:30 eric
119 *
120 * modified spell code and integration to build on Windows.
121 * This is still a hack.
122 *
123 * Actually, it doesn't yet WORK on Windows. It just builds.
124 * SpellCheckInit is failing for some reason.
125 *
126 * Revision 1.2 1998/12/28 22:16:22 eric
127 *
128 * These changes begin to incorporate the spell checker into AbiWord. Most
129 * of this is a hack.
130 *
131 * 1. added other/spell to the -I list in config/abi_defs
132 * 2. replaced other/spell/Makefile with one which is more like
133 * our build system.
134 * 3. added other/spell to other/Makefile so that the build will now
135 * dive down and build the spell check library.
136 * 4. added the AbiSpell library to the Makefiles in wp/main
137 * 5. added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp.
138 * This call is a HACK and should be replaced with something
139 * proper later.
140 * 6. added code to fv_View.cpp as follows:
141 * whenever you double-click on a word, the spell checker
142 * verifies that word and prints its status to stdout.
143 *
144 * Caveats:
145 * 1. This will break the Windows build. I'm going to work on fixing it
146 * now.
147 * 2. This only works if your dictionary is in /usr/lib/ispell/american.hash.
148 * The dictionary location is currently hard-coded. This will be
149 * fixed as well.
150 *
151 * Anyway, such as it is, it works.
152 *
153 * Revision 1.1 1998/12/28 18:04:43 davet
154 * Spell checker code stripped from ispell. At this point, there are
155 * two external routines... the Init routine, and a check-a-word routine
156 * which returns a boolean value, and takes a 16 bit char string.
157 * The code resembles the ispell code as much as possible still.
158 *
159 * Revision 1.43 1994/11/02 06:56:05 geoff
160 * Remove the anyword feature, which I've decided is a bad idea.
161 *
162 * Revision 1.42 1994/10/25 05:45:59 geoff
163 * Add support for an affix that will work with any word, even if there's
164 * no explicit flag.
165 *
166 * Revision 1.41 1994/05/24 06:23:06 geoff
167 * Let tgood decide capitalization questions, rather than doing it ourselves.
168 *
169 * Revision 1.40 1994/05/17 06:44:10 geoff
170 * Add support for controlled compound formation and the COMPOUNDONLY
171 * option to affix flags.
172 *
173 * Revision 1.39 1994/01/25 07:11:31 geoff
174 * Get rid of all old RCS log lines in preparation for the 3.1 release.
175 *
176 */
177
178 #include <ctype.h>
179 #include <stdio.h>
180 #include <stdlib.h>
181 #include <string.h>
182
183 #include "ispell_checker.h"
184
185
186 int good P ((ichar_t * word, int ignoreflagbits, int allhits,
187 int pfxopts, int sfxopts));
188
189 #ifndef NO_CAPITALIZATION_SUPPORT
190
191 /*!
192 ** See if this particular capitalization (dent) is legal with these
193 ** particular affixes.
194 **
195 ** \param dent
196 ** \param hit
197 **
198 ** \return
199 */
entryhasaffixes(struct dent * dent,struct success * hit)200 static int entryhasaffixes (struct dent *dent, struct success *hit)
201 {
202 if (hit->prefix && !TSTMASKBIT (dent->mask, hit->prefix->flagbit))
203 return 0;
204 if (hit->suffix && !TSTMASKBIT (dent->mask, hit->suffix->flagbit))
205 return 0;
206 return 1; /* Yes, these affixes are legal */
207 }
208
209 /*
210 * \param word
211 * \param hit
212 * \param len
213 *
214 * \return
215 */
cap_ok(ichar_t * word,struct success * hit,int len)216 int ISpellChecker::cap_ok (ichar_t *word, struct success *hit, int len)
217 {
218 register ichar_t * dword;
219 register ichar_t * w;
220 register struct dent * dent;
221 ichar_t dentword[INPUTWORDLEN + MAXAFFIXLEN];
222 int preadd;
223 int prestrip;
224 int sufadd;
225 ichar_t * limit;
226 long thiscap;
227 long dentcap;
228
229 thiscap = whatcap (word);
230 /*
231 ** All caps is always legal, regardless of affixes.
232 */
233 preadd = prestrip = sufadd = 0;
234 if (thiscap == ALLCAPS)
235 return 1;
236 else if (thiscap == FOLLOWCASE)
237 {
238 /* Set up some constants for the while(1) loop below */
239 if (hit->prefix)
240 {
241 preadd = hit->prefix->affl;
242 prestrip = hit->prefix->stripl;
243 }
244 else
245 preadd = prestrip = 0;
246 sufadd = hit->suffix ? hit->suffix->affl : 0;
247 }
248 /*
249 ** Search the variants for one that matches what we have. Note
250 ** that thiscap can't be ALLCAPS, since we already returned
251 ** for that case.
252 */
253 dent = hit->dictent;
254 for ( ; ; )
255 {
256 dentcap = captype (dent->flagfield);
257 if (dentcap != thiscap)
258 {
259 if (dentcap == ANYCASE && thiscap == CAPITALIZED
260 && entryhasaffixes (dent, hit))
261 return 1;
262 }
263 else /* captypes match */
264 {
265 if (thiscap != FOLLOWCASE)
266 {
267 if (entryhasaffixes (dent, hit))
268 return 1;
269 }
270 else
271 {
272 /*
273 ** Make sure followcase matches exactly.
274 ** Life is made more difficult by the
275 ** possibility of affixes. Start with
276 ** the prefix.
277 */
278 strtoichar (dentword, dent->word, INPUTWORDLEN, 1);
279 dword = dentword;
280 limit = word + preadd;
281 if (myupper (dword[prestrip]))
282 {
283 for (w = word; w < limit; w++)
284 {
285 if (mylower (*w))
286 goto doublecontinue;
287 }
288 }
289 else
290 {
291 for (w = word; w < limit; w++)
292 {
293 if (myupper (*w))
294 goto doublecontinue;
295 }
296 }
297 dword += prestrip;
298 /* Do root part of word */
299 limit = dword + len - preadd - sufadd;
300 while (dword < limit)
301 {
302 if (*dword++ != *w++)
303 goto doublecontinue;
304 }
305 /* Do suffix */
306 dword = limit - 1;
307 if (myupper (*dword))
308 {
309 for ( ; *w; w++)
310 {
311 if (mylower (*w))
312 goto doublecontinue;
313 }
314 }
315 else
316 {
317 for ( ; *w; w++)
318 {
319 if (myupper (*w))
320 goto doublecontinue;
321 }
322 }
323 /*
324 ** All failure paths go to "doublecontinue,"
325 ** so if we get here it must match.
326 */
327 if (entryhasaffixes (dent, hit))
328 return 1;
329 doublecontinue: ;
330 }
331 }
332 if ((dent->flagfield & MOREVARIANTS) == 0)
333 break;
334 dent = dent->next;
335 }
336
337 /* No matches found */
338 return 0;
339 }
340 #endif
341
342 #ifndef NO_CAPITALIZATION_SUPPORT
343 /*!
344 * \param w Word to look up
345 * \param ignoreflagbits NZ to ignore affix flags in dict
346 * \param allhits NZ to ignore case, get every hit
347 * \param pfxopts Options to apply to prefixes
348 * \param sfxopts Options to apply to suffixes
349 *
350 * \return
351 */
good(ichar_t * w,int ignoreflagbits,int allhits,int pfxopts,int sfxopts)352 int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int allhits, int pfxopts, int sfxopts)
353 #else
354 /* ARGSUSED */
355 int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int dummy, int pfxopts, int sfxopts)
356 #endif
357 {
358 ichar_t nword[INPUTWORDLEN + MAXAFFIXLEN];
359 register ichar_t * p;
360 register ichar_t * q;
361 register int n;
362 register struct dent * dp;
363
364 /*
365 ** Make an uppercase copy of the word we are checking.
366 */
367 for (p = w, q = nword; *p; )
368 *q++ = mytoupper (*p++);
369 *q = 0;
370 n = q - nword;
371
372 m_numhits = 0;
373
374 if ((dp = ispell_lookup (nword, 1)) != NULL)
375 {
376 m_hits[0].dictent = dp;
377 m_hits[0].prefix = NULL;
378 m_hits[0].suffix = NULL;
379 #ifndef NO_CAPITALIZATION_SUPPORT
380 if (allhits || cap_ok (w, &m_hits[0], n))
381 m_numhits = 1;
382 #else
383 m_numhits = 1;
384 #endif
385 }
386
387 if (m_numhits && !allhits)
388 return 1;
389
390 /* try stripping off affixes */
391
392 chk_aff (w, nword, n, ignoreflagbits, allhits, pfxopts, sfxopts);
393
394 return m_numhits;
395 }
396
397
398
399
400