1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  * good.c - see if a word or its root word
4  * is in the dictionary.
5  *
6  * Pace Willisson, 1983
7  *
8  * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  *
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All modifications to the source code must be clearly marked as
21  *    such.  Binary redistributions based on modified source code
22  *    must be clearly marked as modified versions in the documentation
23  *    and/or other materials provided with the distribution.
24  * 4. All advertising materials mentioning features or use of this software
25  *    must display the following acknowledgment:
26  *      This product includes software developed by Geoff Kuenning and
27  *      other unpaid contributors.
28  * 5. The name of Geoff Kuenning may not be used to endorse or promote
29  *    products derived from this software without specific prior
30  *    written permission.
31  *
32  * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
33  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35  * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
36  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
38  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
39  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
40  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
41  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42  * SUCH DAMAGE.
43  */
44 
45 /*
46  * $Log$
47  * Revision 1.4  2003/08/14 17:51:26  dom
48  * update license - exception clause should be Lesser GPL
49  *
50  * Revision 1.3  2003/07/28 20:40:25  dom
51  * fix up the license clause, further win32-registry proof some directory getting functions
52  *
53  * Revision 1.2  2003/07/16 22:52:37  dom
54  * LGPL + exception license
55  *
56  * Revision 1.1  2003/07/15 01:15:04  dom
57  * ispell enchant backend
58  *
59  * Revision 1.2  2003/01/29 05:50:11  hippietrail
60  *
61  * Fixed my mess in EncodingManager.
62  * Changed many C casts to C++ casts.
63  *
64  * Revision 1.1  2003/01/24 05:52:32  hippietrail
65  *
66  * Refactored ispell code. Old ispell global variables had been put into
67  * an allocated structure, a pointer to which was passed to many functions.
68  * I have now made all such functions and variables private members of the
69  * ISpellChecker class. It was C OO, now it's C++ OO.
70  *
71  * I've fixed the makefiles and tested compilation but am unable to test
72  * operation. Please back out my changes if they cause problems which
73  * are not obvious or easy to fix.
74  *
75  * Revision 1.6  2003/01/06 18:48:38  dom
76  * ispell cleanup, start of using new 'add' save features
77  *
78  * Revision 1.5  2002/09/19 05:31:15  hippietrail
79  *
80  * More Ispell cleanup.  Conditional globals and DEREF macros are removed.
81  * K&R function declarations removed, converted to Doxygen style comments
82  * where possible.  No code has been changed (I hope).  Compiles for me but
83  * unable to test.
84  *
85  * Revision 1.4  2002/09/17 03:03:29  hippietrail
86  *
87  * After seeking permission on the developer list I've reformatted all the
88  * spelling source which seemed to have parts which used 2, 3, 4, and 8
89  * spaces for tabs.  It should all look good with our standard 4-space
90  * tabs now.
91  * I've concentrated just on indentation in the actual code.  More prettying
92  * could be done.
93  * * NO code changes were made *
94  *
95  * Revision 1.3  2002/09/13 17:20:12  mpritchett
96  * Fix more warnings for Linux build
97  *
98  * Revision 1.2  2001/05/12 16:05:42  thomasf
99  * Big pseudo changes to ispell to make it pass around a structure rather
 * than rely on all sorts of globals willy nilly here and there.  Also
101  * fixed our spelling class to work with accepting suggestions once more.
102  * This code is dirty, gross and ugly (not to mention still not supporting
103  * multiple hash sized just yet) but it works on my machine and will no
104  * doubt break other machines.
105  *
106  * Revision 1.1  2001/04/15 16:01:24  tomas_f
107  * moving to spell/xp
108  *
109  * Revision 1.5  2000/02/09 22:35:25  sterwill
110  * Clean up some warnings
111  *
112  * Revision 1.4  1998/12/29 14:55:32  eric
113  *
114  * I've doctored the ispell code pretty extensively here.  It is now
115  * warning-free on Win32.  It also *works* on Win32 now, since I
116  * replaced all the I/O calls with ANSI standard ones.
117  *
118  * Revision 1.3  1998/12/28 23:11:30  eric
119  *
120  * modified spell code and integration to build on Windows.
121  * This is still a hack.
122  *
123  * Actually, it doesn't yet WORK on Windows.  It just builds.
124  * SpellCheckInit is failing for some reason.
125  *
126  * Revision 1.2  1998/12/28 22:16:22  eric
127  *
128  * These changes begin to incorporate the spell checker into AbiWord.  Most
129  * of this is a hack.
130  *
131  * 1.  added other/spell to the -I list in config/abi_defs
132  * 2.  replaced other/spell/Makefile with one which is more like
133  * 	our build system.
134  * 3.  added other/spell to other/Makefile so that the build will now
135  * 	dive down and build the spell check library.
136  * 4.  added the AbiSpell library to the Makefiles in wp/main
137  * 5.  added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp.
138  * 	This call is a HACK and should be replaced with something
139  * 	proper later.
140  * 6.  added code to fv_View.cpp as follows:
141  * 	whenever you double-click on a word, the spell checker
142  * 	verifies that word and prints its status to stdout.
143  *
144  * Caveats:
145  * 1.  This will break the Windows build.  I'm going to work on fixing it
146  * 	now.
147  * 2.  This only works if your dictionary is in /usr/lib/ispell/american.hash.
148  * 	The dictionary location is currently hard-coded.  This will be
149  * 	fixed as well.
150  *
151  * Anyway, such as it is, it works.
152  *
153  * Revision 1.1  1998/12/28 18:04:43  davet
154  * Spell checker code stripped from ispell.  At this point, there are
155  * two external routines...  the Init routine, and a check-a-word routine
156  * which returns a boolean value, and takes a 16 bit char string.
157  * The code resembles the ispell code as much as possible still.
158  *
159  * Revision 1.43  1994/11/02  06:56:05  geoff
160  * Remove the anyword feature, which I've decided is a bad idea.
161  *
162  * Revision 1.42  1994/10/25  05:45:59  geoff
163  * Add support for an affix that will work with any word, even if there's
164  * no explicit flag.
165  *
166  * Revision 1.41  1994/05/24  06:23:06  geoff
167  * Let tgood decide capitalization questions, rather than doing it ourselves.
168  *
169  * Revision 1.40  1994/05/17  06:44:10  geoff
170  * Add support for controlled compound formation and the COMPOUNDONLY
171  * option to affix flags.
172  *
173  * Revision 1.39  1994/01/25  07:11:31  geoff
174  * Get rid of all old RCS log lines in preparation for the 3.1 release.
175  *
176  */
177 
178 #include <ctype.h>
179 #include <stdio.h>
180 #include <stdlib.h>
181 #include <string.h>
182 
183 #include "ispell_checker.h"
184 
185 
/*
 * Prototype for good(), written with the P(()) prototype wrapper.
 *
 * NOTE(review): this declares a free function named good(), while the only
 * definition in this file is the member ISpellChecker::good.  The prototype
 * looks like a leftover from before the code was moved into the class --
 * confirm nothing else depends on it before removing it.
 */
int		good P ((ichar_t * word, int ignoreflagbits, int allhits,
			 int pfxopts, int sfxopts));
188 
189 #ifndef NO_CAPITALIZATION_SUPPORT
190 
191 /*!
192 ** See if this particular capitalization (dent) is legal with these
193 ** particular affixes.
194 **
195 ** \param dent
196 ** \param hit
197 **
198 ** \return
199 */
entryhasaffixes(struct dent * dent,struct success * hit)200 static int entryhasaffixes (struct dent *dent, struct success *hit)
201 {
202     if (hit->prefix  &&  !TSTMASKBIT (dent->mask, hit->prefix->flagbit))
203 		return 0;
204     if (hit->suffix  &&  !TSTMASKBIT (dent->mask, hit->suffix->flagbit))
205 		return 0;
206     return 1;			/* Yes, these affixes are legal */
207 }
208 
/*!
 * Decide whether the capitalization of a word is acceptable for a
 * dictionary hit, taking the hit's prefix and suffix into account.
 *
 * The capitalization class of word (from whatcap) is compared against
 * each capitalization variant chained from hit->dictent.  The chain is
 * followed through dent->next for as long as the MOREVARIANTS bit is set
 * in dent->flagfield.
 *
 * \param word The word as supplied by the caller, in its original case
 * \param hit  Candidate match: dictionary entry plus optional prefix and
 *             suffix (either pointer may be null)
 * \param len  Length of word, in ichar_t units
 *
 * \return 1 if some variant accepts this capitalization and carries the
 *         flags for the hit's affixes, 0 if no variant matches
 */
int ISpellChecker::cap_ok (ichar_t *word, struct success *hit, int len)
{
    register ichar_t *		dword;		/* Cursor into dentword */
    register ichar_t *		w;		/* Cursor into word */
    register struct dent *	dent;		/* Variant being examined */
    ichar_t			dentword[INPUTWORDLEN + MAXAFFIXLEN];	/* ichar_t copy of dent->word */
    int				preadd;		/* hit->prefix->affl: leading chars of word treated as prefix */
    int				prestrip;	/* hit->prefix->stripl: leading chars of dentword skipped */
    int				sufadd;		/* hit->suffix->affl: trailing chars of word treated as suffix */
    ichar_t *		limit;		/* End marker for the current scan */
    long			thiscap;	/* Capitalization class of word */
    long			dentcap;	/* Capitalization class of the current variant */

    thiscap = whatcap (word);
    /*
    ** All caps is always legal, regardless of affixes.
    */
    preadd = prestrip = sufadd = 0;
    if (thiscap == ALLCAPS)
		return 1;
    else if (thiscap == FOLLOWCASE)
	{
		/* Set up some constants for the while(1) loop below */
		/* (Only the FOLLOWCASE branch of the loop reads these values.) */
		if (hit->prefix)
		{
			preadd = hit->prefix->affl;
			prestrip = hit->prefix->stripl;
		}
		else
			preadd = prestrip = 0;
		sufadd = hit->suffix ? hit->suffix->affl : 0;
	}
    /*
    ** Search the variants for one that matches what we have.  Note
    ** that thiscap can't be ALLCAPS, since we already returned
    ** for that case.
    */
    dent = hit->dictent;
    for (  ;  ;  )
	{
		dentcap = captype (dent->flagfield);
		if (dentcap != thiscap)
		{
			/*
			** The only accepted mismatch: a CAPITALIZED word
			** against an ANYCASE entry.
			*/
			if (dentcap == ANYCASE  &&  thiscap == CAPITALIZED
			 &&  entryhasaffixes (dent, hit))
				return 1;
		}
		else				/* captypes match */
		{
			if (thiscap != FOLLOWCASE)
			{
				/* Same class and not FOLLOWCASE: only the affix flags matter. */
				if (entryhasaffixes (dent, hit))
					return 1;
			}
			else
			{
				/*
				** Make sure followcase matches exactly.
				** Life is made more difficult by the
				** possibility of affixes.  Start with
				** the prefix.
				*/
				strtoichar (dentword, dent->word, INPUTWORDLEN, 1);
				dword = dentword;
				limit = word + preadd;
				/*
				** The prefix characters of word must agree in
				** case with the first dictionary character that
				** follows the stripped part: no lowercase if it
				** is upper, no uppercase otherwise.
				*/
				if (myupper (dword[prestrip]))
				{
					for (w = word;  w < limit;  w++)
					{
						if (mylower (*w))
							goto doublecontinue;
					}
				}
				else
				{
					for (w = word;  w < limit;  w++)
					{
						if (myupper (*w))
							goto doublecontinue;
					}
				}
				/*
				** Both loops above stop with w at limit, so w
				** now points at the first character of word
				** after the prefix.
				*/
				dword += prestrip;
				/* Do root part of word */
				/* The root must match the dictionary spelling exactly. */
				limit = dword + len - preadd - sufadd;
				while (dword < limit)
				{
					if (*dword++ != *w++)
						goto doublecontinue;
				}
				/* Do suffix */
				/*
				** The case of the last compared root character
				** decides the case required of everything left
				** in word (scanned up to its terminating zero).
				*/
				dword = limit - 1;
				if (myupper (*dword))
				{
					for (  ;  *w;  w++)
					{
						if (mylower (*w))
							goto doublecontinue;
					}
				}
				else
				{
					for (  ;  *w;  w++)
					{
						if (myupper (*w))
							goto doublecontinue;
					}
				}
				/*
				** All failure paths go to "doublecontinue,"
				** so if we get here it must match.
				*/
				if (entryhasaffixes (dent, hit))
					return 1;
				doublecontinue:	;
			}
		}
		/* Stop once the last variant in the chain has been examined. */
		if ((dent->flagfield & MOREVARIANTS) == 0)
			break;
		dent = dent->next;
	}

    /* No matches found */
    return 0;
}
340 #endif
341 
342 #ifndef NO_CAPITALIZATION_SUPPORT
343 /*!
344  * \param w Word to look up
345  * \param ignoreflagbits NZ to ignore affix flags in dict
346  * \param allhits NZ to ignore case, get every hit
347  * \param pfxopts Options to apply to prefixes
348  * \param sfxopts Options to apply to suffixes
349  *
350  * \return
351  */
good(ichar_t * w,int ignoreflagbits,int allhits,int pfxopts,int sfxopts)352 int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int allhits, int pfxopts, int sfxopts)
353 #else
354 /* ARGSUSED */
355 int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int dummy, int pfxopts, int sfxopts)
356 #endif
357 {
358     ichar_t		nword[INPUTWORDLEN + MAXAFFIXLEN];
359     register ichar_t *	p;
360     register ichar_t *	q;
361     register int	n;
362     register struct dent * dp;
363 
364     /*
365     ** Make an uppercase copy of the word we are checking.
366     */
367     for (p = w, q = nword;  *p;  )
368 		*q++ = mytoupper (*p++);
369     *q = 0;
370     n = q - nword;
371 
372     m_numhits = 0;
373 
374     if ((dp = ispell_lookup (nword, 1)) != NULL)
375 	{
376 		m_hits[0].dictent = dp;
377 		m_hits[0].prefix = NULL;
378 		m_hits[0].suffix = NULL;
379 #ifndef NO_CAPITALIZATION_SUPPORT
380 		if (allhits  ||  cap_ok (w, &m_hits[0], n))
381 			m_numhits = 1;
382 #else
383 		m_numhits = 1;
384 #endif
385 	}
386 
387     if (m_numhits  &&  !allhits)
388 		return 1;
389 
390     /* try stripping off affixes */
391 
392     chk_aff (w, nword, n, ignoreflagbits, allhits, pfxopts, sfxopts);
393 
394     return m_numhits;
395 }
396 
397 
398 
399 
400