1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  * correct.c - Routines to manage the higher-level aspects of spell-checking
4  *
5  * This code originally resided in ispell.c, but was moved here to keep
6  * file sizes smaller.
7  *
8  * Copyright (c), 1983, by Pace Willisson
9  *
10  * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
11  * All rights reserved.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  *
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All modifications to the source code must be clearly marked as
23  *    such.  Binary redistributions based on modified source code
24  *    must be clearly marked as modified versions in the documentation
25  *    and/or other materials provided with the distribution.
26  * 4. All advertising materials mentioning features or use of this software
27  *    must display the following acknowledgment:
28  *      This product includes software developed by Geoff Kuenning and
29  *      other unpaid contributors.
30  * 5. The name of Geoff Kuenning may not be used to endorse or promote
31  *    products derived from this software without specific prior
32  *    written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
35  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37  * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
38  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44  * SUCH DAMAGE.
45  */
46 
47 /*
48  * $Log$
49  * Revision 1.4  2003/08/14 17:51:26  dom
50  * update license - exception clause should be Lesser GPL
51  *
52  * Revision 1.3  2003/07/28 20:40:25  dom
53  * fix up the license clause, further win32-registry proof some directory getting functions
54  *
55  * Revision 1.2  2003/07/16 22:52:35  dom
56  * LGPL + exception license
57  *
58  * Revision 1.1  2003/07/15 01:15:04  dom
59  * ispell enchant backend
60  *
61  * Revision 1.2  2003/01/29 05:50:11  hippietrail
62  *
63  * Fixed my mess in EncodingManager.
64  * Changed many C casts to C++ casts.
65  *
66  * Revision 1.1  2003/01/24 05:52:31  hippietrail
67  *
68  * Refactored ispell code. Old ispell global variables had been put into
69  * an allocated structure, a pointer to which was passed to many functions.
70  * I have now made all such functions and variables private members of the
71  * ISpellChecker class. It was C OO, now it's C++ OO.
72  *
73  * I've fixed the makefiles and tested compilation but am unable to test
74  * operation. Please back out my changes if they cause problems which
75  * are not obvious or easy to fix.
76  *
77  * Revision 1.7  2002/09/19 05:31:15  hippietrail
78  *
79  * More Ispell cleanup.  Conditional globals and DEREF macros are removed.
80  * K&R function declarations removed, converted to Doxygen style comments
81  * where possible.  No code has been changed (I hope).  Compiles for me but
82  * unable to test.
83  *
84  * Revision 1.6  2002/09/17 03:03:28  hippietrail
85  *
86  * After seeking permission on the developer list I've reformatted all the
87  * spelling source which seemed to have parts which used 2, 3, 4, and 8
88  * spaces for tabs.  It should all look good with our standard 4-space
89  * tabs now.
90  * I've concentrated just on indentation in the actual code.  More prettying
91  * could be done.
92  * * NO code changes were made *
93  *
94  * Revision 1.5  2002/09/13 17:20:12  mpritchett
95  * Fix more warnings for Linux build
96  *
97  * Revision 1.4  2002/03/06 08:27:16  fjfranklin
98  * o Only activate compound handling when the hash file says so (Per Larsson)
99  *
100  * Revision 1.3  2001/05/14 09:52:50  hub
101  * Removed newMain.c from GNUmakefile.am
102  *
103  * C++ comments are not C comment. Changed to C comments
104  *
105  * Revision 1.2  2001/05/12 16:05:42  thomasf
106  * Big pseudo changes to ispell to make it pass around a structure rather
107  * than rely on all sorts of gloabals willy nilly here and there.  Also
108  * fixed our spelling class to work with accepting suggestions once more.
109  * This code is dirty, gross and ugly (not to mention still not supporting
110  * multiple hash sized just yet) but it works on my machine and will no
111  * doubt break other machines.
112  *
113  * Revision 1.1  2001/04/15 16:01:24  tomas_f
114  * moving to spell/xp
115  *
116  * Revision 1.2  1999/10/05 16:17:28  paul
117  * Fixed build, and other tidyness.
118  * Spell dialog enabled by default, with keyboard binding of F7.
119  *
120  * Revision 1.1  1999/09/29 23:33:32  justin
121  * Updates to the underlying ispell-based code to support suggested corrections.
122  *
123  * Revision 1.59  1995/08/05  23:19:43  geoff
124  * Fix a bug that caused offsets for long lines to be confused if the
125  * line started with a quoting uparrow.
126  *
127  * Revision 1.58  1994/11/02  06:56:00  geoff
128  * Remove the anyword feature, which I've decided is a bad idea.
129  *
130  * Revision 1.57  1994/10/26  05:12:39  geoff
131  * Try boundary characters when inserting or substituting letters, except
132  * (naturally) at word boundaries.
133  *
134  * Revision 1.56  1994/10/25  05:46:30  geoff
135  * Fix an assignment inside a conditional that could generate spurious
136  * warnings (as well as being bad style).  Add support for the FF_ANYWORD
137  * option.
138  *
139  * Revision 1.55  1994/09/16  04:48:24  geoff
140  * Don't pass newlines from the input to various other routines, and
141  * don't assume that those routines leave the input unchanged.
142  *
143  * Revision 1.54  1994/09/01  06:06:41  geoff
144  * Change erasechar/killchar to uerasechar/ukillchar to avoid
145  * shared-library problems on HP systems.
146  *
147  * Revision 1.53  1994/08/31  05:58:38  geoff
148  * Add code to handle extremely long lines in -a mode without splitting
149  * words or reporting incorrect offsets.
150  *
151  * Revision 1.52  1994/05/25  04:29:24  geoff
152  * Fix a bug that caused line widths to be calculated incorrectly when
153  * displaying lines containing tabs.  Fix a couple of places where
154  * characters were sign-extended incorrectly, which could cause 8-bit
155  * characters to be displayed wrong.
156  *
157  * Revision 1.51  1994/05/17  06:44:05  geoff
158  * Add support for controlled compound formation and the COMPOUNDONLY
159  * option to affix flags.
160  *
161  * Revision 1.50  1994/04/27  05:20:14  geoff
162  * Allow compound words to be formed from more than two components
163  *
164  * Revision 1.49  1994/04/27  01:50:31  geoff
165  * Add support to correctly capitalize words generated as a result of a
166  * missing-space suggestion.
167  *
168  * Revision 1.48  1994/04/03  23:23:02  geoff
169  * Clean up the code in missingspace() to be a bit simpler and more
170  * efficient.
171  *
172  * Revision 1.47  1994/03/15  06:24:23  geoff
173  * Fix the +/-/~ commands to be independent.  Allow the + command to
174  * receive a suffix which is a deformatter type (currently hardwired to
175  * be either tex or nroff/troff).
176  *
177  * Revision 1.46  1994/02/21  00:20:03  geoff
178  * Fix some bugs that could cause bad displays in the interaction between
179  * TeX parsing and string characters.  Show_char now will not overrun
180  * the inverse-video display area by accident.
181  *
182  * Revision 1.45  1994/02/14  00:34:51  geoff
183  * Fix correct to accept length parameters for ctok and itok, so that it
184  * can pass them to the to/from ichar routines.
185  *
186  * Revision 1.44  1994/01/25  07:11:22  geoff
187  * Get rid of all old RCS log lines in preparation for the 3.1 release.
188  *
189  */
190 
191 #include <stdlib.h>
192 #include <string.h>
193 #include <ctype.h>
194 #include "ispell_checker.h"
195 #include "msgs.h"
196 
197 /*
198 extern void upcase P ((ichar_t * string));
199 extern void lowcase P ((ichar_t * string));
200 extern ichar_t * strtosichar P ((char * in, int canonical));
201 
202 int compoundflag = COMPOUND_CONTROLLED;
203 */
204 
205 /*
206  * \param a
207  * \param b
208  * \param canonical NZ for canonical string chars
209  *
210  * \return
211  */
212 int
casecmp(char * a,char * b,int canonical)213 ISpellChecker::casecmp (char *a, char *b, int canonical)
214 {
215     register ichar_t *	ap;
216     register ichar_t *	bp;
217     ichar_t		inta[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
218     ichar_t		intb[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
219 
220     strtoichar (inta, a, sizeof inta, canonical);
221     strtoichar (intb, b, sizeof intb, canonical);
222     for (ap = inta, bp = intb;  *ap != 0;  ap++, bp++)
223 	{
224 		if (*ap != *bp)
225 	    {
226 			if (*bp == '\0')
227 				return m_hashheader.sortorder[*ap];
228 			else if (mylower (*ap))
229 			{
230 				if (mylower (*bp)  ||  mytoupper (*ap) != *bp)
231 					return static_cast<int>(m_hashheader.sortorder[*ap])
232 					  - static_cast<int>(m_hashheader.sortorder[*bp]);
233 			}
234 			else
235 			{
236 				if (myupper (*bp)  ||  mytolower (*ap) != *bp)
237 					return static_cast<int>(m_hashheader.sortorder[*ap])
238 					  - static_cast<int>(m_hashheader.sortorder[*bp]);
239 			}
240 	    }
241 	}
242     if (*bp != '\0')
243 		return -static_cast<int>(m_hashheader.sortorder[*bp]);
244     for (ap = inta, bp = intb;  *ap;  ap++, bp++)
245 	{
246 		if (*ap != *bp)
247 	    {
248 			return static_cast<int>(m_hashheader.sortorder[*ap])
249 			  - static_cast<int>(m_hashheader.sortorder[*bp]);
250 	    }
251 	}
252     return 0;
253 }
254 
255 /*
256  * \param word
257  */
258 void
makepossibilities(ichar_t * word)259 ISpellChecker::makepossibilities (ichar_t *word)
260 {
261     register int	i;
262 
263     for (i = 0; i < MAXPOSSIBLE; i++)
264 	m_possibilities[i][0] = 0;
265     m_pcount = 0;
266     m_maxposslen = 0;
267     m_easypossibilities = 0;
268 
269 #ifndef NO_CAPITALIZATION_SUPPORT
270     wrongcapital (word);
271 #endif
272 
273 /*
274  * according to Pollock and Zamora, CACM April 1984 (V. 27, No. 4),
275  * page 363, the correct order for this is:
276  * OMISSION = TRANSPOSITION > INSERTION > SUBSTITUTION
277  * thus, it was exactly backwards in the old version. -- PWP
278  */
279 
280     if (m_pcount < MAXPOSSIBLE)
281 		missingletter (word);		/* omission */
282     if (m_pcount < MAXPOSSIBLE)
283 		transposedletter (word);	/* transposition */
284     if (m_pcount < MAXPOSSIBLE)
285 		extraletter (word);		/* insertion */
286     if (m_pcount < MAXPOSSIBLE)
287 		wrongletter (word);		/* substitution */
288 
289     if ((m_hashheader.compoundflag != COMPOUND_ANYTIME)  &&
290 		  m_pcount < MAXPOSSIBLE)
291 		missingspace (word);	/* two words */
292 
293 }
294 
295 /*
296  * \param word
297  *
298  * \return
299  */
300 int
insert(ichar_t * word)301 ISpellChecker::insert (ichar_t *word)
302 {
303     register int	i;
304     register char *	realword;
305 
306     realword = ichartosstr (word, 0);
307     for (i = 0; i < m_pcount; i++)
308 	{
309 		if (strcmp (m_possibilities[i], realword) == 0)
310 			return (0);
311 	}
312 
313     strcpy (m_possibilities[m_pcount++], realword);
314     i = strlen (realword);
315     if (i > m_maxposslen)
316 		m_maxposslen = i;
317     if (m_pcount >= MAXPOSSIBLE)
318 		return (-1);
319     else
320 		return (0);
321 }
322 
323 #ifndef NO_CAPITALIZATION_SUPPORT
324 /*
325  * \param word
326  */
327 void
wrongcapital(ichar_t * word)328 ISpellChecker::wrongcapital (ichar_t *word)
329 {
330     ichar_t		newword[INPUTWORDLEN + MAXAFFIXLEN];
331 
332     /*
333     ** When the third parameter to "good" is nonzero, it ignores
334     ** case.  If the word matches this way, "ins_cap" will recapitalize
335     ** it correctly.
336     */
337     if (good (word, 0, 1, 0, 0))
338 	{
339 		icharcpy (newword, word);
340 		upcase (newword);
341 		ins_cap (newword, word);
342 	}
343 }
344 #endif
345 
346 /*
347  * \param word
348  */
349 void
wrongletter(ichar_t * word)350 ISpellChecker::wrongletter (ichar_t *word)
351 {
352     register int	i;
353     register int	j;
354     register int	n;
355     ichar_t		savechar;
356     ichar_t		newword[INPUTWORDLEN + MAXAFFIXLEN];
357 
358     n = icharlen (word);
359     icharcpy (newword, word);
360 #ifndef NO_CAPITALIZATION_SUPPORT
361     upcase (newword);
362 #endif
363 
364     for (i = 0; i < n; i++)
365 	{
366 		savechar = newword[i];
367 		for (j=0; j < m_Trynum; ++j)
368 		{
369 			if (m_Try[j] == savechar)
370 				continue;
371 			else if (isboundarych (m_Try[j])  &&  (i == 0  ||  i == n - 1))
372 				continue;
373 			newword[i] = m_Try[j];
374 			if (good (newword, 0, 1, 0, 0))
375 			{
376 				if (ins_cap (newword, word) < 0)
377 					return;
378 			}
379 		}
380 		newword[i] = savechar;
381 	}
382 }
383 
384 /*
385  * \param word
386  */
387 void
extraletter(ichar_t * word)388 ISpellChecker::extraletter (ichar_t *word)
389 {
390     ichar_t		newword[INPUTWORDLEN + MAXAFFIXLEN];
391     register ichar_t *	p;
392     register ichar_t *	r;
393 
394     if (icharlen (word) < 2)
395 		return;
396 
397     icharcpy (newword, word + 1);
398     for (p = word, r = newword;  *p != 0;  )
399 	{
400 		if (good (newword, 0, 1, 0, 0))
401 		{
402 			if (ins_cap (newword, word) < 0)
403 				return;
404 		}
405 		*r++ = *p++;
406 	}
407 }
408 
409 /*
410  * \param word
411  */
412 void
missingletter(ichar_t * word)413 ISpellChecker::missingletter (ichar_t *word)
414 {
415     ichar_t		newword[INPUTWORDLEN + MAXAFFIXLEN + 1];
416     register ichar_t *	p;
417     register ichar_t *	r;
418     register int	i;
419 
420     icharcpy (newword + 1, word);
421     for (p = word, r = newword;  *p != 0;  )
422 	{
423 		for (i = 0;  i < m_Trynum;  i++)
424 	    {
425 			if (isboundarych (m_Try[i])  &&  r == newword)
426 				continue;
427 			*r = m_Try[i];
428 			if (good (newword, 0, 1, 0, 0))
429 			{
430 				if (ins_cap (newword, word) < 0)
431 					return;
432 			}
433 	    }
434 		*r++ = *p++;
435 	}
436     for (i = 0;  i < m_Trynum;  i++)
437 	{
438 		if (isboundarych (m_Try[i]))
439 			continue;
440 		*r = m_Try[i];
441 		if (good (newword, 0, 1, 0, 0))
442 		{
443 			if (ins_cap (newword, word) < 0)
444 				return;
445 		}
446 	}
447 }
448 
449 /*
450  * \param word
451  */
missingspace(ichar_t * word)452 void ISpellChecker::missingspace (ichar_t *word)
453 {
454     ichar_t		firsthalf[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
455     int			firstno;	/* Index into first */
456     ichar_t *		firstp;		/* Ptr into current firsthalf word */
457     ichar_t		newword[INPUTWORDLEN + MAXAFFIXLEN + 1];
458     int			nfirsthalf;	/* No. words saved in 1st half */
459     int			nsecondhalf;	/* No. words saved in 2nd half */
460     register ichar_t *	p;
461     ichar_t		secondhalf[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
462     int			secondno;	/* Index into second */
463 
464     /*
465     ** We don't do words of length less than 3;  this keeps us from
466     ** splitting all two-letter words into two single letters.  We
467     ** also don't do maximum-length words, since adding the space
468     ** would exceed the size of the "possibilities" array.
469     */
470     nfirsthalf = icharlen (word);
471     if (nfirsthalf < 3  ||  nfirsthalf >= INPUTWORDLEN + MAXAFFIXLEN - 1)
472 		return;
473     icharcpy (newword + 1, word);
474     for (p = newword + 1;  p[1] != '\0';  p++)
475 	{
476 		p[-1] = *p;
477 		*p = '\0';
478 		if (good (newword, 0, 1, 0, 0))
479 		{
480 			/*
481 			 * Save_cap must be called before good() is called on the
482 			 * second half, because it uses state left around by
483 			 * good().  This is unfortunate because it wastes a bit of
484 			 * time, but I don't think it's a significant performance
485 			 * problem.
486 			 */
487 			nfirsthalf = save_cap (newword, word, firsthalf);
488 			if (good (p + 1, 0, 1, 0, 0))
489 			{
490 				nsecondhalf = save_cap (p + 1, p + 1, secondhalf);
491 				for (firstno = 0;  firstno < nfirsthalf;  firstno++)
492 				{
493 					firstp = &firsthalf[firstno][p - newword];
494 					for (secondno = 0;  secondno < nsecondhalf;  secondno++)
495 					{
496 						*firstp = ' ';
497 						icharcpy (firstp + 1, secondhalf[secondno]);
498 						if (insert (firsthalf[firstno]) < 0)
499 							return;
500 						*firstp = '-';
501 						if (insert (firsthalf[firstno]) < 0)
502 							return;
503 					}
504 				}
505 			}
506 		}
507 	}
508 }
509 
510 /*
511  * \param word
512  * \param pfxopts Options to apply to prefixes
513  */
514 int
compoundgood(ichar_t * word,int pfxopts)515 ISpellChecker::compoundgood (ichar_t *word, int pfxopts)
516 {
517     ichar_t		newword[INPUTWORDLEN + MAXAFFIXLEN];
518     register ichar_t *	p;
519     register ichar_t	savech;
520     long		secondcap;	/* Capitalization of 2nd half */
521 
522     /*
523     ** If compoundflag is COMPOUND_NEVER, compound words are never ok.
524     */
525     if (m_hashheader.compoundflag == COMPOUND_NEVER)
526 		return 0;
527     /*
528     ** Test for a possible compound word (for languages like German that
529     ** form lots of compounds).
530     **
531     ** This is similar to missingspace, except we quit on the first hit,
532     ** and we won't allow either member of the compound to be a single
533     ** letter.
534     **
535     ** We don't do words of length less than 2 * compoundmin, since
536     ** both halves must at least compoundmin letters.
537     */
538     if (icharlen (word) < 2 * m_hashheader.compoundmin)
539 		return 0;
540     icharcpy (newword, word);
541     p = newword + m_hashheader.compoundmin;
542     for (  ;  p[m_hashheader.compoundmin - 1] != 0;  p++)
543 	{
544 		savech = *p;
545 		*p = 0;
546 		if (good (newword, 0, 0, pfxopts, FF_COMPOUNDONLY))
547 	    {
548 			*p = savech;
549 			if (good (p, 0, 1, FF_COMPOUNDONLY, 0)
550 			  ||  compoundgood (p, FF_COMPOUNDONLY))
551 			{
552 				secondcap = whatcap (p);
553 				switch (whatcap (newword))
554 				{
555 				case ANYCASE:
556 				case CAPITALIZED:
557 				case FOLLOWCASE:	/* Followcase can have l.c. suffix */
558 					return secondcap == ANYCASE;
559 				case ALLCAPS:
560 					return secondcap == ALLCAPS;
561 				}
562 			}
563 	    }
564 		else
565 			*p = savech;
566 	}
567     return 0;
568 }
569 
570 /*
571  * \param word
572  */
573 void
transposedletter(ichar_t * word)574 ISpellChecker::transposedletter (ichar_t *word)
575 {
576     ichar_t		newword[INPUTWORDLEN + MAXAFFIXLEN];
577     register ichar_t *	p;
578     register ichar_t	temp;
579 
580     icharcpy (newword, word);
581     for (p = newword;  p[1] != 0;  p++)
582 	{
583 		temp = *p;
584 		*p = p[1];
585 		p[1] = temp;
586 		if (good (newword, 0, 1, 0, 0))
587 	    {
588 			if (ins_cap (newword, word) < 0)
589 				return;
590 	    }
591 		temp = *p;
592 		*p = p[1];
593 		p[1] = temp;
594 	}
595 }
596 
597 /*!
598  * Insert one or more correctly capitalized versions of word
599  *
600  * \param word
601  * \param pattern
602  *
603  * \return
604  */
605 int
ins_cap(ichar_t * word,ichar_t * pattern)606 ISpellChecker::ins_cap (ichar_t *word, ichar_t *pattern)
607 {
608     int			i;		/* Index into savearea */
609     int			nsaved;		/* No. of words saved */
610     ichar_t		savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
611 
612     nsaved = save_cap (word, pattern, savearea);
613     for (i = 0;  i < nsaved;  i++)
614 	{
615 		if (insert (savearea[i]) < 0)
616 			return -1;
617 	}
618     return 0;
619 }
620 
621 /*!
622  * Save one or more correctly capitalized versions of word
623  *
624  * \param word Word to save
625  * \param pattern Prototype capitalization pattern
626  * \param savearea Room to save words
627  *
628  * \return
629  */
630 int
save_cap(ichar_t * word,ichar_t * pattern,ichar_t savearea[MAX_CAPS][INPUTWORDLEN+MAXAFFIXLEN])631 ISpellChecker::save_cap (ichar_t *word, ichar_t *pattern,
632 					ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN])
633 {
634     int			hitno;		/* Index into hits array */
635     int			nsaved;		/* Number of words saved */
636     int			preadd;		/* No. chars added to front of root */
637     int			prestrip;	/* No. chars stripped from front */
638     int			sufadd;		/* No. chars added to back of root */
639     int			sufstrip;	/* No. chars stripped from back */
640 
641     if (*word == 0)
642 		return 0;
643 
644     for (hitno = m_numhits, nsaved = 0;  --hitno >= 0  &&  nsaved < MAX_CAPS;  )
645 	{
646 		if (m_hits[hitno].prefix)
647 	    {
648 			prestrip = m_hits[hitno].prefix->stripl;
649 			preadd = m_hits[hitno].prefix->affl;
650 	    }
651 		else
652 			prestrip = preadd = 0;
653 		if (m_hits[hitno].suffix)
654 	    {
655 			sufstrip = m_hits[hitno].suffix->stripl;
656 			sufadd = m_hits[hitno].suffix->affl;
657 	    }
658 		else
659 			sufadd = sufstrip = 0;
660 		save_root_cap (word, pattern, prestrip, preadd,
661 			sufstrip, sufadd,
662 			m_hits[hitno].dictent, m_hits[hitno].prefix, m_hits[hitno].suffix,
663 			savearea, &nsaved);
664 	}
665     return nsaved;
666 }
667 
668 /*
669  * \param word
670  * \param pattern
671  * \param prestrip
672  * \param preadd
673  * \param sufstrip
674  * \param sufadd
675  * \param firstdent
676  * \param pfxent
677  * \param sufent
678  *
679  * \return
680  */
681 int
ins_root_cap(ichar_t * word,ichar_t * pattern,int prestrip,int preadd,int sufstrip,int sufadd,struct dent * firstdent,struct flagent * pfxent,struct flagent * sufent)682 ISpellChecker::ins_root_cap (ichar_t *word, ichar_t *pattern,
683 				 int prestrip, int preadd, int sufstrip, int sufadd,
684   				 struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent)
685 {
686     int			i;		/* Index into savearea */
687     ichar_t		savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
688     int			nsaved;		/* Number of words saved */
689 
690     nsaved = 0;
691     save_root_cap (word, pattern, prestrip, preadd, sufstrip, sufadd,
692       firstdent, pfxent, sufent, savearea, &nsaved);
693     for (i = 0;  i < nsaved;  i++)
694 	{
695 		if (insert (savearea[i]) < 0)
696 			return -1;
697 	}
698     return 0;
699 }
700 
701 /* ARGSUSED */
702 /*!
703  * \param word Word to be saved
704  * \param pattern Capitalization pattern
705  * \param prestrip No. chars stripped from front
706  * \param preadd No. chars added to front of root
707  * \param sufstrip No. chars stripped from back
708  * \param sufadd No. chars added to back of root
709  * \param firstdent First dent for root
710  * \param pfxent Pfx-flag entry for word
711  * \param sufent Sfx-flag entry for word
712  * \param savearea Room to save words
713  * \param nsaved Number saved so far (updated)
714  */
715 void
save_root_cap(ichar_t * word,ichar_t * pattern,int prestrip,int preadd,int sufstrip,int sufadd,struct dent * firstdent,struct flagent * pfxent,struct flagent * sufent,ichar_t savearea[MAX_CAPS][INPUTWORDLEN+MAXAFFIXLEN],int * nsaved)716 ISpellChecker::save_root_cap (ichar_t *word, ichar_t *pattern,
717 						  int prestrip, int preadd, int sufstrip, int sufadd,
718 						  struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent,
719 						  ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN],
720 					      int * nsaved)
721 {
722 #ifndef NO_CAPITALIZATION_SUPPORT
723     register struct dent * dent;
724 #endif /* NO_CAPITALIZATION_SUPPORT */
725     int			firstisupper;
726     ichar_t		newword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
727 #ifndef NO_CAPITALIZATION_SUPPORT
728     register ichar_t *	p;
729     int			len;
730     int			i;
731     int			limit;
732 #endif /* NO_CAPITALIZATION_SUPPORT */
733 
734     if (*nsaved >= MAX_CAPS)
735 		return;
736     icharcpy (newword, word);
737     firstisupper = myupper (pattern[0]);
738 #ifdef NO_CAPITALIZATION_SUPPORT
739     /*
740     ** Apply the old, simple-minded capitalization rules.
741     */
742     if (firstisupper)
743 	{
744 		if (myupper (pattern[1]))
745 			upcase (newword);
746 		else
747 	    {
748 			lowcase (newword);
749 			newword[0] = mytoupper (newword[0]);
750 	    }
751 	}
752     else
753 		lowcase (newword);
754     icharcpy (savearea[*nsaved], newword);
755     (*nsaved)++;
756     return;
757 #else /* NO_CAPITALIZATION_SUPPORT */
758 #define flagsareok(dent)    \
759     ((pfxent == NULL \
760 	||  TSTMASKBIT (dent->mask, pfxent->flagbit)) \
761       &&  (sufent == NULL \
762 	||  TSTMASKBIT (dent->mask, sufent->flagbit)))
763 
764     dent = firstdent;
765     if ((dent->flagfield & (CAPTYPEMASK | MOREVARIANTS)) == ALLCAPS)
766 	{
767 		upcase (newword);	/* Uppercase required */
768 		icharcpy (savearea[*nsaved], newword);
769 		(*nsaved)++;
770 		return;
771 	}
772     for (p = pattern;  *p;  p++)
773 	{
774 		if (mylower (*p))
775 			break;
776 	}
777     if (*p == 0)
778 	{
779 		upcase (newword);	/* Pattern was all caps */
780 		icharcpy (savearea[*nsaved], newword);
781 		(*nsaved)++;
782 		return;
783 	}
784     for (p = pattern + 1;  *p;  p++)
785 	{
786 		if (myupper (*p))
787 			break;
788 	}
789     if (*p == 0)
790 	{
791 		/*
792 		** The pattern was all-lower or capitalized.  If that's
793 		** legal, insert only that version.
794 		*/
795 		if (firstisupper)
796 		{
797 			if (captype (dent->flagfield) == CAPITALIZED
798 			  ||  captype (dent->flagfield) == ANYCASE)
799 			{
800 				lowcase (newword);
801 				newword[0] = mytoupper (newword[0]);
802 				icharcpy (savearea[*nsaved], newword);
803 				(*nsaved)++;
804 				return;
805 			}
806 		}
807 		else
808 		{
809 			if (captype (dent->flagfield) == ANYCASE)
810 			{
811 				lowcase (newword);
812 				icharcpy (savearea[*nsaved], newword);
813 				(*nsaved)++;
814 				return;
815 			}
816 		}
817 		while (dent->flagfield & MOREVARIANTS)
818 		{
819 			dent = dent->next;
820 			if (captype (dent->flagfield) == FOLLOWCASE
821 			  ||  !flagsareok (dent))
822 				continue;
823 			if (firstisupper)
824 			{
825 				if (captype (dent->flagfield) == CAPITALIZED)
826 				{
827 					lowcase (newword);
828 					newword[0] = mytoupper (newword[0]);
829 					icharcpy (savearea[*nsaved], newword);
830 					(*nsaved)++;
831 					return;
832 				}
833 			}
834 			else
835 			{
836 				if (captype (dent->flagfield) == ANYCASE)
837 				{
838 					lowcase (newword);
839 					icharcpy (savearea[*nsaved], newword);
840 					(*nsaved)++;
841 					return;
842 				}
843 			}
844 	    }
845 	}
846     /*
847     ** Either the sample had complex capitalization, or the simple
848     ** capitalizations (all-lower or capitalized) are illegal.
849     ** Insert all legal capitalizations, including those that are
850     ** all-lower or capitalized.  If the prototype is capitalized,
851     ** capitalized all-lower samples.  Watch out for affixes.
852     */
853     dent = firstdent;
854     p = strtosichar (dent->word, 1);
855     len = icharlen (p);
856     if (dent->flagfield & MOREVARIANTS)
857 		dent = dent->next;	/* Skip place-holder entry */
858     for (  ;  ;  )
859 	{
860 		if (flagsareok (dent))
861 	    {
862 			if (captype (dent->flagfield) != FOLLOWCASE)
863 			{
864 				lowcase (newword);
865 				if (firstisupper  ||  captype (dent->flagfield) == CAPITALIZED)
866 					newword[0] = mytoupper (newword[0]);
867 				icharcpy (savearea[*nsaved], newword);
868 				(*nsaved)++;
869 				if (*nsaved >= MAX_CAPS)
870 					return;
871 			}
872 			else
873 			{
874 				/* Followcase is the tough one. */
875 				p = strtosichar (dent->word, 1);
876 				memmove (
877 				  reinterpret_cast<char *>(newword + preadd),
878 				  reinterpret_cast<char *>(p + prestrip),
879 				  (len - prestrip - sufstrip) * sizeof (ichar_t));
880 				if (myupper (p[prestrip]))
881 				{
882 					for (i = 0;  i < preadd;  i++)
883 						newword[i] = mytoupper (newword[i]);
884 				}
885 				else
886 				{
887 					for (i = 0;  i < preadd;  i++)
888 						newword[i] = mytolower (newword[i]);
889 				}
890 				limit = len + preadd + sufadd - prestrip - sufstrip;
891 				i = len + preadd - prestrip - sufstrip;
892 				p += len - sufstrip - 1;
893 				if (myupper (*p))
894 				{
895 					for (p = newword + i;  i < limit;  i++, p++)
896 						*p = mytoupper (*p);
897 				}
898 				else
899 				{
900 					for (p = newword + i;  i < limit;  i++, p++)
901 						*p = mytolower (*p);
902 				}
903 				icharcpy (savearea[*nsaved], newword);
904 				(*nsaved)++;
905 				if (*nsaved >= MAX_CAPS)
906 					return;
907 			}
908 	    }
909 		if ((dent->flagfield & MOREVARIANTS) == 0)
910 			break;		/* End of the line */
911 		dent = dent->next;
912 	}
913     return;
914 #endif /* NO_CAPITALIZATION_SUPPORT */
915 }
916 
917 
918