1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3 * correct.c - Routines to manage the higher-level aspects of spell-checking
4 *
5 * This code originally resided in ispell.c, but was moved here to keep
6 * file sizes smaller.
7 *
8 * Copyright (c), 1983, by Pace Willisson
9 *
10 * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
11 * All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 *
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. All modifications to the source code must be clearly marked as
23 * such. Binary redistributions based on modified source code
24 * must be clearly marked as modified versions in the documentation
25 * and/or other materials provided with the distribution.
26 * 4. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgment:
28 * This product includes software developed by Geoff Kuenning and
29 * other unpaid contributors.
30 * 5. The name of Geoff Kuenning may not be used to endorse or promote
31 * products derived from this software without specific prior
32 * written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44 * SUCH DAMAGE.
45 */
46
47 /*
48 * $Log$
49 * Revision 1.4 2003/08/14 17:51:26 dom
50 * update license - exception clause should be Lesser GPL
51 *
52 * Revision 1.3 2003/07/28 20:40:25 dom
53 * fix up the license clause, further win32-registry proof some directory getting functions
54 *
55 * Revision 1.2 2003/07/16 22:52:35 dom
56 * LGPL + exception license
57 *
58 * Revision 1.1 2003/07/15 01:15:04 dom
59 * ispell enchant backend
60 *
61 * Revision 1.2 2003/01/29 05:50:11 hippietrail
62 *
63 * Fixed my mess in EncodingManager.
64 * Changed many C casts to C++ casts.
65 *
66 * Revision 1.1 2003/01/24 05:52:31 hippietrail
67 *
68 * Refactored ispell code. Old ispell global variables had been put into
69 * an allocated structure, a pointer to which was passed to many functions.
70 * I have now made all such functions and variables private members of the
71 * ISpellChecker class. It was C OO, now it's C++ OO.
72 *
73 * I've fixed the makefiles and tested compilation but am unable to test
74 * operation. Please back out my changes if they cause problems which
75 * are not obvious or easy to fix.
76 *
77 * Revision 1.7 2002/09/19 05:31:15 hippietrail
78 *
79 * More Ispell cleanup. Conditional globals and DEREF macros are removed.
80 * K&R function declarations removed, converted to Doxygen style comments
81 * where possible. No code has been changed (I hope). Compiles for me but
82 * unable to test.
83 *
84 * Revision 1.6 2002/09/17 03:03:28 hippietrail
85 *
86 * After seeking permission on the developer list I've reformatted all the
87 * spelling source which seemed to have parts which used 2, 3, 4, and 8
88 * spaces for tabs. It should all look good with our standard 4-space
89 * tabs now.
90 * I've concentrated just on indentation in the actual code. More prettying
91 * could be done.
92 * * NO code changes were made *
93 *
94 * Revision 1.5 2002/09/13 17:20:12 mpritchett
95 * Fix more warnings for Linux build
96 *
97 * Revision 1.4 2002/03/06 08:27:16 fjfranklin
98 * o Only activate compound handling when the hash file says so (Per Larsson)
99 *
100 * Revision 1.3 2001/05/14 09:52:50 hub
101 * Removed newMain.c from GNUmakefile.am
102 *
103 * C++ comments are not C comment. Changed to C comments
104 *
105 * Revision 1.2 2001/05/12 16:05:42 thomasf
106 * Big pseudo changes to ispell to make it pass around a structure rather
107 * than rely on all sorts of globals willy nilly here and there. Also
108 * fixed our spelling class to work with accepting suggestions once more.
109 * This code is dirty, gross and ugly (not to mention still not supporting
110 * multiple hash sized just yet) but it works on my machine and will no
111 * doubt break other machines.
112 *
113 * Revision 1.1 2001/04/15 16:01:24 tomas_f
114 * moving to spell/xp
115 *
116 * Revision 1.2 1999/10/05 16:17:28 paul
117 * Fixed build, and other tidyness.
118 * Spell dialog enabled by default, with keyboard binding of F7.
119 *
120 * Revision 1.1 1999/09/29 23:33:32 justin
121 * Updates to the underlying ispell-based code to support suggested corrections.
122 *
123 * Revision 1.59 1995/08/05 23:19:43 geoff
124 * Fix a bug that caused offsets for long lines to be confused if the
125 * line started with a quoting uparrow.
126 *
127 * Revision 1.58 1994/11/02 06:56:00 geoff
128 * Remove the anyword feature, which I've decided is a bad idea.
129 *
130 * Revision 1.57 1994/10/26 05:12:39 geoff
131 * Try boundary characters when inserting or substituting letters, except
132 * (naturally) at word boundaries.
133 *
134 * Revision 1.56 1994/10/25 05:46:30 geoff
135 * Fix an assignment inside a conditional that could generate spurious
136 * warnings (as well as being bad style). Add support for the FF_ANYWORD
137 * option.
138 *
139 * Revision 1.55 1994/09/16 04:48:24 geoff
140 * Don't pass newlines from the input to various other routines, and
141 * don't assume that those routines leave the input unchanged.
142 *
143 * Revision 1.54 1994/09/01 06:06:41 geoff
144 * Change erasechar/killchar to uerasechar/ukillchar to avoid
145 * shared-library problems on HP systems.
146 *
147 * Revision 1.53 1994/08/31 05:58:38 geoff
148 * Add code to handle extremely long lines in -a mode without splitting
149 * words or reporting incorrect offsets.
150 *
151 * Revision 1.52 1994/05/25 04:29:24 geoff
152 * Fix a bug that caused line widths to be calculated incorrectly when
153 * displaying lines containing tabs. Fix a couple of places where
154 * characters were sign-extended incorrectly, which could cause 8-bit
155 * characters to be displayed wrong.
156 *
157 * Revision 1.51 1994/05/17 06:44:05 geoff
158 * Add support for controlled compound formation and the COMPOUNDONLY
159 * option to affix flags.
160 *
161 * Revision 1.50 1994/04/27 05:20:14 geoff
162 * Allow compound words to be formed from more than two components
163 *
164 * Revision 1.49 1994/04/27 01:50:31 geoff
165 * Add support to correctly capitalize words generated as a result of a
166 * missing-space suggestion.
167 *
168 * Revision 1.48 1994/04/03 23:23:02 geoff
169 * Clean up the code in missingspace() to be a bit simpler and more
170 * efficient.
171 *
172 * Revision 1.47 1994/03/15 06:24:23 geoff
173 * Fix the +/-/~ commands to be independent. Allow the + command to
174 * receive a suffix which is a deformatter type (currently hardwired to
175 * be either tex or nroff/troff).
176 *
177 * Revision 1.46 1994/02/21 00:20:03 geoff
178 * Fix some bugs that could cause bad displays in the interaction between
179 * TeX parsing and string characters. Show_char now will not overrun
180 * the inverse-video display area by accident.
181 *
182 * Revision 1.45 1994/02/14 00:34:51 geoff
183 * Fix correct to accept length parameters for ctok and itok, so that it
184 * can pass them to the to/from ichar routines.
185 *
186 * Revision 1.44 1994/01/25 07:11:22 geoff
187 * Get rid of all old RCS log lines in preparation for the 3.1 release.
188 *
189 */
190
191 #include <stdlib.h>
192 #include <string.h>
193 #include <ctype.h>
194 #include "ispell_checker.h"
195 #include "msgs.h"
196
197 /*
198 extern void upcase P ((ichar_t * string));
199 extern void lowcase P ((ichar_t * string));
200 extern ichar_t * strtosichar P ((char * in, int canonical));
201
202 int compoundflag = COMPOUND_CONTROLLED;
203 */
204
205 /*
206 * \param a
207 * \param b
208 * \param canonical NZ for canonical string chars
209 *
210 * \return
211 */
212 int
casecmp(char * a,char * b,int canonical)213 ISpellChecker::casecmp (char *a, char *b, int canonical)
214 {
215 register ichar_t * ap;
216 register ichar_t * bp;
217 ichar_t inta[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
218 ichar_t intb[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
219
220 strtoichar (inta, a, sizeof inta, canonical);
221 strtoichar (intb, b, sizeof intb, canonical);
222 for (ap = inta, bp = intb; *ap != 0; ap++, bp++)
223 {
224 if (*ap != *bp)
225 {
226 if (*bp == '\0')
227 return m_hashheader.sortorder[*ap];
228 else if (mylower (*ap))
229 {
230 if (mylower (*bp) || mytoupper (*ap) != *bp)
231 return static_cast<int>(m_hashheader.sortorder[*ap])
232 - static_cast<int>(m_hashheader.sortorder[*bp]);
233 }
234 else
235 {
236 if (myupper (*bp) || mytolower (*ap) != *bp)
237 return static_cast<int>(m_hashheader.sortorder[*ap])
238 - static_cast<int>(m_hashheader.sortorder[*bp]);
239 }
240 }
241 }
242 if (*bp != '\0')
243 return -static_cast<int>(m_hashheader.sortorder[*bp]);
244 for (ap = inta, bp = intb; *ap; ap++, bp++)
245 {
246 if (*ap != *bp)
247 {
248 return static_cast<int>(m_hashheader.sortorder[*ap])
249 - static_cast<int>(m_hashheader.sortorder[*bp]);
250 }
251 }
252 return 0;
253 }
254
255 /*
256 * \param word
257 */
258 void
makepossibilities(ichar_t * word)259 ISpellChecker::makepossibilities (ichar_t *word)
260 {
261 register int i;
262
263 for (i = 0; i < MAXPOSSIBLE; i++)
264 m_possibilities[i][0] = 0;
265 m_pcount = 0;
266 m_maxposslen = 0;
267 m_easypossibilities = 0;
268
269 #ifndef NO_CAPITALIZATION_SUPPORT
270 wrongcapital (word);
271 #endif
272
273 /*
274 * according to Pollock and Zamora, CACM April 1984 (V. 27, No. 4),
275 * page 363, the correct order for this is:
276 * OMISSION = TRANSPOSITION > INSERTION > SUBSTITUTION
277 * thus, it was exactly backwards in the old version. -- PWP
278 */
279
280 if (m_pcount < MAXPOSSIBLE)
281 missingletter (word); /* omission */
282 if (m_pcount < MAXPOSSIBLE)
283 transposedletter (word); /* transposition */
284 if (m_pcount < MAXPOSSIBLE)
285 extraletter (word); /* insertion */
286 if (m_pcount < MAXPOSSIBLE)
287 wrongletter (word); /* substitution */
288
289 if ((m_hashheader.compoundflag != COMPOUND_ANYTIME) &&
290 m_pcount < MAXPOSSIBLE)
291 missingspace (word); /* two words */
292
293 }
294
295 /*
296 * \param word
297 *
298 * \return
299 */
300 int
insert(ichar_t * word)301 ISpellChecker::insert (ichar_t *word)
302 {
303 register int i;
304 register char * realword;
305
306 realword = ichartosstr (word, 0);
307 for (i = 0; i < m_pcount; i++)
308 {
309 if (strcmp (m_possibilities[i], realword) == 0)
310 return (0);
311 }
312
313 strcpy (m_possibilities[m_pcount++], realword);
314 i = strlen (realword);
315 if (i > m_maxposslen)
316 m_maxposslen = i;
317 if (m_pcount >= MAXPOSSIBLE)
318 return (-1);
319 else
320 return (0);
321 }
322
323 #ifndef NO_CAPITALIZATION_SUPPORT
324 /*
325 * \param word
326 */
327 void
wrongcapital(ichar_t * word)328 ISpellChecker::wrongcapital (ichar_t *word)
329 {
330 ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN];
331
332 /*
333 ** When the third parameter to "good" is nonzero, it ignores
334 ** case. If the word matches this way, "ins_cap" will recapitalize
335 ** it correctly.
336 */
337 if (good (word, 0, 1, 0, 0))
338 {
339 icharcpy (newword, word);
340 upcase (newword);
341 ins_cap (newword, word);
342 }
343 }
344 #endif
345
346 /*
347 * \param word
348 */
349 void
wrongletter(ichar_t * word)350 ISpellChecker::wrongletter (ichar_t *word)
351 {
352 register int i;
353 register int j;
354 register int n;
355 ichar_t savechar;
356 ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN];
357
358 n = icharlen (word);
359 icharcpy (newword, word);
360 #ifndef NO_CAPITALIZATION_SUPPORT
361 upcase (newword);
362 #endif
363
364 for (i = 0; i < n; i++)
365 {
366 savechar = newword[i];
367 for (j=0; j < m_Trynum; ++j)
368 {
369 if (m_Try[j] == savechar)
370 continue;
371 else if (isboundarych (m_Try[j]) && (i == 0 || i == n - 1))
372 continue;
373 newword[i] = m_Try[j];
374 if (good (newword, 0, 1, 0, 0))
375 {
376 if (ins_cap (newword, word) < 0)
377 return;
378 }
379 }
380 newword[i] = savechar;
381 }
382 }
383
384 /*
385 * \param word
386 */
387 void
extraletter(ichar_t * word)388 ISpellChecker::extraletter (ichar_t *word)
389 {
390 ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN];
391 register ichar_t * p;
392 register ichar_t * r;
393
394 if (icharlen (word) < 2)
395 return;
396
397 icharcpy (newword, word + 1);
398 for (p = word, r = newword; *p != 0; )
399 {
400 if (good (newword, 0, 1, 0, 0))
401 {
402 if (ins_cap (newword, word) < 0)
403 return;
404 }
405 *r++ = *p++;
406 }
407 }
408
409 /*
410 * \param word
411 */
412 void
missingletter(ichar_t * word)413 ISpellChecker::missingletter (ichar_t *word)
414 {
415 ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN + 1];
416 register ichar_t * p;
417 register ichar_t * r;
418 register int i;
419
420 icharcpy (newword + 1, word);
421 for (p = word, r = newword; *p != 0; )
422 {
423 for (i = 0; i < m_Trynum; i++)
424 {
425 if (isboundarych (m_Try[i]) && r == newword)
426 continue;
427 *r = m_Try[i];
428 if (good (newword, 0, 1, 0, 0))
429 {
430 if (ins_cap (newword, word) < 0)
431 return;
432 }
433 }
434 *r++ = *p++;
435 }
436 for (i = 0; i < m_Trynum; i++)
437 {
438 if (isboundarych (m_Try[i]))
439 continue;
440 *r = m_Try[i];
441 if (good (newword, 0, 1, 0, 0))
442 {
443 if (ins_cap (newword, word) < 0)
444 return;
445 }
446 }
447 }
448
449 /*
450 * \param word
451 */
missingspace(ichar_t * word)452 void ISpellChecker::missingspace (ichar_t *word)
453 {
454 ichar_t firsthalf[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
455 int firstno; /* Index into first */
456 ichar_t * firstp; /* Ptr into current firsthalf word */
457 ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN + 1];
458 int nfirsthalf; /* No. words saved in 1st half */
459 int nsecondhalf; /* No. words saved in 2nd half */
460 register ichar_t * p;
461 ichar_t secondhalf[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
462 int secondno; /* Index into second */
463
464 /*
465 ** We don't do words of length less than 3; this keeps us from
466 ** splitting all two-letter words into two single letters. We
467 ** also don't do maximum-length words, since adding the space
468 ** would exceed the size of the "possibilities" array.
469 */
470 nfirsthalf = icharlen (word);
471 if (nfirsthalf < 3 || nfirsthalf >= INPUTWORDLEN + MAXAFFIXLEN - 1)
472 return;
473 icharcpy (newword + 1, word);
474 for (p = newword + 1; p[1] != '\0'; p++)
475 {
476 p[-1] = *p;
477 *p = '\0';
478 if (good (newword, 0, 1, 0, 0))
479 {
480 /*
481 * Save_cap must be called before good() is called on the
482 * second half, because it uses state left around by
483 * good(). This is unfortunate because it wastes a bit of
484 * time, but I don't think it's a significant performance
485 * problem.
486 */
487 nfirsthalf = save_cap (newword, word, firsthalf);
488 if (good (p + 1, 0, 1, 0, 0))
489 {
490 nsecondhalf = save_cap (p + 1, p + 1, secondhalf);
491 for (firstno = 0; firstno < nfirsthalf; firstno++)
492 {
493 firstp = &firsthalf[firstno][p - newword];
494 for (secondno = 0; secondno < nsecondhalf; secondno++)
495 {
496 *firstp = ' ';
497 icharcpy (firstp + 1, secondhalf[secondno]);
498 if (insert (firsthalf[firstno]) < 0)
499 return;
500 *firstp = '-';
501 if (insert (firsthalf[firstno]) < 0)
502 return;
503 }
504 }
505 }
506 }
507 }
508 }
509
510 /*
511 * \param word
512 * \param pfxopts Options to apply to prefixes
513 */
514 int
compoundgood(ichar_t * word,int pfxopts)515 ISpellChecker::compoundgood (ichar_t *word, int pfxopts)
516 {
517 ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN];
518 register ichar_t * p;
519 register ichar_t savech;
520 long secondcap; /* Capitalization of 2nd half */
521
522 /*
523 ** If compoundflag is COMPOUND_NEVER, compound words are never ok.
524 */
525 if (m_hashheader.compoundflag == COMPOUND_NEVER)
526 return 0;
527 /*
528 ** Test for a possible compound word (for languages like German that
529 ** form lots of compounds).
530 **
531 ** This is similar to missingspace, except we quit on the first hit,
532 ** and we won't allow either member of the compound to be a single
533 ** letter.
534 **
535 ** We don't do words of length less than 2 * compoundmin, since
536 ** both halves must at least compoundmin letters.
537 */
538 if (icharlen (word) < 2 * m_hashheader.compoundmin)
539 return 0;
540 icharcpy (newword, word);
541 p = newword + m_hashheader.compoundmin;
542 for ( ; p[m_hashheader.compoundmin - 1] != 0; p++)
543 {
544 savech = *p;
545 *p = 0;
546 if (good (newword, 0, 0, pfxopts, FF_COMPOUNDONLY))
547 {
548 *p = savech;
549 if (good (p, 0, 1, FF_COMPOUNDONLY, 0)
550 || compoundgood (p, FF_COMPOUNDONLY))
551 {
552 secondcap = whatcap (p);
553 switch (whatcap (newword))
554 {
555 case ANYCASE:
556 case CAPITALIZED:
557 case FOLLOWCASE: /* Followcase can have l.c. suffix */
558 return secondcap == ANYCASE;
559 case ALLCAPS:
560 return secondcap == ALLCAPS;
561 }
562 }
563 }
564 else
565 *p = savech;
566 }
567 return 0;
568 }
569
570 /*
571 * \param word
572 */
573 void
transposedletter(ichar_t * word)574 ISpellChecker::transposedletter (ichar_t *word)
575 {
576 ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN];
577 register ichar_t * p;
578 register ichar_t temp;
579
580 icharcpy (newword, word);
581 for (p = newword; p[1] != 0; p++)
582 {
583 temp = *p;
584 *p = p[1];
585 p[1] = temp;
586 if (good (newword, 0, 1, 0, 0))
587 {
588 if (ins_cap (newword, word) < 0)
589 return;
590 }
591 temp = *p;
592 *p = p[1];
593 p[1] = temp;
594 }
595 }
596
597 /*!
598 * Insert one or more correctly capitalized versions of word
599 *
600 * \param word
601 * \param pattern
602 *
603 * \return
604 */
605 int
ins_cap(ichar_t * word,ichar_t * pattern)606 ISpellChecker::ins_cap (ichar_t *word, ichar_t *pattern)
607 {
608 int i; /* Index into savearea */
609 int nsaved; /* No. of words saved */
610 ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
611
612 nsaved = save_cap (word, pattern, savearea);
613 for (i = 0; i < nsaved; i++)
614 {
615 if (insert (savearea[i]) < 0)
616 return -1;
617 }
618 return 0;
619 }
620
621 /*!
622 * Save one or more correctly capitalized versions of word
623 *
624 * \param word Word to save
625 * \param pattern Prototype capitalization pattern
626 * \param savearea Room to save words
627 *
628 * \return
629 */
630 int
save_cap(ichar_t * word,ichar_t * pattern,ichar_t savearea[MAX_CAPS][INPUTWORDLEN+MAXAFFIXLEN])631 ISpellChecker::save_cap (ichar_t *word, ichar_t *pattern,
632 ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN])
633 {
634 int hitno; /* Index into hits array */
635 int nsaved; /* Number of words saved */
636 int preadd; /* No. chars added to front of root */
637 int prestrip; /* No. chars stripped from front */
638 int sufadd; /* No. chars added to back of root */
639 int sufstrip; /* No. chars stripped from back */
640
641 if (*word == 0)
642 return 0;
643
644 for (hitno = m_numhits, nsaved = 0; --hitno >= 0 && nsaved < MAX_CAPS; )
645 {
646 if (m_hits[hitno].prefix)
647 {
648 prestrip = m_hits[hitno].prefix->stripl;
649 preadd = m_hits[hitno].prefix->affl;
650 }
651 else
652 prestrip = preadd = 0;
653 if (m_hits[hitno].suffix)
654 {
655 sufstrip = m_hits[hitno].suffix->stripl;
656 sufadd = m_hits[hitno].suffix->affl;
657 }
658 else
659 sufadd = sufstrip = 0;
660 save_root_cap (word, pattern, prestrip, preadd,
661 sufstrip, sufadd,
662 m_hits[hitno].dictent, m_hits[hitno].prefix, m_hits[hitno].suffix,
663 savearea, &nsaved);
664 }
665 return nsaved;
666 }
667
668 /*
669 * \param word
670 * \param pattern
671 * \param prestrip
672 * \param preadd
673 * \param sufstrip
674 * \param sufadd
675 * \param firstdent
676 * \param pfxent
677 * \param sufent
678 *
679 * \return
680 */
681 int
ins_root_cap(ichar_t * word,ichar_t * pattern,int prestrip,int preadd,int sufstrip,int sufadd,struct dent * firstdent,struct flagent * pfxent,struct flagent * sufent)682 ISpellChecker::ins_root_cap (ichar_t *word, ichar_t *pattern,
683 int prestrip, int preadd, int sufstrip, int sufadd,
684 struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent)
685 {
686 int i; /* Index into savearea */
687 ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
688 int nsaved; /* Number of words saved */
689
690 nsaved = 0;
691 save_root_cap (word, pattern, prestrip, preadd, sufstrip, sufadd,
692 firstdent, pfxent, sufent, savearea, &nsaved);
693 for (i = 0; i < nsaved; i++)
694 {
695 if (insert (savearea[i]) < 0)
696 return -1;
697 }
698 return 0;
699 }
700
701 /* ARGSUSED */
702 /*!
703 * \param word Word to be saved
704 * \param pattern Capitalization pattern
705 * \param prestrip No. chars stripped from front
706 * \param preadd No. chars added to front of root
707 * \param sufstrip No. chars stripped from back
708 * \param sufadd No. chars added to back of root
709 * \param firstdent First dent for root
710 * \param pfxent Pfx-flag entry for word
711 * \param sufent Sfx-flag entry for word
712 * \param savearea Room to save words
713 * \param nsaved Number saved so far (updated)
714 */
715 void
save_root_cap(ichar_t * word,ichar_t * pattern,int prestrip,int preadd,int sufstrip,int sufadd,struct dent * firstdent,struct flagent * pfxent,struct flagent * sufent,ichar_t savearea[MAX_CAPS][INPUTWORDLEN+MAXAFFIXLEN],int * nsaved)716 ISpellChecker::save_root_cap (ichar_t *word, ichar_t *pattern,
717 int prestrip, int preadd, int sufstrip, int sufadd,
718 struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent,
719 ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN],
720 int * nsaved)
721 {
722 #ifndef NO_CAPITALIZATION_SUPPORT
723 register struct dent * dent;
724 #endif /* NO_CAPITALIZATION_SUPPORT */
725 int firstisupper;
726 ichar_t newword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
727 #ifndef NO_CAPITALIZATION_SUPPORT
728 register ichar_t * p;
729 int len;
730 int i;
731 int limit;
732 #endif /* NO_CAPITALIZATION_SUPPORT */
733
734 if (*nsaved >= MAX_CAPS)
735 return;
736 icharcpy (newword, word);
737 firstisupper = myupper (pattern[0]);
738 #ifdef NO_CAPITALIZATION_SUPPORT
739 /*
740 ** Apply the old, simple-minded capitalization rules.
741 */
742 if (firstisupper)
743 {
744 if (myupper (pattern[1]))
745 upcase (newword);
746 else
747 {
748 lowcase (newword);
749 newword[0] = mytoupper (newword[0]);
750 }
751 }
752 else
753 lowcase (newword);
754 icharcpy (savearea[*nsaved], newword);
755 (*nsaved)++;
756 return;
757 #else /* NO_CAPITALIZATION_SUPPORT */
758 #define flagsareok(dent) \
759 ((pfxent == NULL \
760 || TSTMASKBIT (dent->mask, pfxent->flagbit)) \
761 && (sufent == NULL \
762 || TSTMASKBIT (dent->mask, sufent->flagbit)))
763
764 dent = firstdent;
765 if ((dent->flagfield & (CAPTYPEMASK | MOREVARIANTS)) == ALLCAPS)
766 {
767 upcase (newword); /* Uppercase required */
768 icharcpy (savearea[*nsaved], newword);
769 (*nsaved)++;
770 return;
771 }
772 for (p = pattern; *p; p++)
773 {
774 if (mylower (*p))
775 break;
776 }
777 if (*p == 0)
778 {
779 upcase (newword); /* Pattern was all caps */
780 icharcpy (savearea[*nsaved], newword);
781 (*nsaved)++;
782 return;
783 }
784 for (p = pattern + 1; *p; p++)
785 {
786 if (myupper (*p))
787 break;
788 }
789 if (*p == 0)
790 {
791 /*
792 ** The pattern was all-lower or capitalized. If that's
793 ** legal, insert only that version.
794 */
795 if (firstisupper)
796 {
797 if (captype (dent->flagfield) == CAPITALIZED
798 || captype (dent->flagfield) == ANYCASE)
799 {
800 lowcase (newword);
801 newword[0] = mytoupper (newword[0]);
802 icharcpy (savearea[*nsaved], newword);
803 (*nsaved)++;
804 return;
805 }
806 }
807 else
808 {
809 if (captype (dent->flagfield) == ANYCASE)
810 {
811 lowcase (newword);
812 icharcpy (savearea[*nsaved], newword);
813 (*nsaved)++;
814 return;
815 }
816 }
817 while (dent->flagfield & MOREVARIANTS)
818 {
819 dent = dent->next;
820 if (captype (dent->flagfield) == FOLLOWCASE
821 || !flagsareok (dent))
822 continue;
823 if (firstisupper)
824 {
825 if (captype (dent->flagfield) == CAPITALIZED)
826 {
827 lowcase (newword);
828 newword[0] = mytoupper (newword[0]);
829 icharcpy (savearea[*nsaved], newword);
830 (*nsaved)++;
831 return;
832 }
833 }
834 else
835 {
836 if (captype (dent->flagfield) == ANYCASE)
837 {
838 lowcase (newword);
839 icharcpy (savearea[*nsaved], newword);
840 (*nsaved)++;
841 return;
842 }
843 }
844 }
845 }
846 /*
847 ** Either the sample had complex capitalization, or the simple
848 ** capitalizations (all-lower or capitalized) are illegal.
849 ** Insert all legal capitalizations, including those that are
850 ** all-lower or capitalized. If the prototype is capitalized,
851 ** capitalized all-lower samples. Watch out for affixes.
852 */
853 dent = firstdent;
854 p = strtosichar (dent->word, 1);
855 len = icharlen (p);
856 if (dent->flagfield & MOREVARIANTS)
857 dent = dent->next; /* Skip place-holder entry */
858 for ( ; ; )
859 {
860 if (flagsareok (dent))
861 {
862 if (captype (dent->flagfield) != FOLLOWCASE)
863 {
864 lowcase (newword);
865 if (firstisupper || captype (dent->flagfield) == CAPITALIZED)
866 newword[0] = mytoupper (newword[0]);
867 icharcpy (savearea[*nsaved], newword);
868 (*nsaved)++;
869 if (*nsaved >= MAX_CAPS)
870 return;
871 }
872 else
873 {
874 /* Followcase is the tough one. */
875 p = strtosichar (dent->word, 1);
876 memmove (
877 reinterpret_cast<char *>(newword + preadd),
878 reinterpret_cast<char *>(p + prestrip),
879 (len - prestrip - sufstrip) * sizeof (ichar_t));
880 if (myupper (p[prestrip]))
881 {
882 for (i = 0; i < preadd; i++)
883 newword[i] = mytoupper (newword[i]);
884 }
885 else
886 {
887 for (i = 0; i < preadd; i++)
888 newword[i] = mytolower (newword[i]);
889 }
890 limit = len + preadd + sufadd - prestrip - sufstrip;
891 i = len + preadd - prestrip - sufstrip;
892 p += len - sufstrip - 1;
893 if (myupper (*p))
894 {
895 for (p = newword + i; i < limit; i++, p++)
896 *p = mytoupper (*p);
897 }
898 else
899 {
900 for (p = newword + i; i < limit; i++, p++)
901 *p = mytolower (*p);
902 }
903 icharcpy (savearea[*nsaved], newword);
904 (*nsaved)++;
905 if (*nsaved >= MAX_CAPS)
906 return;
907 }
908 }
909 if ((dent->flagfield & MOREVARIANTS) == 0)
910 break; /* End of the line */
911 dent = dent->next;
912 }
913 return;
914 #endif /* NO_CAPITALIZATION_SUPPORT */
915 }
916
917
918