1 // This file is part of The New Aspell
2 // Copyright (C) 2004 by Kevin Atkinson under the GNU LGPL
3 // license version 2.0 or 2.1. You should have received a copy of the
4 // LGPL license along with this library if you did not you can find it
5 // at http://www.gnu.org/.
6 //
// This code is based on the MySpell affix code:
8 //
9 /*
10 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada And
11 * Contributors. All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 *
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 *
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 *
24 * 3. All modifications to the source code must be clearly marked as
25 * such. Binary redistributions based on modified source code
26 * must be clearly marked as modified versions in the documentation
27 * and/or other materials provided with the distribution.
28 *
29 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
30 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
32 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
33 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
34 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
36 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 *
42 */
43
44 #include <cstdlib>
45 #include <cstring>
46 #include <cstdio>
47
48 //#include "iostream.hpp"
49
50 #include "affix.hpp"
51 #include "errors.hpp"
52 #include "getdata.hpp"
53 #include "parm_string.hpp"
54 #include "check_list.hpp"
55 #include "speller_impl.hpp"
56 #include "vararray.hpp"
57 #include "lsort.hpp"
58 #include "hash-t.hpp"
59
60 #include "gettext.h"
61
62 using namespace std;
63
64 namespace aspeller {
65
// Unsigned character type, used wherever a character indexes a table.
typedef unsigned char byte;

// One-byte empty C string.  Its address is compared against and stored
// as a marker value by AffixMgr::expand_suffix.
static char EMPTY[1] = {'\0'};
68
69 //////////////////////////////////////////////////////////////////////
70 //
// Entry struct definitions
72 //
73
74 struct Conds
75 {
76 char * str;
77 unsigned num;
78 char conds[SETSIZE];
getaspeller::Conds79 char get(byte i) const {return conds[i];}
80 };
81
82 struct AffEntry
83 {
84 const char * appnd;
85 const char * strip;
86 byte appndl;
87 byte stripl;
88 byte xpflg;
89 char achar;
90 const Conds * conds;
91 //unsigned int numconds;
92 //char conds[SETSIZE];
93 };
94
95 // A Prefix Entry
96
97 struct PfxEntry : public AffEntry
98 {
99 PfxEntry * next;
100 PfxEntry * next_eq;
101 PfxEntry * next_ne;
102 PfxEntry * flag_next;
PfxEntryaspeller::PfxEntry103 PfxEntry() {}
104
105 bool check(const LookupInfo &, const AffixMgr * pmyMgr,
106 ParmString, CheckInfo &, GuessInfo *, bool cross = true) const;
107
allow_crossaspeller::PfxEntry108 inline bool allow_cross() const { return ((xpflg & XPRODUCT) != 0); }
flagaspeller::PfxEntry109 inline byte flag() const { return achar; }
keyaspeller::PfxEntry110 inline const char * key() const { return appnd; }
111 bool applicable(SimpleString) const;
112 SimpleString add(SimpleString, ObjStack & buf) const;
113 };
114
115 // A Suffix Entry
116
117 struct SfxEntry : public AffEntry
118 {
119 const char * rappnd; // this is set in AffixMgr::build_sfxlist
120
121 SfxEntry * next;
122 SfxEntry * next_eq;
123 SfxEntry * next_ne;
124 SfxEntry * flag_next;
125
SfxEntryaspeller::SfxEntry126 SfxEntry() {}
127
128 bool check(const LookupInfo &, ParmString, CheckInfo &, GuessInfo *,
129 int optflags, AffEntry * ppfx);
130
allow_crossaspeller::SfxEntry131 inline bool allow_cross() const { return ((xpflg & XPRODUCT) != 0); }
flagaspeller::SfxEntry132 inline byte flag() const { return achar; }
keyaspeller::SfxEntry133 inline const char * key() const { return rappnd; }
134 bool applicable(SimpleString) const;
135 SimpleString add(SimpleString, ObjStack & buf, int limit, SimpleString) const;
136 };
137
138 //////////////////////////////////////////////////////////////////////
139 //
140 // Utility functions declarations
141 //
142
/* Returns true when s1 is a leading substring (prefix) of s2.
   An empty s1 is a prefix of every string. */
static bool isSubset(const char * s1, const char * s2)
{
  for (; *s1 != '\0'; ++s1, ++s2) {
    // a mismatch also covers s2 ending first, since *s2 is then '\0'
    if (*s1 != *s2) return false;
  }
  return true;
}
152
// Returns true when s1, read forwards, equals the tail of s2 read
// backwards starting at end_of_s2 (the last character of s2).
// len is the number of characters available in s2; s1 must be fully
// consumed within that many characters for the result to be true.
static bool isRevSubset(const char * s1, const char * end_of_s2, int len)
{
  for (int remaining = len; *s1 != '\0'; --remaining, ++s1, --end_of_s2) {
    // test the budget first so *end_of_s2 is never read once s2 is exhausted
    if (remaining <= 0 || *s1 != *end_of_s2) return false;
  }
  return true;
}
163
// Strict ordering of affix entries by their key() string, compared with
// strcmp.  Used to sort the per-character entry chains.
template <class T>
struct AffixLess
{
  bool operator() (T * x, T * y) const
  {
    const int order = strcmp(x->key(), y->key());
    return order < 0;
  }
};
169
170 // struct StringLookup {
171 // struct Parms {
172 // typedef const char * Value;
173 // typedef const char * Key;
174 // static const bool is_multi = false;
175 // hash<const char *> hfun;
176 // size_t hash(const char * s) {return hfun(s);}
177 // bool equal(const char * x, const char * y) {return strcmp(x,y) == 0;}
178 // const char * key(const char * c) {return c;}
179 // };
180 // typedef HashTable<Parms> Lookup;
181 // Lookup lookup;
182 // ObjStack * data_buf;
183 // StringLookup(ObjStack * b) : data_buf(b) {}
184 // const char * dup(const char * orig) {
185 // pair<Lookup::iterator, bool> res = lookup.insert(orig);
186 // if (res.second) *res.first = data_buf->dup(orig);
187 // return *res.first;
188 // //return data_buf->dup(orig);
189 // }
190 // };
191
192 struct CondsLookupParms {
193 typedef const Conds * Value;
194 typedef const char * Key;
195 static const bool is_multi = false;
196 acommon::hash<const char *> hfun;
hashaspeller::CondsLookupParms197 size_t hash(const char * s) {return hfun(s);}
equalaspeller::CondsLookupParms198 bool equal(const char * x, const char * y) {return strcmp(x,y) == 0;}
keyaspeller::CondsLookupParms199 const char * key(const Conds * c) {return c->str;}
200 };
201
202 typedef HashTable<CondsLookupParms> CondsLookup;
203
// Normalizes and checks the condition string `str` in place.
//
//  - a one-character group "[x]" is replaced by the plain character x
//  - the members of every other group are sorted by unsigned character
//    value (an optional leading '^' stays in front)
//
// Returns the length of the normalized string, or -1 if the string is
// invalid: an empty group "[]", a '[' at the end of the string, a group
// without a closing ']', or a '[' reached while scanning a group.
// When -1 is returned the buffer may have been partially rewritten.
static int normalize_cond_str(char * str)
{
  char * s = str;  // read position
  char * d = str;  // write position, never ahead of s
  while (*s) {
    if (*s != '[') {
      *d++ = *s++;
    } else if (s[1] == '\0' || s[1] == ']') {
      return -1;
    } else if (s[2] == ']') {
      // single member group: keep just the member
      *d++ = s[1];
      s += 3;
    } else {
      *d++ = *s++;
      if (*s == '^') *d++ = *s++;
      // selection sort of the group members: repeatedly emit the
      // smallest remaining member and move the current one into its slot
      while (*s != ']') {
        if (*s == '\0' || *s == '[') return -1;
        char * min = s;
        for (char * i = s + 1; *i != ']'; ++i) {
          // The group has no closing ']'.  Stop here instead of
          // scanning (and later swapping) past the end of the string.
          if (*i == '\0') return -1;
          if ((unsigned char)*i < (unsigned char)*min) min = i;
        }
        char c = *s;
        *d++ = *min;
        *min = c;
        ++s;
      }
      *d++ = *s++;
    }
  }
  *d = '\0';
  return static_cast<int>(d - str);
}
237
238 static void encodeit(CondsLookup &, ObjStack &,
239 AffEntry * ptr, char * cs);
240
241 //////////////////////////////////////////////////////////////////////
242 //
243 // Affix Manager
244 //
245
setup(ParmString affpath,Conv & iconv)246 PosibErr<void> AffixMgr::setup(ParmString affpath, Conv & iconv)
247 {
248 // register hash manager and load affix data from aff file
249 //cpdmin = 3; // default value
250 max_strip_ = 0;
251 for (int i=0; i < SETSIZE; i++) {
252 pStart[i] = NULL;
253 sStart[i] = NULL;
254 pFlag[i] = NULL;
255 sFlag[i] = NULL;
256 max_strip_f[i] = 0;
257 }
258 return parse_file(affpath, iconv);
259 }
260
AffixMgr(const Language * l)261 AffixMgr::AffixMgr(const Language * l)
262 : lang(l), data_buf(1024*16) {}
263
~AffixMgr()264 AffixMgr::~AffixMgr() {}
265
// Raises lhs to rhs when rhs is larger; otherwise leaves lhs unchanged.
static inline void max_(int & lhs, int rhs)
{
  lhs = (lhs < rhs) ? rhs : lhs;
}
270
271 // read in aff file and build up prefix and suffix entry objects
parse_file(const char * affpath,Conv & iconv)272 PosibErr<void> AffixMgr::parse_file(const char * affpath, Conv & iconv)
273 {
274 // io buffers
275 String buf; DataPair dp;
276
277 CondsLookup conds_lookup;
278
279 // open the affix file
280 affix_file = data_buf.dup(affpath);
281 FStream afflst;
282 RET_ON_ERR(afflst.open(affpath,"r"));
283
284 // step one is to parse the affix file building up the internal
285 // affix data structures
286
287 // read in each line ignoring any that do not
288 // start with a known line type indicator
289
290 char prev_aff = '\0';
291
292 while (getdata_pair(afflst,dp,buf)) {
293 char affix_type = ' ';
294
295 /* parse in the name of the character set used by the .dict and .aff */
296
297 if (dp.key == "SET") {
298 String buf;
299 encoding = data_buf.dup(fix_encoding_str(dp.value, buf));
300 if (strcmp(encoding, lang->data_encoding()) != 0)
301 return make_err(incorrect_encoding, affix_file, lang->data_encoding(), encoding);
302 }
303
304 /* parse in the flag used by the controlled compound words */
305 //else if (d.key == "COMPOUNDFLAG")
306 // compound = data_buf.dup(d.value);
307
308 /* parse in the flag used by the controlled compound words */
309 //else if (d.key == "COMPOUNDMIN")
310 // cpdmin = atoi(d.value); // FiXME
311
312 //else if (dp.key == "TRY" || dp.key == "REP");
313
314 else if (dp.key == "PFX" || dp.key == "SFX")
315 affix_type = dp.key[0];
316
317 if (affix_type == ' ') continue;
318
319 //
320 // parse this affix: P - prefix, S - suffix
321 //
322
323 int numents = 0; // number of affentry structures to parse
324 char achar='\0'; // affix char identifier
325 short xpflg=0;
326 AffEntry * nptr;
327 {
328 // split affix header line into pieces
329 split(dp);
330 if (dp.key.empty()) goto error;
331 // key is affix char
332 const char * astr = iconv(dp.key);
333 if (astr[0] == '\0' || astr[1] != '\0') goto error;
334 achar = astr[0];
335 if (achar == prev_aff) goto error_count;
336 prev_aff = achar;
337
338 split(dp);
339 if (dp.key.size != 1 ||
340 !(dp.key[0] == 'Y' || dp.key[0] == 'N')) goto error;
341 // key is cross product indicator
342 if (dp.key[0] == 'Y') xpflg = XPRODUCT;
343
344 split(dp);
345 if (dp.key.empty()) goto error;
346 // key is number of affentries
347
348 numents = atoi(dp.key);
349
350 for (int j = 0; j < numents; j++) {
351 getdata_pair(afflst, dp, buf);
352
353 if (affix_type == 'P') {
354 nptr = (AffEntry *) data_buf.alloc_bottom(sizeof(PfxEntry));
355 new (nptr) PfxEntry;
356 } else {
357 nptr = (AffEntry *) data_buf.alloc_bottom(sizeof(SfxEntry));
358 new (nptr) SfxEntry;
359 }
360
361 nptr->xpflg = xpflg;
362
363 split(dp);
364 if (dp.key.empty()) goto error;
365 // key is affix charter
366 if (iconv(dp.key)[0] != achar) goto error_count;
367 nptr->achar = achar;
368
369 split(dp);
370 if (dp.key.empty()) goto error;
371 // key is strip
372 if (dp.key != "0") {
373 ParmString s0(iconv(dp.key));
374 max_(max_strip_, s0.size());
375 max_(max_strip_f[(byte)achar], s0.size());
376 nptr->strip = data_buf.dup(s0);
377 nptr->stripl = s0.size();
378 } else {
379 nptr->strip = "";
380 nptr->stripl = 0;
381 }
382
383 split(dp);
384 if (dp.key.empty()) goto error;
385 // key is affix string or 0 for null
386 if (dp.key != "0") {
387 nptr->appnd = data_buf.dup(iconv(dp.key));
388 nptr->appndl = strlen(nptr->appnd);
389 } else {
390 nptr->appnd = "";
391 nptr->appndl = 0;
392 }
393
394 split(dp);
395 if (dp.key.empty()) goto error;
396 // key is the conditions descriptions
397 char * cond = iconv(dp.key);
398 int cond_len = normalize_cond_str(cond);
399 if (cond_len < 0)
400 return (make_err(invalid_cond, MsgConv(lang)(cond))
401 .with_file(affix_file, dp.line_num));
402 if (nptr->stripl != 0) {
403 char * cc = cond;
404 if (affix_type == 'S') cc += cond_len - nptr->stripl;
405 if (cond_len < nptr->stripl ||
406 memcmp(cc, nptr->strip, nptr->stripl) != 0)
407 return (make_err(invalid_cond_strip,
408 MsgConv(lang)(cond), MsgConv(lang)(nptr->strip))
409 .with_file(affix_file, dp.line_num));
410 }
411 encodeit(conds_lookup, data_buf, nptr, cond);
412
413 // now create SfxEntry or PfxEntry objects and use links to
414 // build an ordered (sorted by affix string) list
415 if (affix_type == 'P')
416 build_pfxlist(static_cast<PfxEntry *>(nptr));
417 else
418 build_sfxlist(static_cast<SfxEntry *>(nptr));
419 }
420 }
421 continue;
422 error:
423 return make_err(corrupt_affix, MsgConv(lang)(achar)).with_file(affix_file, dp.line_num);
424 error_count:
425 return make_err(corrupt_affix, MsgConv(lang)(achar),
426 _("Possibly incorrect count.")).with_file(affix_file, dp.line_num);
427 }
428 afflst.close();
429
430 // now we can speed up performance greatly taking advantage of the
431 // relationship between the affixes and the idea of "subsets".
432
433 // View each prefix as a potential leading subset of another and view
434 // each suffix (reversed) as a potential trailing subset of another.
435
436 // To illustrate this relationship if we know the prefix "ab" is
437 // found in the word to examine, only prefixes that "ab" is a
438 // leading subset of need be examined. Furthermore is "ab" is not
439 // present then none of the prefixes that "ab" is is a subset need
440 // be examined.
441
442 // The same argument goes for suffix string that are reversed.
443
444 // Then to top this off why not examine the first char of the word
445 // to quickly limit the set of prefixes to examine (i.e. the
446 // prefixes to examine must be leading supersets of the first
447 // character of the word (if they exist)
448
449 // To take advantage of this "subset" relationship, we need to add
450 // two links from entry. One to take next if the current prefix
451 // is found (call it nexteq) and one to take next if the current
452 // prefix is not found (call it nextne).
453
454 // Since we have built ordered lists, all that remains is to
455 // properly initialize the nextne and nexteq pointers that relate
456 // them
457
458 process_pfx_order();
459 process_sfx_order();
460
461 //CERR.printf("%u\n", data_buf.calc_size()/1024);
462
463 return no_err;
464
465 }
466
467
468 // we want to be able to quickly access prefix information
469 // both by prefix flag, and sorted by prefix string itself
470 // so we need to set up two indexes
471
build_pfxlist(PfxEntry * pfxptr)472 PosibErr<void> AffixMgr::build_pfxlist(PfxEntry* pfxptr)
473 {
474 PfxEntry * ptr;
475 PfxEntry * ep = pfxptr;
476
477 // get the right starting point
478 const char * key = ep->key();
479 const byte flg = ep->flag();
480
481 // first index by flag which must exist
482 ptr = pFlag[flg];
483 ep->flag_next = ptr;
484 pFlag[flg] = ep;
485
486 // next insert the affix string, it will be sorted latter
487
488 byte sp = *((const byte *)key);
489 ptr = pStart[sp];
490 ep->next = ptr;
491 pStart[sp] = ep;
492 return no_err;
493 }
494
495 // we want to be able to quickly access suffix information
496 // both by suffix flag, and sorted by the reverse of the
497 // suffix string itself; so we need to set up two indexes
498
build_sfxlist(SfxEntry * sfxptr)499 PosibErr<void> AffixMgr::build_sfxlist(SfxEntry* sfxptr)
500 {
501 SfxEntry * ptr;
502 SfxEntry * ep = sfxptr;
503 char * tmp = (char *)data_buf.alloc(sfxptr->appndl + 1);
504 sfxptr->rappnd = tmp;
505
506 // reverse the string
507 char * dest = tmp + sfxptr->appndl;
508 *dest-- = 0;
509 const char * src = sfxptr->appnd;
510 for (; dest >= tmp; --dest, ++src)
511 *dest = *src;
512
513 /* get the right starting point */
514 const char * key = ep->key();
515 const byte flg = ep->flag();
516
517 // first index by flag which must exist
518 ptr = sFlag[flg];
519 ep->flag_next = ptr;
520 sFlag[flg] = ep;
521
522 // next insert the affix string, it will be sorted latter
523
524 byte sp = *((const byte *)key);
525 ptr = sStart[sp];
526 ep->next = ptr;
527 sStart[sp] = ep;
528 return no_err;
529 }
530
531
532
533 // initialize the PfxEntry links NextEQ and NextNE to speed searching
process_pfx_order()534 PosibErr<void> AffixMgr::process_pfx_order()
535 {
536 PfxEntry* ptr;
537
538 // loop through each prefix list starting point
539 for (int i=1; i < SETSIZE; i++) {
540
541 ptr = pStart[i];
542
543 if (ptr && ptr->next)
544 ptr = pStart[i] = sort(ptr, AffixLess<PfxEntry>());
545
546 // look through the remainder of the list
547 // and find next entry with affix that
548 // the current one is not a subset of
549 // mark that as destination for NextNE
550 // use next in list that you are a subset
551 // of as NextEQ
552
553 for (; ptr != NULL; ptr = ptr->next) {
554
555 PfxEntry * nptr = ptr->next;
556 for (; nptr != NULL; nptr = nptr->next) {
557 if (! isSubset( ptr->key() , nptr->key() )) break;
558 }
559 ptr->next_ne = nptr;
560 ptr->next_eq = NULL;
561 if ((ptr->next) && isSubset(ptr->key() ,
562 (ptr->next)->key()))
563 ptr->next_eq = ptr->next;
564 }
565
566 // now clean up by adding smart search termination strings
567 // if you are already a superset of the previous prefix
568 // but not a subset of the next, search can end here
569 // so set NextNE properly
570
571 ptr = pStart[i];
572 for (; ptr != NULL; ptr = ptr->next) {
573 PfxEntry * nptr = ptr->next;
574 PfxEntry * mptr = NULL;
575 for (; nptr != NULL; nptr = nptr->next) {
576 if (! isSubset(ptr->key(),nptr->key())) break;
577 mptr = nptr;
578 }
579 if (mptr) mptr->next_ne = NULL;
580 }
581 }
582 return no_err;
583 }
584
585
586
587 // initialize the SfxEntry links NextEQ and NextNE to speed searching
process_sfx_order()588 PosibErr<void> AffixMgr::process_sfx_order()
589 {
590 SfxEntry* ptr;
591
592 // loop through each prefix list starting point
593 for (int i=1; i < SETSIZE; i++) {
594
595 ptr = sStart[i];
596
597 if (ptr && ptr->next)
598 ptr = sStart[i] = sort(ptr, AffixLess<SfxEntry>());
599
600 // look through the remainder of the list
601 // and find next entry with affix that
602 // the current one is not a subset of
603 // mark that as destination for NextNE
604 // use next in list that you are a subset
605 // of as NextEQ
606
607 for (; ptr != NULL; ptr = ptr->next) {
608 SfxEntry * nptr = ptr->next;
609 for (; nptr != NULL; nptr = nptr->next) {
610 if (! isSubset(ptr->key(),nptr->key())) break;
611 }
612 ptr->next_ne = nptr;
613 ptr->next_eq = NULL;
614 if ((ptr->next) && isSubset(ptr->key(),(ptr->next)->key()))
615 ptr->next_eq = ptr->next;
616 }
617
618
619 // now clean up by adding smart search termination strings:
620 // if you are already a superset of the previous suffix
621 // but not a subset of the next, search can end here
622 // so set NextNE properly
623
624 ptr = sStart[i];
625 for (; ptr != NULL; ptr = ptr->next) {
626 SfxEntry * nptr = ptr->next;
627 SfxEntry * mptr = NULL;
628 for (; nptr != NULL; nptr = nptr->next) {
629 if (! isSubset(ptr->key(),nptr->key())) break;
630 mptr = nptr;
631 }
632 if (mptr) mptr->next_ne = NULL;
633 }
634 }
635 return no_err;
636 }
637
638 // takes aff file condition string and creates the
639 // conds array - please see the appendix at the end of the
640 // file affentry.cxx which describes what is going on here
641 // in much more detail
642
encodeit(CondsLookup & l,ObjStack & buf,AffEntry * ptr,char * cs)643 static void encodeit(CondsLookup & l, ObjStack & buf,
644 AffEntry * ptr, char * cs)
645 {
646 byte c;
647 int i, j, k;
648
649 // see if we already have this conds matrix
650
651 CondsLookup::iterator itr = l.find(cs);
652 if (!(itr == l.end())) {
653 ptr->conds = *itr;
654 return;
655 }
656
657 Conds * cds = (Conds *)buf.alloc_bottom(sizeof(Conds));
658 cds->str = buf.dup(cs);
659 l.insert(cds);
660 ptr->conds = cds;
661
662 int nc = strlen(cs);
663 VARARRAYM(byte, mbr, nc + 1, MAXLNLEN);
664
665 // now clear the conditions array
666 memset(cds->conds, 0, sizeof(cds->conds));
667
668 // now parse the string to create the conds array
669
670 int neg = 0; // complement indicator
671 int grp = 0; // group indicator
672 int n = 0; // number of conditions
673 int ec = 0; // end condition indicator
674 int nm = 0; // number of member in group
675
676 // if no condition just return
677 if (strcmp(cs,".")==0) {
678 cds->num = 0;
679 return;
680 }
681
682 i = 0;
683 while (i < nc) {
684 c = *((byte *)(cs + i));
685
686 // start group indicator
687 if (c == '[') {
688 grp = 1;
689 c = 0;
690 }
691
692 // complement flag
693 if ((grp == 1) && (c == '^')) {
694 neg = 1;
695 c = 0;
696 }
697
698 // end goup indicator
699 if (c == ']') {
700 ec = 1;
701 c = 0;
702 }
703
704 // add character of group to list
705 if ((grp == 1) && (c != 0)) {
706 *(mbr + nm) = c;
707 nm++;
708 c = 0;
709 }
710
711 // end of condition
712 if (c != 0) {
713 ec = 1;
714 }
715
716
717 if (ec) {
718 if (grp == 1) {
719 if (neg == 0) {
720 // set the proper bits in the condition array vals for those chars
721 for (j=0;j<nm;j++) {
722 k = (unsigned int) mbr[j];
723 cds->conds[k] = cds->conds[k] | (1 << n);
724 }
725 } else {
726 // complement so set all of them and then unset indicated ones
727 for (j=0;j<SETSIZE;j++) cds->conds[j] = cds->conds[j] | (1 << n);
728 for (j=0;j<nm;j++) {
729 k = (unsigned int) mbr[j];
730 cds->conds[k] = cds->conds[k] & ~(1 << n);
731 }
732 }
733 neg = 0;
734 grp = 0;
735 nm = 0;
736 } else {
737 // not a group so just set the proper bit for this char
738 // but first handle special case of . inside condition
739 if (c == '.') {
740 // wild card character so set them all
741 for (j=0;j<SETSIZE;j++) cds->conds[j] = cds->conds[j] | (1 << n);
742 } else {
743 cds->conds[(unsigned int)c] = cds->conds[(unsigned int)c] | (1 << n);
744 }
745 }
746 n++;
747 ec = 0;
748 }
749
750
751 i++;
752 }
753 cds->num = n;
754 return;
755 }
756
757
758 // check word for prefixes
prefix_check(const LookupInfo & linf,ParmString word,CheckInfo & ci,GuessInfo * gi,bool cross) const759 bool AffixMgr::prefix_check (const LookupInfo & linf, ParmString word,
760 CheckInfo & ci, GuessInfo * gi, bool cross) const
761 {
762 if (word.empty()) return false;
763
764 // first handle the special case of 0 length prefixes
765 PfxEntry * pe = pStart[0];
766 while (pe) {
767 if (pe->check(linf,this,word,ci,gi)) return true;
768 pe = pe->next;
769 }
770
771 // now handle the general case
772 byte sp = *reinterpret_cast<const byte *>(word.str());
773 PfxEntry * pptr = pStart[sp];
774
775 while (pptr) {
776 if (isSubset(pptr->key(),word)) {
777 if (pptr->check(linf,this,word,ci,gi,cross)) return true;
778 pptr = pptr->next_eq;
779 } else {
780 pptr = pptr->next_ne;
781 }
782 }
783
784 return false;
785 }
786
787
788 // check word for suffixes
suffix_check(const LookupInfo & linf,ParmString word,CheckInfo & ci,GuessInfo * gi,int sfxopts,AffEntry * ppfx) const789 bool AffixMgr::suffix_check (const LookupInfo & linf, ParmString word,
790 CheckInfo & ci, GuessInfo * gi,
791 int sfxopts, AffEntry * ppfx) const
792 {
793 if (word.empty()) return false;
794
795 // first handle the special case of 0 length suffixes
796 SfxEntry * se = sStart[0];
797 while (se) {
798 if (se->check(linf, word, ci, gi, sfxopts, ppfx)) return true;
799 se = se->next;
800 }
801
802 if (word.size() == 0)
803 return false;
804
805 // now handle the general case
806 byte sp = *((const byte *)(word + word.size() - 1));
807 SfxEntry * sptr = sStart[sp];
808
809 while (sptr) {
810 if (isRevSubset(sptr->key(), word + word.size() - 1, word.size())) {
811 if (sptr->check(linf, word, ci, gi, sfxopts, ppfx)) return true;
812 sptr = sptr->next_eq;
813 } else {
814 sptr = sptr->next_ne;
815 }
816 }
817
818 return false;
819 }
820
821 // check if word with affixes is correctly spelled
affix_check(const LookupInfo & linf,ParmString word,CheckInfo & ci,GuessInfo * gi) const822 bool AffixMgr::affix_check(const LookupInfo & linf, ParmString word,
823 CheckInfo & ci, GuessInfo * gi) const
824 {
825 if (word.empty()) return false;
826
827 // Deal With Case in a semi-intelligent manner
828 CasePattern cp = lang->LangImpl::case_pattern(word);
829 ParmString pword = word;
830 ParmString sword = word;
831 CharVector lower;
832 if (cp == FirstUpper) {
833 lower.append(word, word.size() + 1);
834 lower[0] = lang->to_lower(word[0]);
835 pword = ParmString(lower.data(), lower.size() - 1);
836 } else if (cp == AllUpper) {
837 lower.resize(word.size() + 1);
838 unsigned int i = 0;
839 for (; i != word.size(); ++i)
840 lower[i] = lang->to_lower(word[i]);
841 lower[i] = '\0';
842 pword = ParmString(lower.data(), lower.size() - 1);
843 sword = pword;
844 }
845
846 // check all prefixes (also crossed with suffixes if allowed)
847 if (prefix_check(linf, pword, ci, gi)) return true;
848
849 // if still not found check all suffixes
850 if (suffix_check(linf, sword, ci, gi, 0, NULL)) return true;
851
852 // if still not found check again but with the lower case version
853 // which can make a difference if the entire word matches the cond
854 // string
855 if (cp == FirstUpper) {
856 return suffix_check(linf, pword, ci, gi, 0, NULL);
857 } else {
858 return false;
859 }
860 }
861
munch(ParmString word,GuessInfo * gi,bool cross) const862 void AffixMgr::munch(ParmString word, GuessInfo * gi, bool cross) const
863 {
864 LookupInfo li(0, LookupInfo::AlwaysTrue);
865 CheckInfo ci;
866 gi->reset();
867 CasePattern cp = lang->LangImpl::case_pattern(word);
868 if (cp == AllUpper) return;
869 if (cp != FirstUpper)
870 prefix_check(li, word, ci, gi, cross);
871 suffix_check(li, word, ci, gi, 0, NULL);
872 }
873
expand(ParmString word,ParmString aff,ObjStack & buf,int limit) const874 WordAff * AffixMgr::expand(ParmString word, ParmString aff,
875 ObjStack & buf, int limit) const
876 {
877 byte * empty = (byte *)buf.alloc(1);
878 *empty = 0;
879
880 byte * suf = (byte *)buf.alloc(aff.size() + 1);
881 byte * suf_e = suf;
882 byte * csuf = (byte *)buf.alloc(aff.size() + 1);
883 byte * csuf_e = csuf;
884
885 WordAff * head = (WordAff *)buf.alloc_bottom(sizeof(WordAff));
886 WordAff * cur = head;
887 cur->word = buf.dup(word);
888 cur->aff = suf;
889
890 for (const byte * c = (const byte *)aff.str(), * end = c + aff.size();
891 c != end;
892 ++c)
893 {
894 if (sFlag[*c]) *suf_e++ = *c;
895 if (sFlag[*c] && sFlag[*c]->allow_cross()) *csuf_e++ = *c;
896
897 for (PfxEntry * p = pFlag[*c]; p; p = p->flag_next) {
898 SimpleString newword = p->add(word, buf);
899 if (!newword) continue;
900 cur->next = (WordAff *)buf.alloc_bottom(sizeof(WordAff));
901 cur = cur->next;
902 cur->word = newword;
903 cur->aff = p->allow_cross() ? csuf : empty;
904 }
905 }
906
907 *suf_e = 0;
908 *csuf_e = 0;
909 cur->next = 0;
910
911 if (limit == 0) return head;
912
913 WordAff * * end = &cur->next;
914 WordAff * * very_end = end;
915 size_t nsuf_s = suf_e - suf + 1;
916
917 for (WordAff * * cur = &head; cur != end; cur = &(*cur)->next) {
918 if ((int)(*cur)->word.size - max_strip_ >= limit) continue;
919 byte * nsuf = (byte *)buf.alloc(nsuf_s);
920 expand_suffix((*cur)->word, (*cur)->aff, buf, limit, nsuf, &very_end, word);
921 (*cur)->aff = nsuf;
922 }
923
924 return head;
925 }
926
expand_suffix(ParmString word,const byte * aff,ObjStack & buf,int limit,byte * new_aff,WordAff *** l,ParmString orig_word) const927 WordAff * AffixMgr::expand_suffix(ParmString word, const byte * aff,
928 ObjStack & buf, int limit,
929 byte * new_aff, WordAff * * * l,
930 ParmString orig_word) const
931 {
932 WordAff * head = 0;
933 if (l) head = **l;
934 WordAff * * cur = l ? *l : &head;
935 bool expanded = false;
936 bool not_expanded = false;
937 if (!orig_word) orig_word = word;
938
939 while (*aff) {
940 if ((int)word.size() - max_strip_f[*aff] < limit) {
941 for (SfxEntry * p = sFlag[*aff]; p; p = p->flag_next) {
942 SimpleString newword = p->add(word, buf, limit, orig_word);
943 if (!newword) continue;
944 if (newword == EMPTY) {not_expanded = true; continue;}
945 *cur = (WordAff *)buf.alloc_bottom(sizeof(WordAff));
946 (*cur)->word = newword;
947 (*cur)->aff = (const byte *)EMPTY;
948 cur = &(*cur)->next;
949 expanded = true;
950 }
951 }
952 if (new_aff && (!expanded || not_expanded)) *new_aff++ = *aff;
953 ++aff;
954 }
955 *cur = 0;
956 if (new_aff) *new_aff = 0;
957 if (l) *l = cur;
958 return head;
959 }
960
check_affix(ParmString word,char aff) const961 CheckAffixRes AffixMgr::check_affix(ParmString word, char aff) const
962 {
963 CheckAffixRes res = InvalidAffix;
964
965 for (PfxEntry * p = pFlag[(unsigned char)aff]; p; p = p->flag_next) {
966 res = InapplicableAffix;
967 if (p->applicable(word)) return ValidAffix;
968 }
969
970 for (SfxEntry * p = sFlag[(unsigned char)aff]; p; p = p->flag_next) {
971 if (res == InvalidAffix) res = InapplicableAffix;
972 if (p->applicable(word)) return ValidAffix;
973 }
974
975 return res;
976 }
977
978
979
980 //////////////////////////////////////////////////////////////////////
981 //
982 // LookupInfo
983 //
984
lookup(ParmString word,const SensitiveCompare * c,char achar,WordEntry & o,GuessInfo * gi) const985 int LookupInfo::lookup (ParmString word, const SensitiveCompare * c,
986 char achar,
987 WordEntry & o, GuessInfo * gi) const
988 {
989 SpellerImpl::WS::const_iterator i = begin;
990 const char * g = 0;
991 if (mode == Word) {
992 do {
993 (*i)->lookup(word, c, o);
994 for (;!o.at_end(); o.adv()) {
995 if (TESTAFF(o.aff, achar))
996 return 1;
997 else
998 g = o.word;
999 }
1000 ++i;
1001 } while (i != end);
1002 } else if (mode == Clean) {
1003 do {
1004 (*i)->clean_lookup(word, o);
1005 for (;!o.at_end(); o.adv()) {
1006 if (TESTAFF(o.aff, achar))
1007 return 1;
1008 else
1009 g = o.word;
1010 }
1011 ++i;
1012 } while (i != end);
1013 } else if (gi) {
1014 g = gi->dup(word);
1015 }
1016 if (gi && g) {
1017 CheckInfo * ci = gi->add();
1018 ci->word = g;
1019 return -1;
1020 }
1021 return 0;
1022 }
1023
1024 //////////////////////////////////////////////////////////////////////
1025 //
1026 // Affix Entry
1027 //
1028
applicable(SimpleString word) const1029 bool PfxEntry::applicable(SimpleString word) const
1030 {
1031 unsigned int cond;
1032 /* make sure all conditions match */
1033 if ((word.size > stripl) && (word.size >= conds->num)) {
1034 const byte * cp = (const byte *) word.str;
1035 for (cond = 0; cond < conds->num; cond++) {
1036 if ((conds->get(*cp++) & (1 << cond)) == 0)
1037 break;
1038 }
1039 if (cond >= conds->num) return true;
1040 }
1041 return false;
1042 }
1043
1044 // add prefix to this word assuming conditions hold
add(SimpleString word,ObjStack & buf) const1045 SimpleString PfxEntry::add(SimpleString word, ObjStack & buf) const
1046 {
1047 unsigned int cond;
1048 /* make sure all conditions match */
1049 if ((word.size > stripl) && (word.size >= conds->num)) {
1050 const byte * cp = (const byte *) word.str;
1051 for (cond = 0; cond < conds->num; cond++) {
1052 if ((conds->get(*cp++) & (1 << cond)) == 0)
1053 break;
1054 }
1055 if (cond >= conds->num) {
1056 /* */
1057 int alen = word.size - stripl;
1058 char * newword = (char *)buf.alloc(alen + appndl + 1);
1059 if (appndl) memcpy(newword, appnd, appndl);
1060 memcpy(newword + appndl, word + stripl, alen + 1);
1061 return SimpleString(newword, alen + appndl);
1062 }
1063 }
1064 return SimpleString();
1065 }
1066
1067 // check if this prefix entry matches
check(const LookupInfo & linf,const AffixMgr * pmyMgr,ParmString word,CheckInfo & ci,GuessInfo * gi,bool cross) const1068 bool PfxEntry::check(const LookupInfo & linf, const AffixMgr * pmyMgr,
1069 ParmString word,
1070 CheckInfo & ci, GuessInfo * gi, bool cross) const
1071 {
1072 unsigned int cond; // condition number being examined
1073 unsigned tmpl; // length of tmpword
1074 WordEntry wordinfo; // hash entry of root word or NULL
1075 byte * cp;
1076 VARARRAYM(char, tmpword, word.size()+stripl+1, MAXWORDLEN+1);
1077
1078 // on entry prefix is 0 length or already matches the beginning of the word.
1079 // So if the remaining root word has positive length
1080 // and if there are enough chars in root word and added back strip chars
1081 // to meet the number of characters conditions, then test it
1082
1083 tmpl = word.size() - appndl;
1084
1085 if ((tmpl > 0) && (tmpl + stripl >= conds->num)) {
1086
1087 // generate new root word by removing prefix and adding
1088 // back any characters that would have been stripped
1089
1090 if (stripl) strcpy (tmpword, strip);
1091 strcpy ((tmpword + stripl), (word + appndl));
1092
1093 // now make sure all of the conditions on characters
1094 // are met. Please see the appendix at the end of
1095 // this file for more info on exactly what is being
1096 // tested
1097
1098 cp = (byte *)tmpword;
1099 for (cond = 0; cond < conds->num; cond++) {
1100 if ((conds->get(*cp++) & (1 << cond)) == 0) break;
1101 }
1102
1103 // if all conditions are met then check if resulting
1104 // root word in the dictionary
1105
1106 if (cond >= conds->num) {
1107 CheckInfo * lci = 0;
1108 CheckInfo * guess = 0;
1109 tmpl += stripl;
1110
1111 int res = linf.lookup(tmpword, &linf.sp->s_cmp_end, achar, wordinfo, gi);
1112
1113 if (res == 1) {
1114
1115 lci = &ci;
1116 lci->word = wordinfo.word;
1117 goto quit;
1118
1119 } else if (res == -1) {
1120
1121 guess = gi->head;
1122
1123 }
1124
1125 // prefix matched but no root word was found
1126 // if XPRODUCT is allowed, try again but now
1127 // cross checked combined with a suffix
1128
1129 if (gi)
1130 lci = gi->head;
1131
1132 if (cross && xpflg & XPRODUCT) {
1133 if (pmyMgr->suffix_check(linf, ParmString(tmpword, tmpl),
1134 ci, gi,
1135 XPRODUCT, (AffEntry *)this)) {
1136 lci = &ci;
1137
1138 } else if (gi) {
1139
1140 CheckInfo * stop = lci;
1141 for (lci = gi->head;
1142 lci != stop;
1143 lci = const_cast<CheckInfo *>(lci->next))
1144 {
1145 lci->pre_flag = achar;
1146 lci->pre_strip_len = stripl;
1147 lci->pre_add_len = appndl;
1148 lci->pre_add = appnd;
1149 }
1150
1151 } else {
1152
1153 lci = 0;
1154
1155 }
1156 }
1157
1158 if (guess)
1159 lci = guess;
1160
1161 quit:
1162 if (lci) {
1163 lci->pre_flag = achar;
1164 lci->pre_strip_len = stripl;
1165 lci->pre_add_len = appndl;
1166 lci->pre_add = appnd;
1167 }
1168 if (lci == &ci) return true;
1169 }
1170 }
1171 return false;
1172 }
1173
applicable(SimpleString word) const1174 bool SfxEntry::applicable(SimpleString word) const
1175 {
1176 int cond;
1177 /* make sure all conditions match */
1178 if ((word.size > stripl) && (word.size >= conds->num)) {
1179 const byte * cp = (const byte *) (word + word.size);
1180 for (cond = conds->num; --cond >=0; ) {
1181 if ((conds->get(*--cp) & (1 << cond)) == 0)
1182 break;
1183 }
1184 if (cond < 0) return true;
1185 }
1186 return false;
1187 }
1188
1189 // add suffix to this word assuming conditions hold
add(SimpleString word,ObjStack & buf,int limit,SimpleString orig_word) const1190 SimpleString SfxEntry::add(SimpleString word, ObjStack & buf,
1191 int limit, SimpleString orig_word) const
1192 {
1193 int cond;
1194 /* make sure all conditions match */
1195 if ((orig_word.size > stripl) && (orig_word.size >= conds->num)) {
1196 const byte * cp = (const byte *) (orig_word + orig_word.size);
1197 for (cond = conds->num; --cond >=0; ) {
1198 if ((conds->get(*--cp) & (1 << cond)) == 0)
1199 break;
1200 }
1201 if (cond < 0) {
1202 int alen = word.size - stripl;
1203 if (alen >= limit) return EMPTY;
1204 /* we have a match so add suffix */
1205 char * newword = (char *)buf.alloc(alen + appndl + 1);
1206 memcpy(newword, word, alen);
1207 memcpy(newword + alen, appnd, appndl + 1);
1208 return SimpleString(newword, alen + appndl);
1209 }
1210 }
1211 return SimpleString();
1212 }
1213
1214 // see if this suffix is present in the word
check(const LookupInfo & linf,ParmString word,CheckInfo & ci,GuessInfo * gi,int optflags,AffEntry * ppfx)1215 bool SfxEntry::check(const LookupInfo & linf, ParmString word,
1216 CheckInfo & ci, GuessInfo * gi,
1217 int optflags, AffEntry* ppfx)
1218 {
1219 unsigned tmpl; // length of tmpword
1220 int cond; // condition beng examined
1221 WordEntry wordinfo; // hash entry pointer
1222 byte * cp;
1223 VARARRAYM(char, tmpword, word.size()+stripl+1, MAXWORDLEN+1);
1224 PfxEntry* ep = (PfxEntry *) ppfx;
1225
1226 // if this suffix is being cross checked with a prefix
1227 // but it does not support cross products skip it
1228
1229 if ((optflags & XPRODUCT) != 0 && (xpflg & XPRODUCT) == 0)
1230 return false;
1231
1232 // upon entry suffix is 0 length or already matches the end of the word.
1233 // So if the remaining root word has positive length
1234 // and if there are enough chars in root word and added back strip chars
1235 // to meet the number of characters conditions, then test it
1236
1237 tmpl = word.size() - appndl;
1238
1239 if ((tmpl > 0) && (tmpl + stripl >= conds->num)) {
1240
1241 // generate new root word by removing suffix and adding
1242 // back any characters that would have been stripped or
1243 // or null terminating the shorter string
1244
1245 strcpy (tmpword, word);
1246 cp = (byte *)(tmpword + tmpl);
1247 if (stripl) {
1248 strcpy ((char *)cp, strip);
1249 tmpl += stripl;
1250 cp = (byte *)(tmpword + tmpl);
1251 } else *cp = '\0';
1252
1253 // now make sure all of the conditions on characters
1254 // are met. Please see the appendix at the end of
1255 // this file for more info on exactly what is being
1256 // tested
1257
1258 for (cond = conds->num; --cond >= 0; ) {
1259 if ((conds->get(*--cp) & (1 << cond)) == 0) break;
1260 }
1261
1262 // if all conditions are met then check if resulting
1263 // root word in the dictionary
1264
1265 if (cond < 0) {
1266 CheckInfo * lci = 0;
1267 tmpl += stripl;
1268 const SensitiveCompare * cmp =
1269 optflags & XPRODUCT ? &linf.sp->s_cmp_middle : &linf.sp->s_cmp_begin;
1270 int res = linf.lookup(tmpword, cmp, achar, wordinfo, gi);
1271 if (res == 1
1272 && ((optflags & XPRODUCT) == 0 || TESTAFF(wordinfo.aff, ep->achar)))
1273 {
1274 lci = &ci;
1275 lci->word = wordinfo.word;
1276 } else if (res == 1 && gi) {
1277 lci = gi->add();
1278 lci->word = wordinfo.word;
1279 } else if (res == -1) { // gi must be defined
1280 lci = gi->head;
1281 }
1282
1283 if (lci) {
1284 lci->suf_flag = achar;
1285 lci->suf_strip_len = stripl;
1286 lci->suf_add_len = appndl;
1287 lci->suf_add = appnd;
1288 }
1289
1290 if (lci == &ci) return true;
1291 }
1292 }
1293 return false;
1294 }
1295
1296 //////////////////////////////////////////////////////////////////////
1297 //
1298 // new_affix_mgr
1299 //
1300
1301
new_affix_mgr(ParmString name,Conv & iconv,const Language * lang)1302 PosibErr<AffixMgr *> new_affix_mgr(ParmString name,
1303 Conv & iconv,
1304 const Language * lang)
1305 {
1306 if (name == "none")
1307 return 0;
1308 //CERR << "NEW AFFIX MGR\n";
1309 String file;
1310 file += lang->data_dir();
1311 file += '/';
1312 file += lang->name();
1313 file += "_affix.dat";
1314 AffixMgr * affix;
1315 affix = new AffixMgr(lang);
1316 PosibErrBase pe = affix->setup(file, iconv);
1317 if (pe.has_err()) {
1318 delete affix;
1319 return pe;
1320 } else {
1321 return affix;
1322 }
1323 }
1324 }
1325
1326 /**************************************************************************
1327
1328 Appendix: Understanding Affix Code
1329
1330
1331 An affix is either a prefix or a suffix attached to root words to make
1332 other words.
1333
Basically a Prefix or a Suffix is a set of AffEntry objects
which store information about the prefix or suffix along
with supporting routines to check if a word has a particular
prefix or suffix or a combination.
1338
1339 The structure affentry is defined as follows:
1340
1341 struct AffEntry
1342 {
1343 unsigned char achar; // char used to represent the affix
1344 char * strip; // string to strip before adding affix
1345 char * appnd; // the affix string to add
1346 short stripl; // length of the strip string
1347 short appndl; // length of the affix string
1348 short numconds; // the number of conditions that must be met
1349 short xpflg; // flag: XPRODUCT- combine both prefix and suffix
1350 char conds[SETSIZE]; // array which encodes the conditions to be met
1351 };
1352
1353
1354 Here is a suffix borrowed from the en_US.aff file. This file
1355 is whitespace delimited.
1356
1357 SFX D Y 4
SFX D 0 d e
1359 SFX D y ied [^aeiou]y
1360 SFX D 0 ed [^ey]
1361 SFX D 0 ed [aeiou]y
1362
1363 This information can be interpreted as follows:
1364
The first line has 4 fields:
1366
1367 Field
1368 -----
1369 1 SFX - indicates this is a suffix
1370 2 D - is the name of the character flag which represents this suffix
1371 3 Y - indicates it can be combined with prefixes (cross product)
4 4 - indicates that a sequence of 4 affentry structures is needed to
properly store the affix information
1374
1375 The remaining lines describe the unique information for the 4 SfxEntry
1376 objects that make up this affix. Each line can be interpreted
1377 as follows: (note fields 1 and 2 are as a check against line 1 info)
1378
1379 Field
1380 -----
1381 1 SFX - indicates this is a suffix
1382 2 D - is the name of the character flag for this affix
1383 3 y - the string of chars to strip off before adding affix
1384 (a 0 here indicates the NULL string)
1385 4 ied - the string of affix characters to add
1386 5 [^aeiou]y - the conditions which must be met before the affix
1387 can be applied
1388
1389 Field 5 is interesting. Since this is a suffix, field 5 tells us that
1390 there are 2 conditions that must be met. The first condition is that
1391 the next to the last character in the word must *NOT* be any of the
1392 following "a", "e", "i", "o" or "u". The second condition is that
1393 the last character of the word must end in "y".
1394
So how can we encode this information concisely and be able to
test for both conditions in a fast manner? The answer is found
by studying the wonderful ispell code of Geoff Kuenning, et al.
(now available under a normal BSD license).
1399
1400 If we set up a conds array of 256 bytes indexed (0 to 255) and access it
1401 using a character (cast to an unsigned char) of a string, we have 8 bits
1402 of information we can store about that character. Specifically we
1403 could use each bit to say if that character is allowed in any of the
1404 last (or first for prefixes) 8 characters of the word.
1405
Basically, each character at one end of the word (up to the number
of conditions) is used to index into the conds array and the resulting
value found there says whether that character is valid for a
specific character position in the word.
1410
1411 For prefixes, it does this by setting bit 0 if that char is valid
1412 in the first position, bit 1 if valid in the second position, and so on.
1413
1414 If a bit is not set, then that char is not valid for that position in the
1415 word.
1416
1417 If working with suffixes bit 0 is used for the character closest
1418 to the front, bit 1 for the next character towards the end, ...,
1419 with bit numconds-1 representing the last char at the end of the string.
1420
Note: since entries in the conds[] are 8 bits, only 8 conditions
(read that only 8 character positions) can be examined at one
end of a word (the beginning for prefixes and the end for suffixes).
1424
So to make this clearer, let's encode the conds array values for the
first two affentries for the suffix D described earlier.
1427
1428
1429 For the first affentry:
1430 numconds = 1 (only examine the last character)
1431
1432 conds['e'] = (1 << 0) (the word must end in an E)
1433 all others are all 0
1434
1435 For the second affentry:
1436 numconds = 2 (only examine the last two characters)
1437
1438 conds[X] = conds[X] | (1 << 0) (aeiou are not allowed)
1439 where X is all characters *but* a, e, i, o, or u
1440
1441
1442 conds['y'] = (1 << 1) (the last char must be a y)
1443 all other bits for all other entries in the conds array are zero
1444
1445
1446 **************************************************************************/
1447