1 // This file is part of The New Aspell
2 // Copyright (C) 2004 by Kevin Atkinson under the GNU LGPL
3 // license version 2.0 or 2.1. You should have received a copy of the
4 // LGPL license along with this library if you did not you can find it
5 // at http://www.gnu.org/.
6 //
7 // This code is based on the the MySpell affix code:
8 //
9 /*
10 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada And
11 * Contributors. All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 *
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 *
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 *
24 * 3. All modifications to the source code must be clearly marked as
25 * such. Binary redistributions based on modified source code
26 * must be clearly marked as modified versions in the documentation
27 * and/or other materials provided with the distribution.
28 *
29 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
30 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
32 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
33 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
34 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
36 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 *
42 */
43
44 #include <cstdlib>
45 #include <cstring>
46 #include <cstdio>
47
48 //#include "iostream.hpp"
49
50 #include "affix.hpp"
51 #include "errors.hpp"
52 #include "getdata.hpp"
53 #include "parm_string.hpp"
54 #include "check_list.hpp"
55 #include "speller_impl.hpp"
56 #include "vararray.hpp"
57 #include "lsort.hpp"
58 #include "hash-t.hpp"
59
60 #include "gettext.h"
61
62 using namespace std;
63
64 namespace aspeller {
65
66 typedef unsigned char byte;
67 static char EMPTY[1] = {0};
68
69 //////////////////////////////////////////////////////////////////////
70 //
71 // Entry struct definations
72 //
73
74 struct Conds
75 {
76 char * str;
77 unsigned num;
78 char conds[SETSIZE];
getaspeller::Conds79 char get(byte i) const {return conds[i];}
80 };
81
82 struct AffEntry
83 {
84 const char * appnd;
85 const char * strip;
86 byte appndl;
87 byte stripl;
88 byte xpflg;
89 char achar;
90 const Conds * conds;
91 //unsigned int numconds;
92 //char conds[SETSIZE];
93 };
94
95 // A Prefix Entry
96
97 struct PfxEntry : public AffEntry
98 {
99 PfxEntry * next;
100 PfxEntry * next_eq;
101 PfxEntry * next_ne;
102 PfxEntry * flag_next;
PfxEntryaspeller::PfxEntry103 PfxEntry() {}
104
105 bool check(const LookupInfo &, const AffixMgr * pmyMgr,
106 ParmString, CheckInfo &, GuessInfo *, bool cross = true) const;
107
allow_crossaspeller::PfxEntry108 inline bool allow_cross() const { return ((xpflg & XPRODUCT) != 0); }
flagaspeller::PfxEntry109 inline byte flag() const { return achar; }
keyaspeller::PfxEntry110 inline const char * key() const { return appnd; }
111 bool applicable(SimpleString) const;
112 SimpleString add(SimpleString, ObjStack & buf) const;
113 };
114
115 // A Suffix Entry
116
117 struct SfxEntry : public AffEntry
118 {
119 const char * rappnd; // this is set in AffixMgr::build_sfxlist
120
121 SfxEntry * next;
122 SfxEntry * next_eq;
123 SfxEntry * next_ne;
124 SfxEntry * flag_next;
125
SfxEntryaspeller::SfxEntry126 SfxEntry() {}
127
128 bool check(const LookupInfo &, ParmString, CheckInfo &, GuessInfo *,
129 int optflags, AffEntry * ppfx);
130
allow_crossaspeller::SfxEntry131 inline bool allow_cross() const { return ((xpflg & XPRODUCT) != 0); }
flagaspeller::SfxEntry132 inline byte flag() const { return achar; }
keyaspeller::SfxEntry133 inline const char * key() const { return rappnd; }
134 bool applicable(SimpleString) const;
135 SimpleString add(SimpleString, ObjStack & buf, int limit, SimpleString) const;
136 };
137
138 //////////////////////////////////////////////////////////////////////
139 //
140 // Utility functions declarations
141 //
142
143 /* return 1 if s1 is subset of s2 */
isSubset(const char * s1,const char * s2)144 static bool isSubset(const char * s1, const char * s2)
145 {
146 while( *s1 && (*s1 == *s2) ) {
147 s1++;
148 s2++;
149 }
150 return (*s1 == '\0');
151 }
152
153 // return 1 if s1 (reversed) is a leading subset of end of s2
isRevSubset(const char * s1,const char * end_of_s2,int len)154 static bool isRevSubset(const char * s1, const char * end_of_s2, int len)
155 {
156 while( (len > 0) && *s1 && (*s1 == *end_of_s2) ) {
157 s1++;
158 end_of_s2--;
159 len --;
160 }
161 return (*s1 == '\0');
162 }
163
164 template <class T>
165 struct AffixLess
166 {
operator ()aspeller::AffixLess167 bool operator() (T * x, T * y) const {return strcmp(x->key(),y->key()) < 0;}
168 };
169
170 // struct StringLookup {
171 // struct Parms {
172 // typedef const char * Value;
173 // typedef const char * Key;
174 // static const bool is_multi = false;
175 // hash<const char *> hfun;
176 // size_t hash(const char * s) {return hfun(s);}
177 // bool equal(const char * x, const char * y) {return strcmp(x,y) == 0;}
178 // const char * key(const char * c) {return c;}
179 // };
180 // typedef HashTable<Parms> Lookup;
181 // Lookup lookup;
182 // ObjStack * data_buf;
183 // StringLookup(ObjStack * b) : data_buf(b) {}
184 // const char * dup(const char * orig) {
185 // pair<Lookup::iterator, bool> res = lookup.insert(orig);
186 // if (res.second) *res.first = data_buf->dup(orig);
187 // return *res.first;
188 // //return data_buf->dup(orig);
189 // }
190 // };
191
192 struct CondsLookupParms {
193 typedef const Conds * Value;
194 typedef const char * Key;
195 static const bool is_multi = false;
196 acommon::hash<const char *> hfun;
hashaspeller::CondsLookupParms197 size_t hash(const char * s) {return hfun(s);}
equalaspeller::CondsLookupParms198 bool equal(const char * x, const char * y) {return strcmp(x,y) == 0;}
keyaspeller::CondsLookupParms199 const char * key(const Conds * c) {return c->str;}
200 };
201
202 typedef HashTable<CondsLookupParms> CondsLookup;
203
204 // normalizes and checks the cond_str
205 // returns the length of the new string or -1 if invalid
normalize_cond_str(char * str)206 static int normalize_cond_str(char * str)
207 {
208 char * s = str;
209 char * d = str;
210 while (*s) {
211 if (*s != '[') {
212 *d++ = *s++;
213 } else if (s[1] == '\0' || s[1] == ']') {
214 return -1;
215 } else if (s[2] == ']') {
216 *d++ = s[1];
217 s += 3;
218 } else {
219 *d++ = *s++;
220 if (*s == '^') *d++ = *s++;
221 while (*s != ']') {
222 if (*s == '\0' || *s == '[') return -1;
223 char * min = s;
224 for (char * i = s + 1; *i != ']'; ++i) {
225 if ((byte)*i < (byte)*min) min = i;}
226 char c = *s;
227 *d++ = *min;
228 *min = c;
229 ++s;
230 }
231 *d++ = *s++;
232 }
233 }
234 *d = '\0';
235 return d - str;
236 }
237
238 static void encodeit(CondsLookup &, ObjStack &,
239 AffEntry * ptr, char * cs);
240
241 //////////////////////////////////////////////////////////////////////
242 //
243 // Affix Manager
244 //
245
setup(ParmString affpath,Conv & iconv)246 PosibErr<void> AffixMgr::setup(ParmString affpath, Conv & iconv)
247 {
248 // register hash manager and load affix data from aff file
249 //cpdmin = 3; // default value
250 max_strip_ = 0;
251 for (int i=0; i < SETSIZE; i++) {
252 pStart[i] = NULL;
253 sStart[i] = NULL;
254 pFlag[i] = NULL;
255 sFlag[i] = NULL;
256 max_strip_f[i] = 0;
257 }
258 return parse_file(affpath, iconv);
259 }
260
AffixMgr(const Language * l)261 AffixMgr::AffixMgr(const Language * l)
262 : lang(l), data_buf(1024*16) {}
263
~AffixMgr()264 AffixMgr::~AffixMgr() {}
265
max_(int & lhs,int rhs)266 static inline void max_(int & lhs, int rhs)
267 {
268 if (lhs < rhs) lhs = rhs;
269 }
270
271 // read in aff file and build up prefix and suffix entry objects
parse_file(const char * affpath,Conv & iconv)272 PosibErr<void> AffixMgr::parse_file(const char * affpath, Conv & iconv)
273 {
274 // io buffers
275 String buf; DataPair dp;
276
277 CondsLookup conds_lookup;
278
279 // open the affix file
280 affix_file = data_buf.dup(affpath);
281 FStream afflst;
282 RET_ON_ERR(afflst.open(affpath,"r"));
283
284 // step one is to parse the affix file building up the internal
285 // affix data structures
286
287 // read in each line ignoring any that do not
288 // start with a known line type indicator
289
290 char prev_aff = '\0';
291
292 while (getdata_pair(afflst,dp,buf)) {
293 char affix_type = ' ';
294
295 /* parse in the name of the character set used by the .dict and .aff */
296
297 if (dp.key == "SET") {
298 String buf;
299 encoding = data_buf.dup(fix_encoding_str(dp.value, buf));
300 if (strcmp(encoding, lang->data_encoding()) != 0)
301 return make_err(incorrect_encoding, affix_file, lang->data_encoding(), encoding);
302 }
303
304 /* parse in the flag used by the controlled compound words */
305 //else if (d.key == "COMPOUNDFLAG")
306 // compound = data_buf.dup(d.value);
307
308 /* parse in the flag used by the controlled compound words */
309 //else if (d.key == "COMPOUNDMIN")
310 // cpdmin = atoi(d.value); // FiXME
311
312 //else if (dp.key == "TRY" || dp.key == "REP");
313
314 else if (dp.key == "PFX" || dp.key == "SFX")
315 affix_type = dp.key[0];
316
317 if (affix_type == ' ') continue;
318
319 //
320 // parse this affix: P - prefix, S - suffix
321 //
322
323 int numents = 0; // number of affentry structures to parse
324 char achar='\0'; // affix char identifier
325 short xpflg=0;
326 AffEntry * nptr;
327 {
328 // split affix header line into pieces
329 split(dp);
330 if (dp.key.empty()) goto error;
331 // key is affix char
332 const char * astr = iconv(dp.key);
333 if (astr[0] == '\0' || astr[1] != '\0') goto error;
334 achar = astr[0];
335 if (achar == prev_aff) goto error_count;
336 prev_aff = achar;
337
338 split(dp);
339 if (dp.key.size != 1 ||
340 !(dp.key[0] == 'Y' || dp.key[0] == 'N')) goto error;
341 // key is cross product indicator
342 if (dp.key[0] == 'Y') xpflg = XPRODUCT;
343
344 split(dp);
345 if (dp.key.empty()) goto error;
346 // key is number of affentries
347
348 numents = atoi(dp.key);
349
350 for (int j = 0; j < numents; j++) {
351 getdata_pair(afflst, dp, buf);
352
353 if (affix_type == 'P') {
354 nptr = (AffEntry *) data_buf.alloc_bottom(sizeof(PfxEntry));
355 new (nptr) PfxEntry;
356 } else {
357 nptr = (AffEntry *) data_buf.alloc_bottom(sizeof(SfxEntry));
358 new (nptr) SfxEntry;
359 }
360
361 nptr->xpflg = xpflg;
362
363 split(dp);
364 if (dp.key.empty()) goto error;
365 // key is affix charter
366 if (iconv(dp.key)[0] != achar) goto error_count;
367 nptr->achar = achar;
368
369 split(dp);
370 if (dp.key.empty()) goto error;
371 // key is strip
372 if (dp.key != "0") {
373 ParmString s0(iconv(dp.key));
374 max_(max_strip_, s0.size());
375 max_(max_strip_f[(byte)achar], s0.size());
376 nptr->strip = data_buf.dup(s0);
377 nptr->stripl = s0.size();
378 } else {
379 nptr->strip = "";
380 nptr->stripl = 0;
381 }
382
383 split(dp);
384 if (dp.key.empty()) goto error;
385 // key is affix string or 0 for null
386 if (dp.key != "0") {
387 nptr->appnd = data_buf.dup(iconv(dp.key));
388 nptr->appndl = strlen(nptr->appnd);
389 } else {
390 nptr->appnd = "";
391 nptr->appndl = 0;
392 }
393
394 split(dp);
395 if (dp.key.empty()) goto error;
396 // key is the conditions descriptions
397 char * cond = iconv(dp.key);
398 int cond_len = normalize_cond_str(cond);
399 if (cond_len < 0)
400 return (make_err(invalid_cond, MsgConv(lang)(cond))
401 .with_file(affix_file, dp.line_num));
402 if (nptr->stripl != 0) {
403 char * cc = cond;
404 if (affix_type == 'S') cc += cond_len - nptr->stripl;
405 if (cond_len < nptr->stripl ||
406 memcmp(cc, nptr->strip, nptr->stripl) != 0)
407 return (make_err(invalid_cond_strip,
408 MsgConv(lang)(cond), MsgConv(lang)(nptr->strip))
409 .with_file(affix_file, dp.line_num));
410 }
411 encodeit(conds_lookup, data_buf, nptr, cond);
412
413 // now create SfxEntry or PfxEntry objects and use links to
414 // build an ordered (sorted by affix string) list
415 if (affix_type == 'P')
416 build_pfxlist(static_cast<PfxEntry *>(nptr));
417 else
418 build_sfxlist(static_cast<SfxEntry *>(nptr));
419 }
420 }
421 continue;
422 error:
423 return make_err(corrupt_affix, MsgConv(lang)(achar)).with_file(affix_file, dp.line_num);
424 error_count:
425 return make_err(corrupt_affix, MsgConv(lang)(achar),
426 _("Possibly incorrect count.")).with_file(affix_file, dp.line_num);
427 }
428 afflst.close();
429
430 // now we can speed up performance greatly taking advantage of the
431 // relationship between the affixes and the idea of "subsets".
432
433 // View each prefix as a potential leading subset of another and view
434 // each suffix (reversed) as a potential trailing subset of another.
435
436 // To illustrate this relationship if we know the prefix "ab" is
437 // found in the word to examine, only prefixes that "ab" is a
438 // leading subset of need be examined. Furthermore is "ab" is not
439 // present then none of the prefixes that "ab" is is a subset need
440 // be examined.
441
442 // The same argument goes for suffix string that are reversed.
443
444 // Then to top this off why not examine the first char of the word
445 // to quickly limit the set of prefixes to examine (i.e. the
446 // prefixes to examine must be leading supersets of the first
447 // character of the word (if they exist)
448
449 // To take advantage of this "subset" relationship, we need to add
450 // two links from entry. One to take next if the current prefix
451 // is found (call it nexteq) and one to take next if the current
452 // prefix is not found (call it nextne).
453
454 // Since we have built ordered lists, all that remains is to
455 // properly initialize the nextne and nexteq pointers that relate
456 // them
457
458 process_pfx_order();
459 process_sfx_order();
460
461 //CERR.printf("%u\n", data_buf.calc_size()/1024);
462
463 return no_err;
464
465 }
466
467
468 // we want to be able to quickly access prefix information
469 // both by prefix flag, and sorted by prefix string itself
470 // so we need to set up two indexes
471
build_pfxlist(PfxEntry * pfxptr)472 PosibErr<void> AffixMgr::build_pfxlist(PfxEntry* pfxptr)
473 {
474 PfxEntry * ptr;
475 PfxEntry * ep = pfxptr;
476
477 // get the right starting point
478 const char * key = ep->key();
479 const byte flg = ep->flag();
480
481 // first index by flag which must exist
482 ptr = pFlag[flg];
483 ep->flag_next = ptr;
484 pFlag[flg] = ep;
485
486 // next insert the affix string, it will be sorted latter
487
488 byte sp = *((const byte *)key);
489 ptr = pStart[sp];
490 ep->next = ptr;
491 pStart[sp] = ep;
492 return no_err;
493 }
494
495 // we want to be able to quickly access suffix information
496 // both by suffix flag, and sorted by the reverse of the
497 // suffix string itself; so we need to set up two indexes
498
build_sfxlist(SfxEntry * sfxptr)499 PosibErr<void> AffixMgr::build_sfxlist(SfxEntry* sfxptr)
500 {
501 SfxEntry * ptr;
502 SfxEntry * ep = sfxptr;
503 char * tmp = (char *)data_buf.alloc(sfxptr->appndl + 1);
504 sfxptr->rappnd = tmp;
505
506 // reverse the string
507 char * dest = tmp + sfxptr->appndl;
508 *dest-- = 0;
509 const char * src = sfxptr->appnd;
510 for (; dest >= tmp; --dest, ++src)
511 *dest = *src;
512
513 /* get the right starting point */
514 const char * key = ep->key();
515 const byte flg = ep->flag();
516
517 // first index by flag which must exist
518 ptr = sFlag[flg];
519 ep->flag_next = ptr;
520 sFlag[flg] = ep;
521
522 // next insert the affix string, it will be sorted latter
523
524 byte sp = *((const byte *)key);
525 ptr = sStart[sp];
526 ep->next = ptr;
527 sStart[sp] = ep;
528 return no_err;
529 }
530
531
532
533 // initialize the PfxEntry links NextEQ and NextNE to speed searching
process_pfx_order()534 PosibErr<void> AffixMgr::process_pfx_order()
535 {
536 PfxEntry* ptr;
537
538 // loop through each prefix list starting point
539 for (int i=1; i < SETSIZE; i++) {
540
541 ptr = pStart[i];
542
543 if (ptr && ptr->next)
544 ptr = pStart[i] = sort(ptr, AffixLess<PfxEntry>());
545
546 // look through the remainder of the list
547 // and find next entry with affix that
548 // the current one is not a subset of
549 // mark that as destination for NextNE
550 // use next in list that you are a subset
551 // of as NextEQ
552
553 for (; ptr != NULL; ptr = ptr->next) {
554
555 PfxEntry * nptr = ptr->next;
556 for (; nptr != NULL; nptr = nptr->next) {
557 if (! isSubset( ptr->key() , nptr->key() )) break;
558 }
559 ptr->next_ne = nptr;
560 ptr->next_eq = NULL;
561 if ((ptr->next) && isSubset(ptr->key() ,
562 (ptr->next)->key()))
563 ptr->next_eq = ptr->next;
564 }
565
566 // now clean up by adding smart search termination strings
567 // if you are already a superset of the previous prefix
568 // but not a subset of the next, search can end here
569 // so set NextNE properly
570
571 ptr = pStart[i];
572 for (; ptr != NULL; ptr = ptr->next) {
573 PfxEntry * nptr = ptr->next;
574 PfxEntry * mptr = NULL;
575 for (; nptr != NULL; nptr = nptr->next) {
576 if (! isSubset(ptr->key(),nptr->key())) break;
577 mptr = nptr;
578 }
579 if (mptr) mptr->next_ne = NULL;
580 }
581 }
582 return no_err;
583 }
584
585
586
587 // initialize the SfxEntry links NextEQ and NextNE to speed searching
process_sfx_order()588 PosibErr<void> AffixMgr::process_sfx_order()
589 {
590 SfxEntry* ptr;
591
592 // loop through each prefix list starting point
593 for (int i=1; i < SETSIZE; i++) {
594
595 ptr = sStart[i];
596
597 if (ptr && ptr->next)
598 ptr = sStart[i] = sort(ptr, AffixLess<SfxEntry>());
599
600 // look through the remainder of the list
601 // and find next entry with affix that
602 // the current one is not a subset of
603 // mark that as destination for NextNE
604 // use next in list that you are a subset
605 // of as NextEQ
606
607 for (; ptr != NULL; ptr = ptr->next) {
608 SfxEntry * nptr = ptr->next;
609 for (; nptr != NULL; nptr = nptr->next) {
610 if (! isSubset(ptr->key(),nptr->key())) break;
611 }
612 ptr->next_ne = nptr;
613 ptr->next_eq = NULL;
614 if ((ptr->next) && isSubset(ptr->key(),(ptr->next)->key()))
615 ptr->next_eq = ptr->next;
616 }
617
618
619 // now clean up by adding smart search termination strings:
620 // if you are already a superset of the previous suffix
621 // but not a subset of the next, search can end here
622 // so set NextNE properly
623
624 ptr = sStart[i];
625 for (; ptr != NULL; ptr = ptr->next) {
626 SfxEntry * nptr = ptr->next;
627 SfxEntry * mptr = NULL;
628 for (; nptr != NULL; nptr = nptr->next) {
629 if (! isSubset(ptr->key(),nptr->key())) break;
630 mptr = nptr;
631 }
632 if (mptr) mptr->next_ne = NULL;
633 }
634 }
635 return no_err;
636 }
637
638 // takes aff file condition string and creates the
639 // conds array - please see the appendix at the end of the
640 // file affentry.cxx which describes what is going on here
641 // in much more detail
642
encodeit(CondsLookup & l,ObjStack & buf,AffEntry * ptr,char * cs)643 static void encodeit(CondsLookup & l, ObjStack & buf,
644 AffEntry * ptr, char * cs)
645 {
646 byte c;
647 int i, j, k;
648
649 // see if we already have this conds matrix
650
651 CondsLookup::iterator itr = l.find(cs);
652 if (!(itr == l.end())) {
653 ptr->conds = *itr;
654 return;
655 }
656
657 Conds * cds = (Conds *)buf.alloc_bottom(sizeof(Conds));
658 cds->str = buf.dup(cs);
659 l.insert(cds);
660 ptr->conds = cds;
661
662 int nc = strlen(cs);
663 VARARRAYM(byte, mbr, nc + 1, MAXLNLEN);
664
665 // now clear the conditions array
666 memset(cds->conds, 0, sizeof(cds->conds));
667
668 // now parse the string to create the conds array
669
670 int neg = 0; // complement indicator
671 int grp = 0; // group indicator
672 int n = 0; // number of conditions
673 int ec = 0; // end condition indicator
674 int nm = 0; // number of member in group
675
676 // if no condition just return
677 if (strcmp(cs,".")==0) {
678 cds->num = 0;
679 return;
680 }
681
682 i = 0;
683 while (i < nc) {
684 c = *((byte *)(cs + i));
685
686 // start group indicator
687 if (c == '[') {
688 grp = 1;
689 c = 0;
690 }
691
692 // complement flag
693 if ((grp == 1) && (c == '^')) {
694 neg = 1;
695 c = 0;
696 }
697
698 // end goup indicator
699 if (c == ']') {
700 ec = 1;
701 c = 0;
702 }
703
704 // add character of group to list
705 if ((grp == 1) && (c != 0)) {
706 *(mbr + nm) = c;
707 nm++;
708 c = 0;
709 }
710
711 // end of condition
712 if (c != 0) {
713 ec = 1;
714 }
715
716
717 if (ec) {
718 if (grp == 1) {
719 if (neg == 0) {
720 // set the proper bits in the condition array vals for those chars
721 for (j=0;j<nm;j++) {
722 k = (unsigned int) mbr[j];
723 cds->conds[k] = cds->conds[k] | (1 << n);
724 }
725 } else {
726 // complement so set all of them and then unset indicated ones
727 for (j=0;j<SETSIZE;j++) cds->conds[j] = cds->conds[j] | (1 << n);
728 for (j=0;j<nm;j++) {
729 k = (unsigned int) mbr[j];
730 cds->conds[k] = cds->conds[k] & ~(1 << n);
731 }
732 }
733 neg = 0;
734 grp = 0;
735 nm = 0;
736 } else {
737 // not a group so just set the proper bit for this char
738 // but first handle special case of . inside condition
739 if (c == '.') {
740 // wild card character so set them all
741 for (j=0;j<SETSIZE;j++) cds->conds[j] = cds->conds[j] | (1 << n);
742 } else {
743 cds->conds[(unsigned int)c] = cds->conds[(unsigned int)c] | (1 << n);
744 }
745 }
746 n++;
747 ec = 0;
748 }
749
750
751 i++;
752 }
753 cds->num = n;
754 return;
755 }
756
757
758 // check word for prefixes
prefix_check(const LookupInfo & linf,ParmString word,CheckInfo & ci,GuessInfo * gi,bool cross) const759 bool AffixMgr::prefix_check (const LookupInfo & linf, ParmString word,
760 CheckInfo & ci, GuessInfo * gi, bool cross) const
761 {
762 if (word.empty()) return false;
763
764 // first handle the special case of 0 length prefixes
765 PfxEntry * pe = pStart[0];
766 while (pe) {
767 if (pe->check(linf,this,word,ci,gi)) return true;
768 pe = pe->next;
769 }
770
771 // now handle the general case
772 byte sp = *reinterpret_cast<const byte *>(word.str());
773 PfxEntry * pptr = pStart[sp];
774
775 while (pptr) {
776 if (isSubset(pptr->key(),word)) {
777 if (pptr->check(linf,this,word,ci,gi,cross)) return true;
778 pptr = pptr->next_eq;
779 } else {
780 pptr = pptr->next_ne;
781 }
782 }
783
784 return false;
785 }
786
787
788 // check word for suffixes
suffix_check(const LookupInfo & linf,ParmString word,CheckInfo & ci,GuessInfo * gi,int sfxopts,AffEntry * ppfx) const789 bool AffixMgr::suffix_check (const LookupInfo & linf, ParmString word,
790 CheckInfo & ci, GuessInfo * gi,
791 int sfxopts, AffEntry * ppfx) const
792 {
793 if (word.empty()) return false;
794
795 // first handle the special case of 0 length suffixes
796 SfxEntry * se = sStart[0];
797 while (se) {
798 if (se->check(linf, word, ci, gi, sfxopts, ppfx)) return true;
799 se = se->next;
800 }
801
802 // now handle the general case
803 byte sp = *((const byte *)(word + word.size() - 1));
804 SfxEntry * sptr = sStart[sp];
805
806 while (sptr) {
807 if (isRevSubset(sptr->key(), word + word.size() - 1, word.size())) {
808 if (sptr->check(linf, word, ci, gi, sfxopts, ppfx)) return true;
809 sptr = sptr->next_eq;
810 } else {
811 sptr = sptr->next_ne;
812 }
813 }
814
815 return false;
816 }
817
818 // check if word with affixes is correctly spelled
affix_check(const LookupInfo & linf,ParmString word,CheckInfo & ci,GuessInfo * gi) const819 bool AffixMgr::affix_check(const LookupInfo & linf, ParmString word,
820 CheckInfo & ci, GuessInfo * gi) const
821 {
822 if (word.empty()) return false;
823
824 // Deal With Case in a semi-intelligent manner
825 CasePattern cp = lang->LangImpl::case_pattern(word);
826 ParmString pword = word;
827 ParmString sword = word;
828 CharVector lower;
829 if (cp == FirstUpper) {
830 lower.append(word, word.size() + 1);
831 lower[0] = lang->to_lower(word[0]);
832 pword = ParmString(lower.data(), lower.size() - 1);
833 } else if (cp == AllUpper) {
834 lower.resize(word.size() + 1);
835 unsigned int i = 0;
836 for (; i != word.size(); ++i)
837 lower[i] = lang->to_lower(word[i]);
838 lower[i] = '\0';
839 pword = ParmString(lower.data(), lower.size() - 1);
840 sword = pword;
841 }
842
843 // check all prefixes (also crossed with suffixes if allowed)
844 if (prefix_check(linf, pword, ci, gi)) return true;
845
846 // if still not found check all suffixes
847 if (suffix_check(linf, sword, ci, gi, 0, NULL)) return true;
848
849 // if still not found check again but with the lower case version
850 // which can make a difference if the entire word matches the cond
851 // string
852 if (cp == FirstUpper) {
853 return suffix_check(linf, pword, ci, gi, 0, NULL);
854 } else {
855 return false;
856 }
857 }
858
munch(ParmString word,GuessInfo * gi,bool cross) const859 void AffixMgr::munch(ParmString word, GuessInfo * gi, bool cross) const
860 {
861 LookupInfo li(0, LookupInfo::AlwaysTrue);
862 CheckInfo ci;
863 gi->reset();
864 CasePattern cp = lang->LangImpl::case_pattern(word);
865 if (cp == AllUpper) return;
866 if (cp != FirstUpper)
867 prefix_check(li, word, ci, gi, cross);
868 suffix_check(li, word, ci, gi, 0, NULL);
869 }
870
expand(ParmString word,ParmString aff,ObjStack & buf,int limit) const871 WordAff * AffixMgr::expand(ParmString word, ParmString aff,
872 ObjStack & buf, int limit) const
873 {
874 byte * empty = (byte *)buf.alloc(1);
875 *empty = 0;
876
877 byte * suf = (byte *)buf.alloc(aff.size() + 1);
878 byte * suf_e = suf;
879 byte * csuf = (byte *)buf.alloc(aff.size() + 1);
880 byte * csuf_e = csuf;
881
882 WordAff * head = (WordAff *)buf.alloc_bottom(sizeof(WordAff));
883 WordAff * cur = head;
884 cur->word = buf.dup(word);
885 cur->aff = suf;
886
887 for (const byte * c = (const byte *)aff.str(), * end = c + aff.size();
888 c != end;
889 ++c)
890 {
891 if (sFlag[*c]) *suf_e++ = *c;
892 if (sFlag[*c] && sFlag[*c]->allow_cross()) *csuf_e++ = *c;
893
894 for (PfxEntry * p = pFlag[*c]; p; p = p->flag_next) {
895 SimpleString newword = p->add(word, buf);
896 if (!newword) continue;
897 cur->next = (WordAff *)buf.alloc_bottom(sizeof(WordAff));
898 cur = cur->next;
899 cur->word = newword;
900 cur->aff = p->allow_cross() ? csuf : empty;
901 }
902 }
903
904 *suf_e = 0;
905 *csuf_e = 0;
906 cur->next = 0;
907
908 if (limit == 0) return head;
909
910 WordAff * * end = &cur->next;
911 WordAff * * very_end = end;
912 size_t nsuf_s = suf_e - suf + 1;
913
914 for (WordAff * * cur = &head; cur != end; cur = &(*cur)->next) {
915 if ((int)(*cur)->word.size - max_strip_ >= limit) continue;
916 byte * nsuf = (byte *)buf.alloc(nsuf_s);
917 expand_suffix((*cur)->word, (*cur)->aff, buf, limit, nsuf, &very_end, word);
918 (*cur)->aff = nsuf;
919 }
920
921 return head;
922 }
923
expand_suffix(ParmString word,const byte * aff,ObjStack & buf,int limit,byte * new_aff,WordAff *** l,ParmString orig_word) const924 WordAff * AffixMgr::expand_suffix(ParmString word, const byte * aff,
925 ObjStack & buf, int limit,
926 byte * new_aff, WordAff * * * l,
927 ParmString orig_word) const
928 {
929 WordAff * head = 0;
930 if (l) head = **l;
931 WordAff * * cur = l ? *l : &head;
932 bool expanded = false;
933 bool not_expanded = false;
934 if (!orig_word) orig_word = word;
935
936 while (*aff) {
937 if ((int)word.size() - max_strip_f[*aff] < limit) {
938 for (SfxEntry * p = sFlag[*aff]; p; p = p->flag_next) {
939 SimpleString newword = p->add(word, buf, limit, orig_word);
940 if (!newword) continue;
941 if (newword == EMPTY) {not_expanded = true; continue;}
942 *cur = (WordAff *)buf.alloc_bottom(sizeof(WordAff));
943 (*cur)->word = newword;
944 (*cur)->aff = (const byte *)EMPTY;
945 cur = &(*cur)->next;
946 expanded = true;
947 }
948 }
949 if (new_aff && (!expanded || not_expanded)) *new_aff++ = *aff;
950 ++aff;
951 }
952 *cur = 0;
953 if (new_aff) *new_aff = 0;
954 if (l) *l = cur;
955 return head;
956 }
957
check_affix(ParmString word,char aff) const958 CheckAffixRes AffixMgr::check_affix(ParmString word, char aff) const
959 {
960 CheckAffixRes res = InvalidAffix;
961
962 for (PfxEntry * p = pFlag[(unsigned char)aff]; p; p = p->flag_next) {
963 res = InapplicableAffix;
964 if (p->applicable(word)) return ValidAffix;
965 }
966
967 for (SfxEntry * p = sFlag[(unsigned char)aff]; p; p = p->flag_next) {
968 if (res == InvalidAffix) res = InapplicableAffix;
969 if (p->applicable(word)) return ValidAffix;
970 }
971
972 return res;
973 }
974
975
976
977 //////////////////////////////////////////////////////////////////////
978 //
979 // LookupInfo
980 //
981
lookup(ParmString word,const SensitiveCompare * c,char achar,WordEntry & o,GuessInfo * gi) const982 int LookupInfo::lookup (ParmString word, const SensitiveCompare * c,
983 char achar,
984 WordEntry & o, GuessInfo * gi) const
985 {
986 SpellerImpl::WS::const_iterator i = begin;
987 const char * g = 0;
988 if (mode == Word) {
989 do {
990 (*i)->lookup(word, c, o);
991 for (;!o.at_end(); o.adv()) {
992 if (TESTAFF(o.aff, achar))
993 return 1;
994 else
995 g = o.word;
996 }
997 ++i;
998 } while (i != end);
999 } else if (mode == Clean) {
1000 do {
1001 (*i)->clean_lookup(word, o);
1002 for (;!o.at_end(); o.adv()) {
1003 if (TESTAFF(o.aff, achar))
1004 return 1;
1005 else
1006 g = o.word;
1007 }
1008 ++i;
1009 } while (i != end);
1010 } else if (gi) {
1011 g = gi->dup(word);
1012 }
1013 if (gi && g) {
1014 CheckInfo * ci = gi->add();
1015 ci->word = g;
1016 return -1;
1017 }
1018 return 0;
1019 }
1020
1021 //////////////////////////////////////////////////////////////////////
1022 //
1023 // Affix Entry
1024 //
1025
applicable(SimpleString word) const1026 bool PfxEntry::applicable(SimpleString word) const
1027 {
1028 unsigned int cond;
1029 /* make sure all conditions match */
1030 if ((word.size > stripl) && (word.size >= conds->num)) {
1031 const byte * cp = (const byte *) word.str;
1032 for (cond = 0; cond < conds->num; cond++) {
1033 if ((conds->get(*cp++) & (1 << cond)) == 0)
1034 break;
1035 }
1036 if (cond >= conds->num) return true;
1037 }
1038 return false;
1039 }
1040
1041 // add prefix to this word assuming conditions hold
add(SimpleString word,ObjStack & buf) const1042 SimpleString PfxEntry::add(SimpleString word, ObjStack & buf) const
1043 {
1044 unsigned int cond;
1045 /* make sure all conditions match */
1046 if ((word.size > stripl) && (word.size >= conds->num)) {
1047 const byte * cp = (const byte *) word.str;
1048 for (cond = 0; cond < conds->num; cond++) {
1049 if ((conds->get(*cp++) & (1 << cond)) == 0)
1050 break;
1051 }
1052 if (cond >= conds->num) {
1053 /* */
1054 int alen = word.size - stripl;
1055 char * newword = (char *)buf.alloc(alen + appndl + 1);
1056 if (appndl) memcpy(newword, appnd, appndl);
1057 memcpy(newword + appndl, word + stripl, alen + 1);
1058 return SimpleString(newword, alen + appndl);
1059 }
1060 }
1061 return SimpleString();
1062 }
1063
1064 // check if this prefix entry matches
check(const LookupInfo & linf,const AffixMgr * pmyMgr,ParmString word,CheckInfo & ci,GuessInfo * gi,bool cross) const1065 bool PfxEntry::check(const LookupInfo & linf, const AffixMgr * pmyMgr,
1066 ParmString word,
1067 CheckInfo & ci, GuessInfo * gi, bool cross) const
1068 {
1069 unsigned int cond; // condition number being examined
1070 unsigned tmpl; // length of tmpword
1071 WordEntry wordinfo; // hash entry of root word or NULL
1072 byte * cp;
1073 VARARRAYM(char, tmpword, word.size()+stripl+1, MAXWORDLEN+1);
1074
1075 // on entry prefix is 0 length or already matches the beginning of the word.
1076 // So if the remaining root word has positive length
1077 // and if there are enough chars in root word and added back strip chars
1078 // to meet the number of characters conditions, then test it
1079
1080 tmpl = word.size() - appndl;
1081
1082 if ((tmpl > 0) && (tmpl + stripl >= conds->num)) {
1083
1084 // generate new root word by removing prefix and adding
1085 // back any characters that would have been stripped
1086
1087 if (stripl) strcpy (tmpword, strip);
1088 strcpy ((tmpword + stripl), (word + appndl));
1089
1090 // now make sure all of the conditions on characters
1091 // are met. Please see the appendix at the end of
1092 // this file for more info on exactly what is being
1093 // tested
1094
1095 cp = (byte *)tmpword;
1096 for (cond = 0; cond < conds->num; cond++) {
1097 if ((conds->get(*cp++) & (1 << cond)) == 0) break;
1098 }
1099
1100 // if all conditions are met then check if resulting
1101 // root word in the dictionary
1102
1103 if (cond >= conds->num) {
1104 CheckInfo * lci = 0;
1105 CheckInfo * guess = 0;
1106 tmpl += stripl;
1107
1108 int res = linf.lookup(tmpword, &linf.sp->s_cmp_end, achar, wordinfo, gi);
1109
1110 if (res == 1) {
1111
1112 lci = &ci;
1113 lci->word = wordinfo.word;
1114 goto quit;
1115
1116 } else if (res == -1) {
1117
1118 guess = gi->head;
1119
1120 }
1121
1122 // prefix matched but no root word was found
1123 // if XPRODUCT is allowed, try again but now
1124 // cross checked combined with a suffix
1125
1126 if (gi)
1127 lci = gi->head;
1128
1129 if (cross && xpflg & XPRODUCT) {
1130 if (pmyMgr->suffix_check(linf, ParmString(tmpword, tmpl),
1131 ci, gi,
1132 XPRODUCT, (AffEntry *)this)) {
1133 lci = &ci;
1134
1135 } else if (gi) {
1136
1137 CheckInfo * stop = lci;
1138 for (lci = gi->head;
1139 lci != stop;
1140 lci = const_cast<CheckInfo *>(lci->next))
1141 {
1142 lci->pre_flag = achar;
1143 lci->pre_strip_len = stripl;
1144 lci->pre_add_len = appndl;
1145 lci->pre_add = appnd;
1146 }
1147
1148 } else {
1149
1150 lci = 0;
1151
1152 }
1153 }
1154
1155 if (guess)
1156 lci = guess;
1157
1158 quit:
1159 if (lci) {
1160 lci->pre_flag = achar;
1161 lci->pre_strip_len = stripl;
1162 lci->pre_add_len = appndl;
1163 lci->pre_add = appnd;
1164 }
1165 if (lci == &ci) return true;
1166 }
1167 }
1168 return false;
1169 }
1170
applicable(SimpleString word) const1171 bool SfxEntry::applicable(SimpleString word) const
1172 {
1173 int cond;
1174 /* make sure all conditions match */
1175 if ((word.size > stripl) && (word.size >= conds->num)) {
1176 const byte * cp = (const byte *) (word + word.size);
1177 for (cond = conds->num; --cond >=0; ) {
1178 if ((conds->get(*--cp) & (1 << cond)) == 0)
1179 break;
1180 }
1181 if (cond < 0) return true;
1182 }
1183 return false;
1184 }
1185
1186 // add suffix to this word assuming conditions hold
add(SimpleString word,ObjStack & buf,int limit,SimpleString orig_word) const1187 SimpleString SfxEntry::add(SimpleString word, ObjStack & buf,
1188 int limit, SimpleString orig_word) const
1189 {
1190 int cond;
1191 /* make sure all conditions match */
1192 if ((orig_word.size > stripl) && (orig_word.size >= conds->num)) {
1193 const byte * cp = (const byte *) (orig_word + orig_word.size);
1194 for (cond = conds->num; --cond >=0; ) {
1195 if ((conds->get(*--cp) & (1 << cond)) == 0)
1196 break;
1197 }
1198 if (cond < 0) {
1199 int alen = word.size - stripl;
1200 if (alen >= limit) return EMPTY;
1201 /* we have a match so add suffix */
1202 char * newword = (char *)buf.alloc(alen + appndl + 1);
1203 memcpy(newword, word, alen);
1204 memcpy(newword + alen, appnd, appndl + 1);
1205 return SimpleString(newword, alen + appndl);
1206 }
1207 }
1208 return SimpleString();
1209 }
1210
1211 // see if this suffix is present in the word
check(const LookupInfo & linf,ParmString word,CheckInfo & ci,GuessInfo * gi,int optflags,AffEntry * ppfx)1212 bool SfxEntry::check(const LookupInfo & linf, ParmString word,
1213 CheckInfo & ci, GuessInfo * gi,
1214 int optflags, AffEntry* ppfx)
1215 {
1216 unsigned tmpl; // length of tmpword
1217 int cond; // condition beng examined
1218 WordEntry wordinfo; // hash entry pointer
1219 byte * cp;
1220 VARARRAYM(char, tmpword, word.size()+stripl+1, MAXWORDLEN+1);
1221 PfxEntry* ep = (PfxEntry *) ppfx;
1222
1223 // if this suffix is being cross checked with a prefix
1224 // but it does not support cross products skip it
1225
1226 if ((optflags & XPRODUCT) != 0 && (xpflg & XPRODUCT) == 0)
1227 return false;
1228
1229 // upon entry suffix is 0 length or already matches the end of the word.
1230 // So if the remaining root word has positive length
1231 // and if there are enough chars in root word and added back strip chars
1232 // to meet the number of characters conditions, then test it
1233
1234 tmpl = word.size() - appndl;
1235
1236 if ((tmpl > 0) && (tmpl + stripl >= conds->num)) {
1237
1238 // generate new root word by removing suffix and adding
1239 // back any characters that would have been stripped or
1240 // or null terminating the shorter string
1241
1242 strcpy (tmpword, word);
1243 cp = (byte *)(tmpword + tmpl);
1244 if (stripl) {
1245 strcpy ((char *)cp, strip);
1246 tmpl += stripl;
1247 cp = (byte *)(tmpword + tmpl);
1248 } else *cp = '\0';
1249
1250 // now make sure all of the conditions on characters
1251 // are met. Please see the appendix at the end of
1252 // this file for more info on exactly what is being
1253 // tested
1254
1255 for (cond = conds->num; --cond >= 0; ) {
1256 if ((conds->get(*--cp) & (1 << cond)) == 0) break;
1257 }
1258
1259 // if all conditions are met then check if resulting
1260 // root word in the dictionary
1261
1262 if (cond < 0) {
1263 CheckInfo * lci = 0;
1264 tmpl += stripl;
1265 const SensitiveCompare * cmp =
1266 optflags & XPRODUCT ? &linf.sp->s_cmp_middle : &linf.sp->s_cmp_begin;
1267 int res = linf.lookup(tmpword, cmp, achar, wordinfo, gi);
1268 if (res == 1
1269 && ((optflags & XPRODUCT) == 0 || TESTAFF(wordinfo.aff, ep->achar)))
1270 {
1271 lci = &ci;
1272 lci->word = wordinfo.word;
1273 } else if (res == 1 && gi) {
1274 lci = gi->add();
1275 lci->word = wordinfo.word;
1276 } else if (res == -1) { // gi must be defined
1277 lci = gi->head;
1278 }
1279
1280 if (lci) {
1281 lci->suf_flag = achar;
1282 lci->suf_strip_len = stripl;
1283 lci->suf_add_len = appndl;
1284 lci->suf_add = appnd;
1285 }
1286
1287 if (lci == &ci) return true;
1288 }
1289 }
1290 return false;
1291 }
1292
1293 //////////////////////////////////////////////////////////////////////
1294 //
1295 // new_affix_mgr
1296 //
1297
1298
new_affix_mgr(ParmString name,Conv & iconv,const Language * lang)1299 PosibErr<AffixMgr *> new_affix_mgr(ParmString name,
1300 Conv & iconv,
1301 const Language * lang)
1302 {
1303 if (name == "none")
1304 return 0;
1305 //CERR << "NEW AFFIX MGR\n";
1306 String file;
1307 file += lang->data_dir();
1308 file += '/';
1309 file += lang->name();
1310 file += "_affix.dat";
1311 AffixMgr * affix;
1312 affix = new AffixMgr(lang);
1313 PosibErrBase pe = affix->setup(file, iconv);
1314 if (pe.has_err()) {
1315 delete affix;
1316 return pe;
1317 } else {
1318 return affix;
1319 }
1320 }
1321 }
1322
1323 /**************************************************************************
1324
1325 Appendix: Understanding Affix Code
1326
1327
1328 An affix is either a prefix or a suffix attached to root words to make
1329 other words.
1330
1331 Basically a Prefix or a Suffix is set of AffEntry objects
1332 which store information about the prefix or suffix along
1333 with supporting routines to check if a word has a particular
1334 prefix or suffix or a combination.
1335
1336 The structure affentry is defined as follows:
1337
1338 struct AffEntry
1339 {
1340 unsigned char achar; // char used to represent the affix
1341 char * strip; // string to strip before adding affix
1342 char * appnd; // the affix string to add
1343 short stripl; // length of the strip string
1344 short appndl; // length of the affix string
1345 short numconds; // the number of conditions that must be met
1346 short xpflg; // flag: XPRODUCT- combine both prefix and suffix
1347 char conds[SETSIZE]; // array which encodes the conditions to be met
1348 };
1349
1350
1351 Here is a suffix borrowed from the en_US.aff file. This file
1352 is whitespace delimited.
1353
1354 SFX D Y 4
1355 SFX D 0 e d
1356 SFX D y ied [^aeiou]y
1357 SFX D 0 ed [^ey]
1358 SFX D 0 ed [aeiou]y
1359
1360 This information can be interpreted as follows:
1361
1362 In the first line has 4 fields
1363
1364 Field
1365 -----
1366 1 SFX - indicates this is a suffix
1367 2 D - is the name of the character flag which represents this suffix
1368 3 Y - indicates it can be combined with prefixes (cross product)
1369 4 4 - indicates that sequence of 4 affentry structures are needed to
1370 properly store the affix information
1371
1372 The remaining lines describe the unique information for the 4 SfxEntry
1373 objects that make up this affix. Each line can be interpreted
1374 as follows: (note fields 1 and 2 are as a check against line 1 info)
1375
1376 Field
1377 -----
1378 1 SFX - indicates this is a suffix
1379 2 D - is the name of the character flag for this affix
1380 3 y - the string of chars to strip off before adding affix
1381 (a 0 here indicates the NULL string)
1382 4 ied - the string of affix characters to add
1383 5 [^aeiou]y - the conditions which must be met before the affix
1384 can be applied
1385
1386 Field 5 is interesting. Since this is a suffix, field 5 tells us that
1387 there are 2 conditions that must be met. The first condition is that
1388 the next to the last character in the word must *NOT* be any of the
1389 following "a", "e", "i", "o" or "u". The second condition is that
1390 the last character of the word must end in "y".
1391
1392 So how can we encode this information concisely and be able to
1393 test for both conditions in a fast manner? The answer is found
1394 but studying the wonderful ispell code of Geoff Kuenning, et.al.
1395 (now available under a normal BSD license).
1396
1397 If we set up a conds array of 256 bytes indexed (0 to 255) and access it
1398 using a character (cast to an unsigned char) of a string, we have 8 bits
1399 of information we can store about that character. Specifically we
1400 could use each bit to say if that character is allowed in any of the
1401 last (or first for prefixes) 8 characters of the word.
1402
1403 Basically, each character at one end of the word (up to the number
1404 of conditions) is used to index into the conds array and the resulting
1405 value found there says whether the that character is valid for a
1406 specific character position in the word.
1407
1408 For prefixes, it does this by setting bit 0 if that char is valid
1409 in the first position, bit 1 if valid in the second position, and so on.
1410
1411 If a bit is not set, then that char is not valid for that position in the
1412 word.
1413
1414 If working with suffixes bit 0 is used for the character closest
1415 to the front, bit 1 for the next character towards the end, ...,
1416 with bit numconds-1 representing the last char at the end of the string.
1417
1418 Note: since entries in the conds[] are 8 bits, only 8 conditions
1419 (read that only 8 character positions) can be examined at one
1420 end of a word (the beginning for prefixes and the end for suffixes.
1421
1422 So to make this clearer, lets encode the conds array values for the
1423 first two affentries for the suffix D described earlier.
1424
1425
1426 For the first affentry:
1427 numconds = 1 (only examine the last character)
1428
1429 conds['e'] = (1 << 0) (the word must end in an E)
1430 all others are all 0
1431
1432 For the second affentry:
1433 numconds = 2 (only examine the last two characters)
1434
1435 conds[X] = conds[X] | (1 << 0) (aeiou are not allowed)
1436 where X is all characters *but* a, e, i, o, or u
1437
1438
1439 conds['y'] = (1 << 1) (the last char must be a y)
1440 all other bits for all other entries in the conds array are zero
1441
1442
1443 **************************************************************************/
1444