1 // This file is part of The New Aspell
2 // Copyright (C) 2004 by Kevin Atkinson under the GNU LGPL
3 // license version 2.0 or 2.1.  You should have received a copy of the
4 // LGPL license along with this library if you did not you can find it
5 // at http://www.gnu.org/.
6 //
7 // This code is based on the the MySpell affix code:
8 //
9 /*
10  * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada And
11  * Contributors.  All rights reserved.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  *
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  *
20  * 2. Redistributions in binary form must reproduce the above copyright
21  *    notice, this list of conditions and the following disclaimer in the
22  *    documentation and/or other materials provided with the distribution.
23  *
24  * 3. All modifications to the source code must be clearly marked as
25  *    such.  Binary redistributions based on modified source code
26  *    must be clearly marked as modified versions in the documentation
27  *    and/or other materials provided with the distribution.
28  *
29  * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
30  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
32  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
33  * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
34  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
36  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  *
42  */
43 
44 #include <cstdlib>
45 #include <cstring>
46 #include <cstdio>
47 
48 //#include "iostream.hpp"
49 
50 #include "affix.hpp"
51 #include "errors.hpp"
52 #include "getdata.hpp"
53 #include "parm_string.hpp"
54 #include "check_list.hpp"
55 #include "speller_impl.hpp"
56 #include "vararray.hpp"
57 #include "lsort.hpp"
58 #include "hash-t.hpp"
59 
60 #include "gettext.h"
61 
62 using namespace std;
63 
64 namespace aspeller {
65 
66 typedef unsigned char byte;
67 static char EMPTY[1] = {0};
68 
69 //////////////////////////////////////////////////////////////////////
70 //
71 // Entry struct definations
72 //
73 
74 struct Conds
75 {
76   char * str;
77   unsigned num;
78   char conds[SETSIZE];
getaspeller::Conds79   char get(byte i) const {return conds[i];}
80 };
81 
82 struct AffEntry
83 {
84   const char *   appnd;
85   const char *   strip;
86   byte           appndl;
87   byte           stripl;
88   byte           xpflg;
89   char           achar;
90   const Conds *  conds;
91   //unsigned int numconds;
92   //char         conds[SETSIZE];
93 };
94 
95 // A Prefix Entry
96 
97 struct PfxEntry : public AffEntry
98 {
99   PfxEntry * next;
100   PfxEntry * next_eq;
101   PfxEntry * next_ne;
102   PfxEntry * flag_next;
PfxEntryaspeller::PfxEntry103   PfxEntry() {}
104 
105   bool check(const LookupInfo &, const AffixMgr * pmyMgr,
106              ParmString, CheckInfo &, GuessInfo *, bool cross = true) const;
107 
allow_crossaspeller::PfxEntry108   inline bool          allow_cross() const { return ((xpflg & XPRODUCT) != 0); }
flagaspeller::PfxEntry109   inline byte flag() const { return achar;  }
keyaspeller::PfxEntry110   inline const char *  key() const  { return appnd;  }
111   bool applicable(SimpleString) const;
112   SimpleString add(SimpleString, ObjStack & buf) const;
113 };
114 
115 // A Suffix Entry
116 
117 struct SfxEntry : public AffEntry
118 {
119   const char * rappnd; // this is set in AffixMgr::build_sfxlist
120 
121   SfxEntry *   next;
122   SfxEntry *   next_eq;
123   SfxEntry *   next_ne;
124   SfxEntry *   flag_next;
125 
SfxEntryaspeller::SfxEntry126   SfxEntry() {}
127 
128   bool check(const LookupInfo &, ParmString, CheckInfo &, GuessInfo *,
129              int optflags, AffEntry * ppfx);
130 
allow_crossaspeller::SfxEntry131   inline bool          allow_cross() const { return ((xpflg & XPRODUCT) != 0); }
flagaspeller::SfxEntry132   inline byte flag() const { return achar;  }
keyaspeller::SfxEntry133   inline const char *  key() const  { return rappnd; }
134   bool applicable(SimpleString) const;
135   SimpleString add(SimpleString, ObjStack & buf, int limit, SimpleString) const;
136 };
137 
138 //////////////////////////////////////////////////////////////////////
139 //
140 // Utility functions declarations
141 //
142 
143 /* return 1 if s1 is subset of s2 */
isSubset(const char * s1,const char * s2)144 static bool isSubset(const char * s1, const char * s2)
145 {
146   while( *s1 && (*s1 == *s2) ) {
147     s1++;
148     s2++;
149   }
150   return (*s1 == '\0');
151 }
152 
153 // return 1 if s1 (reversed) is a leading subset of end of s2
isRevSubset(const char * s1,const char * end_of_s2,int len)154 static bool isRevSubset(const char * s1, const char * end_of_s2, int len)
155 {
156   while( (len > 0) && *s1 && (*s1 == *end_of_s2) ) {
157     s1++;
158     end_of_s2--;
159     len --;
160   }
161   return (*s1 == '\0');
162 }
163 
164 template <class T>
165 struct AffixLess
166 {
operator ()aspeller::AffixLess167   bool operator() (T * x, T * y) const {return strcmp(x->key(),y->key()) < 0;}
168 };
169 
170 // struct StringLookup {
171 //   struct Parms {
172 //     typedef const char * Value;
173 //     typedef const char * Key;
174 //     static const bool is_multi = false;
175 //     hash<const char *> hfun;
176 //     size_t hash(const char * s) {return hfun(s);}
177 //     bool equal(const char * x, const char * y) {return strcmp(x,y) == 0;}
178 //     const char * key(const char * c) {return c;}
179 //   };
180 //   typedef HashTable<Parms> Lookup;
181 //   Lookup lookup;
182 //   ObjStack * data_buf;
183 //   StringLookup(ObjStack * b) : data_buf(b) {}
184 //   const char * dup(const char * orig) {
185 //     pair<Lookup::iterator, bool> res = lookup.insert(orig);
186 //     if (res.second) *res.first = data_buf->dup(orig);
187 //     return *res.first;
188 //     //return data_buf->dup(orig);
189 //   }
190 // };
191 
192 struct CondsLookupParms {
193   typedef const Conds * Value;
194   typedef const char * Key;
195   static const bool is_multi = false;
196   acommon::hash<const char *> hfun;
hashaspeller::CondsLookupParms197   size_t hash(const char * s) {return hfun(s);}
equalaspeller::CondsLookupParms198   bool equal(const char * x, const char * y) {return strcmp(x,y) == 0;}
keyaspeller::CondsLookupParms199   const char * key(const Conds * c) {return c->str;}
200 };
201 
202 typedef HashTable<CondsLookupParms> CondsLookup;
203 
204 // normalizes and checks the cond_str
205 // returns the length of the new string or -1 if invalid
normalize_cond_str(char * str)206 static int normalize_cond_str(char * str)
207 {
208   char * s = str;
209   char * d = str;
210   while (*s) {
211     if (*s != '[') {
212       *d++ = *s++;
213     } else if (s[1] == '\0' || s[1] == ']') {
214       return -1;
215     } else if (s[2] == ']') {
216       *d++ = s[1];
217       s += 3;
218     } else {
219       *d++ = *s++;
220       if (*s == '^') *d++ = *s++;
221       while (*s != ']') {
222         if (*s == '\0' || *s == '[') return -1;
223         char * min = s;
224         for (char * i = s + 1; *i != ']'; ++i) {
225           if ((byte)*i < (byte)*min) min = i;}
226         char c = *s;
227         *d++ = *min;
228         *min = c;
229         ++s;
230       }
231       *d++ = *s++;
232     }
233   }
234   *d = '\0';
235   return d - str;
236 }
237 
238 static void encodeit(CondsLookup &, ObjStack &,
239                      AffEntry * ptr, char * cs);
240 
241 //////////////////////////////////////////////////////////////////////
242 //
243 // Affix Manager
244 //
245 
setup(ParmString affpath,Conv & iconv)246 PosibErr<void> AffixMgr::setup(ParmString affpath, Conv & iconv)
247 {
248   // register hash manager and load affix data from aff file
249   //cpdmin = 3;  // default value
250   max_strip_ = 0;
251   for (int i=0; i < SETSIZE; i++) {
252     pStart[i] = NULL;
253     sStart[i] = NULL;
254     pFlag[i] = NULL;
255     sFlag[i] = NULL;
256     max_strip_f[i] = 0;
257   }
258   return parse_file(affpath, iconv);
259 }
260 
AffixMgr(const Language * l)261 AffixMgr::AffixMgr(const Language * l)
262   : lang(l), data_buf(1024*16) {}
263 
~AffixMgr()264 AffixMgr::~AffixMgr() {}
265 
max_(int & lhs,int rhs)266 static inline void max_(int & lhs, int rhs)
267 {
268   if (lhs < rhs) lhs = rhs;
269 }
270 
271 // read in aff file and build up prefix and suffix entry objects
parse_file(const char * affpath,Conv & iconv)272 PosibErr<void> AffixMgr::parse_file(const char * affpath, Conv & iconv)
273 {
274   // io buffers
275   String buf; DataPair dp;
276 
277   CondsLookup conds_lookup;
278 
279   // open the affix file
280   affix_file = data_buf.dup(affpath);
281   FStream afflst;
282   RET_ON_ERR(afflst.open(affpath,"r"));
283 
284   // step one is to parse the affix file building up the internal
285   // affix data structures
286 
287   // read in each line ignoring any that do not
288   // start with a known line type indicator
289 
290   char prev_aff = '\0';
291 
292   while (getdata_pair(afflst,dp,buf)) {
293     char affix_type = ' ';
294 
295     /* parse in the name of the character set used by the .dict and .aff */
296 
297     if (dp.key == "SET") {
298       String buf;
299       encoding = data_buf.dup(fix_encoding_str(dp.value, buf));
300       if (strcmp(encoding, lang->data_encoding()) != 0)
301         return make_err(incorrect_encoding, affix_file, lang->data_encoding(), encoding);
302     }
303 
304     /* parse in the flag used by the controlled compound words */
305     //else if (d.key == "COMPOUNDFLAG")
306     //  compound = data_buf.dup(d.value);
307 
308     /* parse in the flag used by the controlled compound words */
309     //else if (d.key == "COMPOUNDMIN")
310     //  cpdmin = atoi(d.value); // FiXME
311 
312     //else if (dp.key == "TRY" || dp.key == "REP");
313 
314     else if (dp.key == "PFX" || dp.key == "SFX")
315       affix_type = dp.key[0];
316 
317     if (affix_type == ' ') continue;
318 
319     //
320     // parse this affix: P - prefix, S - suffix
321     //
322 
323     int numents = 0;      // number of affentry structures to parse
324     char achar='\0';      // affix char identifier
325     short xpflg=0;
326     AffEntry * nptr;
327     {
328       // split affix header line into pieces
329       split(dp);
330       if (dp.key.empty()) goto error;
331       // key is affix char
332       const char * astr = iconv(dp.key);
333       if (astr[0] == '\0' || astr[1] != '\0') goto error;
334       achar = astr[0];
335       if (achar == prev_aff) goto error_count;
336       prev_aff = achar;
337 
338       split(dp);
339       if (dp.key.size != 1 ||
340           !(dp.key[0] == 'Y' || dp.key[0] == 'N')) goto error;
341       // key is cross product indicator
342       if (dp.key[0] == 'Y') xpflg = XPRODUCT;
343 
344       split(dp);
345       if (dp.key.empty()) goto error;
346       // key is number of affentries
347 
348       numents = atoi(dp.key);
349 
350       for (int j = 0; j < numents; j++) {
351         getdata_pair(afflst, dp, buf);
352 
353         if (affix_type == 'P') {
354           nptr = (AffEntry *) data_buf.alloc_bottom(sizeof(PfxEntry));
355           new (nptr) PfxEntry;
356         } else {
357           nptr = (AffEntry *) data_buf.alloc_bottom(sizeof(SfxEntry));
358           new (nptr) SfxEntry;
359         }
360 
361         nptr->xpflg = xpflg;
362 
363         split(dp);
364         if (dp.key.empty()) goto error;
365         // key is affix charter
366         if (iconv(dp.key)[0] != achar) goto error_count;
367         nptr->achar = achar;
368 
369         split(dp);
370         if (dp.key.empty()) goto error;
371         // key is strip
372         if (dp.key != "0") {
373           ParmString s0(iconv(dp.key));
374           max_(max_strip_, s0.size());
375           max_(max_strip_f[(byte)achar], s0.size());
376           nptr->strip = data_buf.dup(s0);
377           nptr->stripl = s0.size();
378         } else {
379           nptr->strip  = "";
380           nptr->stripl = 0;
381         }
382 
383         split(dp);
384         if (dp.key.empty()) goto error;
385         // key is affix string or 0 for null
386         if (dp.key != "0") {
387           nptr->appnd  = data_buf.dup(iconv(dp.key));
388           nptr->appndl = strlen(nptr->appnd);
389         } else {
390           nptr->appnd  = "";
391           nptr->appndl = 0;
392         }
393 
394         split(dp);
395         if (dp.key.empty()) goto error;
396         // key is the conditions descriptions
397         char * cond = iconv(dp.key);
398         int cond_len = normalize_cond_str(cond);
399         if (cond_len < 0)
400           return (make_err(invalid_cond, MsgConv(lang)(cond))
401                   .with_file(affix_file, dp.line_num));
402         if (nptr->stripl != 0) {
403           char * cc = cond;
404           if (affix_type == 'S') cc += cond_len - nptr->stripl;
405           if (cond_len < nptr->stripl ||
406               memcmp(cc, nptr->strip, nptr->stripl) != 0)
407             return (make_err(invalid_cond_strip,
408                              MsgConv(lang)(cond), MsgConv(lang)(nptr->strip))
409                     .with_file(affix_file, dp.line_num));
410         }
411         encodeit(conds_lookup, data_buf, nptr, cond);
412 
413         // now create SfxEntry or PfxEntry objects and use links to
414         // build an ordered (sorted by affix string) list
415         if (affix_type == 'P')
416           build_pfxlist(static_cast<PfxEntry *>(nptr));
417         else
418           build_sfxlist(static_cast<SfxEntry *>(nptr));
419       }
420     }
421     continue;
422   error:
423     return make_err(corrupt_affix, MsgConv(lang)(achar)).with_file(affix_file, dp.line_num);
424   error_count:
425     return make_err(corrupt_affix, MsgConv(lang)(achar),
426                     _("Possibly incorrect count.")).with_file(affix_file, dp.line_num);
427   }
428   afflst.close();
429 
430   // now we can speed up performance greatly taking advantage of the
431   // relationship between the affixes and the idea of "subsets".
432 
433   // View each prefix as a potential leading subset of another and view
434   // each suffix (reversed) as a potential trailing subset of another.
435 
436   // To illustrate this relationship if we know the prefix "ab" is
437   // found in the word to examine, only prefixes that "ab" is a
438   // leading subset of need be examined.  Furthermore is "ab" is not
439   // present then none of the prefixes that "ab" is is a subset need
440   // be examined.
441 
442   // The same argument goes for suffix string that are reversed.
443 
444   // Then to top this off why not examine the first char of the word
445   // to quickly limit the set of prefixes to examine (i.e. the
446   // prefixes to examine must be leading supersets of the first
447   // character of the word (if they exist)
448 
449   // To take advantage of this "subset" relationship, we need to add
450   // two links from entry.  One to take next if the current prefix
451   // is found (call it nexteq) and one to take next if the current
452   // prefix is not found (call it nextne).
453 
454   // Since we have built ordered lists, all that remains is to
455   // properly initialize the nextne and nexteq pointers that relate
456   // them
457 
458   process_pfx_order();
459   process_sfx_order();
460 
461   //CERR.printf("%u\n", data_buf.calc_size()/1024);
462 
463   return no_err;
464 
465 }
466 
467 
468 // we want to be able to quickly access prefix information
469 // both by prefix flag, and sorted by prefix string itself
470 // so we need to set up two indexes
471 
build_pfxlist(PfxEntry * pfxptr)472 PosibErr<void> AffixMgr::build_pfxlist(PfxEntry* pfxptr)
473 {
474   PfxEntry * ptr;
475   PfxEntry * ep = pfxptr;
476 
477   // get the right starting point
478   const char * key = ep->key();
479   const byte flg = ep->flag();
480 
481   // first index by flag which must exist
482   ptr = pFlag[flg];
483   ep->flag_next = ptr;
484   pFlag[flg] = ep;
485 
486   // next insert the affix string, it will be sorted latter
487 
488   byte sp = *((const byte *)key);
489   ptr = pStart[sp];
490   ep->next = ptr;
491   pStart[sp] = ep;
492   return no_err;
493 }
494 
495 // we want to be able to quickly access suffix information
496 // both by suffix flag, and sorted by the reverse of the
497 // suffix string itself; so we need to set up two indexes
498 
build_sfxlist(SfxEntry * sfxptr)499 PosibErr<void> AffixMgr::build_sfxlist(SfxEntry* sfxptr)
500 {
501   SfxEntry * ptr;
502   SfxEntry * ep = sfxptr;
503   char * tmp = (char *)data_buf.alloc(sfxptr->appndl + 1);
504   sfxptr->rappnd = tmp;
505 
506   // reverse the string
507   char * dest = tmp + sfxptr->appndl;
508   *dest-- = 0;
509   const char * src = sfxptr->appnd;
510   for (; dest >= tmp; --dest, ++src)
511     *dest = *src;
512 
513   /* get the right starting point */
514   const char * key = ep->key();
515   const byte flg = ep->flag();
516 
517   // first index by flag which must exist
518   ptr = sFlag[flg];
519   ep->flag_next = ptr;
520   sFlag[flg] = ep;
521 
522   // next insert the affix string, it will be sorted latter
523 
524   byte sp = *((const byte *)key);
525   ptr = sStart[sp];
526   ep->next = ptr;
527   sStart[sp] = ep;
528   return no_err;
529 }
530 
531 
532 
533 // initialize the PfxEntry links NextEQ and NextNE to speed searching
process_pfx_order()534 PosibErr<void> AffixMgr::process_pfx_order()
535 {
536   PfxEntry* ptr;
537 
538   // loop through each prefix list starting point
539   for (int i=1; i < SETSIZE; i++) {
540 
541     ptr = pStart[i];
542 
543     if (ptr && ptr->next)
544       ptr = pStart[i] = sort(ptr, AffixLess<PfxEntry>());
545 
546     // look through the remainder of the list
547     //  and find next entry with affix that
548     // the current one is not a subset of
549     // mark that as destination for NextNE
550     // use next in list that you are a subset
551     // of as NextEQ
552 
553     for (; ptr != NULL; ptr = ptr->next) {
554 
555       PfxEntry * nptr = ptr->next;
556       for (; nptr != NULL; nptr = nptr->next) {
557         if (! isSubset( ptr->key() , nptr->key() )) break;
558       }
559       ptr->next_ne = nptr;
560       ptr->next_eq = NULL;
561       if ((ptr->next) && isSubset(ptr->key() ,
562                                   (ptr->next)->key()))
563         ptr->next_eq = ptr->next;
564     }
565 
566     // now clean up by adding smart search termination strings
567     // if you are already a superset of the previous prefix
568     // but not a subset of the next, search can end here
569     // so set NextNE properly
570 
571     ptr = pStart[i];
572     for (; ptr != NULL; ptr = ptr->next) {
573       PfxEntry * nptr = ptr->next;
574       PfxEntry * mptr = NULL;
575       for (; nptr != NULL; nptr = nptr->next) {
576         if (! isSubset(ptr->key(),nptr->key())) break;
577         mptr = nptr;
578       }
579       if (mptr) mptr->next_ne = NULL;
580     }
581   }
582   return no_err;
583 }
584 
585 
586 
587 // initialize the SfxEntry links NextEQ and NextNE to speed searching
process_sfx_order()588 PosibErr<void> AffixMgr::process_sfx_order()
589 {
590   SfxEntry* ptr;
591 
592   // loop through each prefix list starting point
593   for (int i=1; i < SETSIZE; i++) {
594 
595     ptr = sStart[i];
596 
597     if (ptr && ptr->next)
598       ptr = sStart[i] = sort(ptr, AffixLess<SfxEntry>());
599 
600     // look through the remainder of the list
601     //  and find next entry with affix that
602     // the current one is not a subset of
603     // mark that as destination for NextNE
604     // use next in list that you are a subset
605     // of as NextEQ
606 
607     for (; ptr != NULL; ptr = ptr->next) {
608       SfxEntry * nptr = ptr->next;
609       for (; nptr != NULL; nptr = nptr->next) {
610         if (! isSubset(ptr->key(),nptr->key())) break;
611       }
612       ptr->next_ne = nptr;
613       ptr->next_eq = NULL;
614       if ((ptr->next) && isSubset(ptr->key(),(ptr->next)->key()))
615         ptr->next_eq = ptr->next;
616     }
617 
618 
619     // now clean up by adding smart search termination strings:
620     // if you are already a superset of the previous suffix
621     // but not a subset of the next, search can end here
622     // so set NextNE properly
623 
624     ptr = sStart[i];
625     for (; ptr != NULL; ptr = ptr->next) {
626       SfxEntry * nptr = ptr->next;
627       SfxEntry * mptr = NULL;
628       for (; nptr != NULL; nptr = nptr->next) {
629         if (! isSubset(ptr->key(),nptr->key())) break;
630         mptr = nptr;
631       }
632       if (mptr) mptr->next_ne = NULL;
633     }
634   }
635   return no_err;
636 }
637 
638 // takes aff file condition string and creates the
639 // conds array - please see the appendix at the end of the
640 // file affentry.cxx which describes what is going on here
641 // in much more detail
642 
encodeit(CondsLookup & l,ObjStack & buf,AffEntry * ptr,char * cs)643 static void encodeit(CondsLookup & l, ObjStack & buf,
644                      AffEntry * ptr, char * cs)
645 {
646   byte c;
647   int i, j, k;
648 
649   // see if we already have this conds matrix
650 
651   CondsLookup::iterator itr = l.find(cs);
652   if (!(itr == l.end())) {
653     ptr->conds = *itr;
654     return;
655   }
656 
657   Conds * cds = (Conds *)buf.alloc_bottom(sizeof(Conds));
658   cds->str = buf.dup(cs);
659   l.insert(cds);
660   ptr->conds = cds;
661 
662   int nc = strlen(cs);
663   VARARRAYM(byte, mbr, nc + 1, MAXLNLEN);
664 
665   // now clear the conditions array
666   memset(cds->conds, 0, sizeof(cds->conds));
667 
668   // now parse the string to create the conds array
669 
670   int neg = 0;   // complement indicator
671   int grp = 0;   // group indicator
672   int n = 0;     // number of conditions
673   int ec = 0;    // end condition indicator
674   int nm = 0;    // number of member in group
675 
676   // if no condition just return
677   if (strcmp(cs,".")==0) {
678     cds->num = 0;
679     return;
680   }
681 
682   i = 0;
683   while (i < nc) {
684     c = *((byte *)(cs + i));
685 
686     // start group indicator
687     if (c == '[') {
688       grp = 1;
689       c = 0;
690     }
691 
692     // complement flag
693     if ((grp == 1) && (c == '^')) {
694       neg = 1;
695       c = 0;
696     }
697 
698     // end goup indicator
699     if (c == ']') {
700       ec = 1;
701       c = 0;
702     }
703 
704     // add character of group to list
705     if ((grp == 1) && (c != 0)) {
706       *(mbr + nm) = c;
707       nm++;
708       c = 0;
709     }
710 
711     // end of condition
712     if (c != 0) {
713       ec = 1;
714     }
715 
716 
717     if (ec) {
718       if (grp == 1) {
719         if (neg == 0) {
720           // set the proper bits in the condition array vals for those chars
721           for (j=0;j<nm;j++) {
722             k = (unsigned int) mbr[j];
723             cds->conds[k] = cds->conds[k] | (1 << n);
724           }
725         } else {
726           // complement so set all of them and then unset indicated ones
727           for (j=0;j<SETSIZE;j++) cds->conds[j] = cds->conds[j] | (1 << n);
728           for (j=0;j<nm;j++) {
729             k = (unsigned int) mbr[j];
730             cds->conds[k] = cds->conds[k] & ~(1 << n);
731           }
732         }
733         neg = 0;
734         grp = 0;
735         nm = 0;
736       } else {
737         // not a group so just set the proper bit for this char
738         // but first handle special case of . inside condition
739         if (c == '.') {
740           // wild card character so set them all
741           for (j=0;j<SETSIZE;j++) cds->conds[j] = cds->conds[j] | (1 << n);
742         } else {
743           cds->conds[(unsigned int)c] = cds->conds[(unsigned int)c] | (1 << n);
744         }
745       }
746       n++;
747       ec = 0;
748     }
749 
750 
751     i++;
752   }
753   cds->num = n;
754   return;
755 }
756 
757 
758 // check word for prefixes
prefix_check(const LookupInfo & linf,ParmString word,CheckInfo & ci,GuessInfo * gi,bool cross) const759 bool AffixMgr::prefix_check (const LookupInfo & linf, ParmString word,
760                              CheckInfo & ci, GuessInfo * gi, bool cross) const
761 {
762   if (word.empty()) return false;
763 
764   // first handle the special case of 0 length prefixes
765   PfxEntry * pe = pStart[0];
766   while (pe) {
767     if (pe->check(linf,this,word,ci,gi)) return true;
768     pe = pe->next;
769   }
770 
771   // now handle the general case
772   byte sp = *reinterpret_cast<const byte *>(word.str());
773   PfxEntry * pptr = pStart[sp];
774 
775   while (pptr) {
776     if (isSubset(pptr->key(),word)) {
777       if (pptr->check(linf,this,word,ci,gi,cross)) return true;
778       pptr = pptr->next_eq;
779     } else {
780       pptr = pptr->next_ne;
781     }
782   }
783 
784   return false;
785 }
786 
787 
788 // check word for suffixes
suffix_check(const LookupInfo & linf,ParmString word,CheckInfo & ci,GuessInfo * gi,int sfxopts,AffEntry * ppfx) const789 bool AffixMgr::suffix_check (const LookupInfo & linf, ParmString word,
790                              CheckInfo & ci, GuessInfo * gi,
791                              int sfxopts, AffEntry * ppfx) const
792 {
793   if (word.empty()) return false;
794 
795   // first handle the special case of 0 length suffixes
796   SfxEntry * se = sStart[0];
797   while (se) {
798     if (se->check(linf, word, ci, gi, sfxopts, ppfx)) return true;
799     se = se->next;
800   }
801 
802   // now handle the general case
803   byte sp = *((const byte *)(word + word.size() - 1));
804   SfxEntry * sptr = sStart[sp];
805 
806   while (sptr) {
807     if (isRevSubset(sptr->key(), word + word.size() - 1, word.size())) {
808       if (sptr->check(linf, word, ci, gi, sfxopts, ppfx)) return true;
809       sptr = sptr->next_eq;
810     } else {
811       sptr = sptr->next_ne;
812     }
813   }
814 
815   return false;
816 }
817 
818 // check if word with affixes is correctly spelled
affix_check(const LookupInfo & linf,ParmString word,CheckInfo & ci,GuessInfo * gi) const819 bool AffixMgr::affix_check(const LookupInfo & linf, ParmString word,
820                            CheckInfo & ci, GuessInfo * gi) const
821 {
822   if (word.empty()) return false;
823 
824   // Deal With Case in a semi-intelligent manner
825   CasePattern cp = lang->LangImpl::case_pattern(word);
826   ParmString pword = word;
827   ParmString sword = word;
828   CharVector lower;
829   if (cp == FirstUpper) {
830     lower.append(word, word.size() + 1);
831     lower[0] = lang->to_lower(word[0]);
832     pword = ParmString(lower.data(), lower.size() - 1);
833   } else if (cp == AllUpper) {
834     lower.resize(word.size() + 1);
835     unsigned int i = 0;
836     for (; i != word.size(); ++i)
837       lower[i] = lang->to_lower(word[i]);
838     lower[i] = '\0';
839     pword = ParmString(lower.data(), lower.size() - 1);
840     sword = pword;
841   }
842 
843   // check all prefixes (also crossed with suffixes if allowed)
844   if (prefix_check(linf, pword, ci, gi)) return true;
845 
846   // if still not found check all suffixes
847   if (suffix_check(linf, sword, ci, gi, 0, NULL)) return true;
848 
849   // if still not found check again but with the lower case version
850   // which can make a difference if the entire word matches the cond
851   // string
852   if (cp == FirstUpper) {
853     return suffix_check(linf, pword, ci, gi, 0, NULL);
854   } else {
855     return false;
856   }
857 }
858 
munch(ParmString word,GuessInfo * gi,bool cross) const859 void AffixMgr::munch(ParmString word, GuessInfo * gi, bool cross) const
860 {
861   LookupInfo li(0, LookupInfo::AlwaysTrue);
862   CheckInfo ci;
863   gi->reset();
864   CasePattern cp = lang->LangImpl::case_pattern(word);
865   if (cp == AllUpper) return;
866   if (cp != FirstUpper)
867     prefix_check(li, word, ci, gi, cross);
868   suffix_check(li, word, ci, gi, 0, NULL);
869 }
870 
expand(ParmString word,ParmString aff,ObjStack & buf,int limit) const871 WordAff * AffixMgr::expand(ParmString word, ParmString aff,
872                            ObjStack & buf, int limit) const
873 {
874   byte * empty = (byte *)buf.alloc(1);
875   *empty = 0;
876 
877   byte * suf  = (byte *)buf.alloc(aff.size() + 1);
878   byte * suf_e = suf;
879   byte * csuf = (byte *)buf.alloc(aff.size() + 1);
880   byte * csuf_e = csuf;
881 
882   WordAff * head = (WordAff *)buf.alloc_bottom(sizeof(WordAff));
883   WordAff * cur = head;
884   cur->word = buf.dup(word);
885   cur->aff  = suf;
886 
887   for (const byte * c = (const byte *)aff.str(), * end = c + aff.size();
888        c != end;
889        ++c)
890   {
891     if (sFlag[*c]) *suf_e++ = *c;
892     if (sFlag[*c] && sFlag[*c]->allow_cross()) *csuf_e++ = *c;
893 
894     for (PfxEntry * p = pFlag[*c]; p; p = p->flag_next) {
895       SimpleString newword = p->add(word, buf);
896       if (!newword) continue;
897       cur->next = (WordAff *)buf.alloc_bottom(sizeof(WordAff));
898       cur = cur->next;
899       cur->word = newword;
900       cur->aff = p->allow_cross() ? csuf : empty;
901     }
902   }
903 
904   *suf_e = 0;
905   *csuf_e = 0;
906   cur->next = 0;
907 
908   if (limit == 0) return head;
909 
910   WordAff * * end = &cur->next;
911   WordAff * * very_end = end;
912   size_t nsuf_s = suf_e - suf + 1;
913 
914   for (WordAff * * cur = &head; cur != end; cur = &(*cur)->next) {
915     if ((int)(*cur)->word.size - max_strip_ >= limit) continue;
916     byte * nsuf = (byte *)buf.alloc(nsuf_s);
917     expand_suffix((*cur)->word, (*cur)->aff, buf, limit, nsuf, &very_end, word);
918     (*cur)->aff = nsuf;
919   }
920 
921   return head;
922 }
923 
expand_suffix(ParmString word,const byte * aff,ObjStack & buf,int limit,byte * new_aff,WordAff *** l,ParmString orig_word) const924 WordAff * AffixMgr::expand_suffix(ParmString word, const byte * aff,
925                                   ObjStack & buf, int limit,
926                                   byte * new_aff, WordAff * * * l,
927                                   ParmString orig_word) const
928 {
929   WordAff * head = 0;
930   if (l) head = **l;
931   WordAff * * cur = l ? *l : &head;
932   bool expanded     = false;
933   bool not_expanded = false;
934   if (!orig_word) orig_word = word;
935 
936   while (*aff) {
937     if ((int)word.size() - max_strip_f[*aff] < limit) {
938       for (SfxEntry * p = sFlag[*aff]; p; p = p->flag_next) {
939         SimpleString newword = p->add(word, buf, limit, orig_word);
940         if (!newword) continue;
941         if (newword == EMPTY) {not_expanded = true; continue;}
942         *cur = (WordAff *)buf.alloc_bottom(sizeof(WordAff));
943         (*cur)->word = newword;
944         (*cur)->aff  = (const byte *)EMPTY;
945         cur = &(*cur)->next;
946         expanded = true;
947       }
948     }
949     if (new_aff && (!expanded || not_expanded)) *new_aff++ = *aff;
950     ++aff;
951   }
952   *cur = 0;
953   if (new_aff) *new_aff = 0;
954   if (l) *l = cur;
955   return head;
956 }
957 
check_affix(ParmString word,char aff) const958 CheckAffixRes AffixMgr::check_affix(ParmString word, char aff) const
959 {
960   CheckAffixRes res = InvalidAffix;
961 
962   for (PfxEntry * p = pFlag[(unsigned char)aff]; p; p = p->flag_next) {
963     res = InapplicableAffix;
964     if (p->applicable(word)) return ValidAffix;
965   }
966 
967   for (SfxEntry * p = sFlag[(unsigned char)aff]; p; p = p->flag_next) {
968     if (res == InvalidAffix) res = InapplicableAffix;
969     if (p->applicable(word)) return ValidAffix;
970   }
971 
972   return res;
973 }
974 
975 
976 
977 //////////////////////////////////////////////////////////////////////
978 //
979 // LookupInfo
980 //
981 
lookup(ParmString word,const SensitiveCompare * c,char achar,WordEntry & o,GuessInfo * gi) const982 int LookupInfo::lookup (ParmString word, const SensitiveCompare * c,
983                         char achar,
984                         WordEntry & o, GuessInfo * gi) const
985 {
986   SpellerImpl::WS::const_iterator i = begin;
987   const char * g = 0;
988   if (mode == Word) {
989     do {
990       (*i)->lookup(word, c, o);
991       for (;!o.at_end(); o.adv()) {
992         if (TESTAFF(o.aff, achar))
993           return 1;
994         else
995           g = o.word;
996       }
997       ++i;
998     } while (i != end);
999   } else if (mode == Clean) {
1000     do {
1001       (*i)->clean_lookup(word, o);
1002       for (;!o.at_end(); o.adv()) {
1003         if (TESTAFF(o.aff, achar))
1004           return 1;
1005         else
1006           g = o.word;
1007       }
1008       ++i;
1009     } while (i != end);
1010   } else if (gi) {
1011     g = gi->dup(word);
1012   }
1013   if (gi && g) {
1014     CheckInfo * ci = gi->add();
1015     ci->word = g;
1016     return -1;
1017   }
1018   return 0;
1019 }
1020 
1021 //////////////////////////////////////////////////////////////////////
1022 //
1023 // Affix Entry
1024 //
1025 
applicable(SimpleString word) const1026 bool PfxEntry::applicable(SimpleString word) const
1027 {
1028   unsigned int cond;
1029   /* make sure all conditions match */
1030   if ((word.size > stripl) && (word.size >= conds->num)) {
1031     const byte * cp = (const byte *) word.str;
1032     for (cond = 0;  cond < conds->num;  cond++) {
1033       if ((conds->get(*cp++) & (1 << cond)) == 0)
1034         break;
1035     }
1036     if (cond >= conds->num) return true;
1037   }
1038   return false;
1039 }
1040 
1041 // add prefix to this word assuming conditions hold
add(SimpleString word,ObjStack & buf) const1042 SimpleString PfxEntry::add(SimpleString word, ObjStack & buf) const
1043 {
1044   unsigned int cond;
1045   /* make sure all conditions match */
1046   if ((word.size > stripl) && (word.size >= conds->num)) {
1047     const byte * cp = (const byte *) word.str;
1048     for (cond = 0;  cond < conds->num;  cond++) {
1049       if ((conds->get(*cp++) & (1 << cond)) == 0)
1050         break;
1051     }
1052     if (cond >= conds->num) {
1053       /* */
1054       int alen = word.size - stripl;
1055       char * newword = (char *)buf.alloc(alen + appndl + 1);
1056       if (appndl) memcpy(newword, appnd, appndl);
1057       memcpy(newword + appndl, word + stripl, alen + 1);
1058       return SimpleString(newword, alen + appndl);
1059     }
1060   }
1061   return SimpleString();
1062 }
1063 
1064 // check if this prefix entry matches
check(const LookupInfo & linf,const AffixMgr * pmyMgr,ParmString word,CheckInfo & ci,GuessInfo * gi,bool cross) const1065 bool PfxEntry::check(const LookupInfo & linf, const AffixMgr * pmyMgr,
1066                      ParmString word,
1067                      CheckInfo & ci, GuessInfo * gi, bool cross) const
1068 {
1069   unsigned int		cond;	// condition number being examined
1070   unsigned              tmpl;   // length of tmpword
1071   WordEntry             wordinfo;     // hash entry of root word or NULL
1072   byte *	cp;
1073   VARARRAYM(char, tmpword, word.size()+stripl+1, MAXWORDLEN+1);
1074 
1075   // on entry prefix is 0 length or already matches the beginning of the word.
1076   // So if the remaining root word has positive length
1077   // and if there are enough chars in root word and added back strip chars
1078   // to meet the number of characters conditions, then test it
1079 
1080   tmpl = word.size() - appndl;
1081 
1082   if ((tmpl > 0) &&  (tmpl + stripl >= conds->num)) {
1083 
1084     // generate new root word by removing prefix and adding
1085     // back any characters that would have been stripped
1086 
1087     if (stripl) strcpy (tmpword, strip);
1088     strcpy ((tmpword + stripl), (word + appndl));
1089 
1090     // now make sure all of the conditions on characters
1091     // are met.  Please see the appendix at the end of
1092     // this file for more info on exactly what is being
1093     // tested
1094 
1095     cp = (byte *)tmpword;
1096     for (cond = 0;  cond < conds->num;  cond++) {
1097       if ((conds->get(*cp++) & (1 << cond)) == 0) break;
1098     }
1099 
1100     // if all conditions are met then check if resulting
1101     // root word in the dictionary
1102 
1103     if (cond >= conds->num) {
1104       CheckInfo * lci = 0;
1105       CheckInfo * guess = 0;
1106       tmpl += stripl;
1107 
1108       int res = linf.lookup(tmpword, &linf.sp->s_cmp_end, achar, wordinfo, gi);
1109 
1110       if (res == 1) {
1111 
1112         lci = &ci;
1113         lci->word = wordinfo.word;
1114         goto quit;
1115 
1116       } else if (res == -1) {
1117 
1118         guess = gi->head;
1119 
1120       }
1121 
1122       // prefix matched but no root word was found
1123       // if XPRODUCT is allowed, try again but now
1124       // cross checked combined with a suffix
1125 
1126       if (gi)
1127         lci = gi->head;
1128 
1129       if (cross && xpflg & XPRODUCT) {
1130         if (pmyMgr->suffix_check(linf, ParmString(tmpword, tmpl),
1131                                  ci, gi,
1132                                  XPRODUCT, (AffEntry *)this)) {
1133           lci = &ci;
1134 
1135         } else if (gi) {
1136 
1137           CheckInfo * stop = lci;
1138           for (lci = gi->head;
1139                lci != stop;
1140                lci = const_cast<CheckInfo *>(lci->next))
1141           {
1142             lci->pre_flag = achar;
1143             lci->pre_strip_len = stripl;
1144             lci->pre_add_len = appndl;
1145             lci->pre_add = appnd;
1146           }
1147 
1148         } else {
1149 
1150           lci = 0;
1151 
1152         }
1153       }
1154 
1155       if (guess)
1156         lci = guess;
1157 
1158     quit:
1159       if (lci) {
1160         lci->pre_flag = achar;
1161         lci->pre_strip_len = stripl;
1162         lci->pre_add_len = appndl;
1163         lci->pre_add = appnd;
1164       }
1165       if (lci == &ci) return true;
1166     }
1167   }
1168   return false;
1169 }
1170 
applicable(SimpleString word) const1171 bool SfxEntry::applicable(SimpleString word) const
1172 {
1173   int cond;
1174   /* make sure all conditions match */
1175   if ((word.size > stripl) && (word.size >= conds->num)) {
1176     const byte * cp = (const byte *) (word + word.size);
1177     for (cond = conds->num; --cond >=0; ) {
1178       if ((conds->get(*--cp) & (1 << cond)) == 0)
1179         break;
1180     }
1181     if (cond < 0) return true;
1182   }
1183   return false;
1184 }
1185 
1186 // add suffix to this word assuming conditions hold
add(SimpleString word,ObjStack & buf,int limit,SimpleString orig_word) const1187 SimpleString SfxEntry::add(SimpleString word, ObjStack & buf,
1188                            int limit, SimpleString orig_word) const
1189 {
1190   int cond;
1191   /* make sure all conditions match */
1192   if ((orig_word.size > stripl) && (orig_word.size >= conds->num)) {
1193     const byte * cp = (const byte *) (orig_word + orig_word.size);
1194     for (cond = conds->num; --cond >=0; ) {
1195       if ((conds->get(*--cp) & (1 << cond)) == 0)
1196         break;
1197     }
1198     if (cond < 0) {
1199       int alen = word.size - stripl;
1200       if (alen >= limit) return EMPTY;
1201       /* we have a match so add suffix */
1202       char * newword = (char *)buf.alloc(alen + appndl + 1);
1203       memcpy(newword, word, alen);
1204       memcpy(newword + alen, appnd, appndl + 1);
1205       return SimpleString(newword, alen + appndl);
1206     }
1207   }
1208   return SimpleString();
1209 }
1210 
1211 // see if this suffix is present in the word
check(const LookupInfo & linf,ParmString word,CheckInfo & ci,GuessInfo * gi,int optflags,AffEntry * ppfx)1212 bool SfxEntry::check(const LookupInfo & linf, ParmString word,
1213                      CheckInfo & ci, GuessInfo * gi,
1214                      int optflags, AffEntry* ppfx)
1215 {
1216   unsigned              tmpl;		 // length of tmpword
1217   int			cond;		 // condition beng examined
1218   WordEntry             wordinfo;        // hash entry pointer
1219   byte *	cp;
1220   VARARRAYM(char, tmpword, word.size()+stripl+1, MAXWORDLEN+1);
1221   PfxEntry* ep = (PfxEntry *) ppfx;
1222 
1223   // if this suffix is being cross checked with a prefix
1224   // but it does not support cross products skip it
1225 
1226   if ((optflags & XPRODUCT) != 0 &&  (xpflg & XPRODUCT) == 0)
1227     return false;
1228 
1229   // upon entry suffix is 0 length or already matches the end of the word.
1230   // So if the remaining root word has positive length
1231   // and if there are enough chars in root word and added back strip chars
1232   // to meet the number of characters conditions, then test it
1233 
1234   tmpl = word.size() - appndl;
1235 
1236   if ((tmpl > 0)  &&  (tmpl + stripl >= conds->num)) {
1237 
1238     // generate new root word by removing suffix and adding
1239     // back any characters that would have been stripped or
1240     // or null terminating the shorter string
1241 
1242     strcpy (tmpword, word);
1243     cp = (byte *)(tmpword + tmpl);
1244     if (stripl) {
1245       strcpy ((char *)cp, strip);
1246       tmpl += stripl;
1247       cp = (byte *)(tmpword + tmpl);
1248     } else *cp = '\0';
1249 
1250     // now make sure all of the conditions on characters
1251     // are met.  Please see the appendix at the end of
1252     // this file for more info on exactly what is being
1253     // tested
1254 
1255     for (cond = conds->num;  --cond >= 0; ) {
1256       if ((conds->get(*--cp) & (1 << cond)) == 0) break;
1257     }
1258 
1259     // if all conditions are met then check if resulting
1260     // root word in the dictionary
1261 
1262     if (cond < 0) {
1263       CheckInfo * lci = 0;
1264       tmpl += stripl;
1265       const SensitiveCompare * cmp =
1266         optflags & XPRODUCT ? &linf.sp->s_cmp_middle : &linf.sp->s_cmp_begin;
1267       int res = linf.lookup(tmpword, cmp, achar, wordinfo, gi);
1268       if (res == 1
1269           && ((optflags & XPRODUCT) == 0 || TESTAFF(wordinfo.aff, ep->achar)))
1270       {
1271         lci = &ci;
1272         lci->word = wordinfo.word;
1273       } else if (res == 1 && gi) {
1274         lci = gi->add();
1275         lci->word = wordinfo.word;
1276       } else if (res == -1) { // gi must be defined
1277         lci = gi->head;
1278       }
1279 
1280       if (lci) {
1281         lci->suf_flag = achar;
1282         lci->suf_strip_len = stripl;
1283         lci->suf_add_len = appndl;
1284         lci->suf_add = appnd;
1285       }
1286 
1287       if (lci == &ci) return true;
1288     }
1289   }
1290   return false;
1291 }
1292 
1293 //////////////////////////////////////////////////////////////////////
1294 //
1295 // new_affix_mgr
1296 //
1297 
1298 
new_affix_mgr(ParmString name,Conv & iconv,const Language * lang)1299 PosibErr<AffixMgr *> new_affix_mgr(ParmString name,
1300                                    Conv & iconv,
1301                                    const Language * lang)
1302 {
1303   if (name == "none")
1304     return 0;
1305   //CERR << "NEW AFFIX MGR\n";
1306   String file;
1307   file += lang->data_dir();
1308   file += '/';
1309   file += lang->name();
1310   file += "_affix.dat";
1311   AffixMgr * affix;
1312   affix = new AffixMgr(lang);
1313   PosibErrBase pe = affix->setup(file, iconv);
1314   if (pe.has_err()) {
1315     delete affix;
1316     return pe;
1317   } else {
1318     return affix;
1319   }
1320 }
1321 }
1322 
1323 /**************************************************************************
1324 
1325 Appendix:  Understanding Affix Code
1326 
1327 
1328 An affix is either a  prefix or a suffix attached to root words to make
1329 other words.
1330 
1331 Basically a Prefix or a Suffix is set of AffEntry objects
1332 which store information about the prefix or suffix along
1333 with supporting routines to check if a word has a particular
1334 prefix or suffix or a combination.
1335 
1336 The structure affentry is defined as follows:
1337 
1338 struct AffEntry
1339 {
1340    unsigned char achar;   // char used to represent the affix
1341    char * strip;          // string to strip before adding affix
1342    char * appnd;          // the affix string to add
1343    short  stripl;         // length of the strip string
1344    short  appndl;         // length of the affix string
1345    short  numconds;       // the number of conditions that must be met
1346    short  xpflg;          // flag: XPRODUCT- combine both prefix and suffix
1347    char   conds[SETSIZE]; // array which encodes the conditions to be met
1348 };
1349 
1350 
1351 Here is a suffix borrowed from the en_US.aff file.  This file
1352 is whitespace delimited.
1353 
1354 SFX D Y 4
1355 SFX D   0     e          d
1356 SFX D   y     ied        [^aeiou]y
1357 SFX D   0     ed         [^ey]
1358 SFX D   0     ed         [aeiou]y
1359 
1360 This information can be interpreted as follows:
1361 
1362 In the first line has 4 fields
1363 
1364 Field
1365 -----
1366 1     SFX - indicates this is a suffix
1367 2     D   - is the name of the character flag which represents this suffix
1368 3     Y   - indicates it can be combined with prefixes (cross product)
1369 4     4   - indicates that sequence of 4 affentry structures are needed to
1370                properly store the affix information
1371 
1372 The remaining lines describe the unique information for the 4 SfxEntry
1373 objects that make up this affix.  Each line can be interpreted
1374 as follows: (note fields 1 and 2 are as a check against line 1 info)
1375 
1376 Field
1377 -----
1378 1     SFX         - indicates this is a suffix
1379 2     D           - is the name of the character flag for this affix
1380 3     y           - the string of chars to strip off before adding affix
1381                          (a 0 here indicates the NULL string)
1382 4     ied         - the string of affix characters to add
1383 5     [^aeiou]y   - the conditions which must be met before the affix
1384                     can be applied
1385 
1386 Field 5 is interesting.  Since this is a suffix, field 5 tells us that
1387 there are 2 conditions that must be met.  The first condition is that
1388 the next to the last character in the word must *NOT* be any of the
1389 following "a", "e", "i", "o" or "u".  The second condition is that
1390 the last character of the word must end in "y".
1391 
1392 So how can we encode this information concisely and be able to
1393 test for both conditions in a fast manner?  The answer is found
1394 but studying the wonderful ispell code of Geoff Kuenning, et.al.
1395 (now available under a normal BSD license).
1396 
1397 If we set up a conds array of 256 bytes indexed (0 to 255) and access it
1398 using a character (cast to an unsigned char) of a string, we have 8 bits
1399 of information we can store about that character.  Specifically we
1400 could use each bit to say if that character is allowed in any of the
1401 last (or first for prefixes) 8 characters of the word.
1402 
1403 Basically, each character at one end of the word (up to the number
1404 of conditions) is used to index into the conds array and the resulting
1405 value found there says whether the that character is valid for a
1406 specific character position in the word.
1407 
1408 For prefixes, it does this by setting bit 0 if that char is valid
1409 in the first position, bit 1 if valid in the second position, and so on.
1410 
1411 If a bit is not set, then that char is not valid for that position in the
1412 word.
1413 
1414 If working with suffixes bit 0 is used for the character closest
1415 to the front, bit 1 for the next character towards the end, ...,
1416 with bit numconds-1 representing the last char at the end of the string.
1417 
1418 Note: since entries in the conds[] are 8 bits, only 8 conditions
1419 (read that only 8 character positions) can be examined at one
1420 end of a word (the beginning for prefixes and the end for suffixes.
1421 
1422 So to make this clearer, lets encode the conds array values for the
1423 first two affentries for the suffix D described earlier.
1424 
1425 
1426   For the first affentry:
1427      numconds = 1             (only examine the last character)
1428 
1429      conds['e'] =  (1 << 0)   (the word must end in an E)
1430      all others are all 0
1431 
1432   For the second affentry:
1433      numconds = 2             (only examine the last two characters)
1434 
1435      conds[X] = conds[X] | (1 << 0)     (aeiou are not allowed)
1436          where X is all characters *but* a, e, i, o, or u
1437 
1438 
1439      conds['y'] = (1 << 1)     (the last char must be a y)
1440      all other bits for all other entries in the conds array are zero
1441 
1442 
1443 **************************************************************************/
1444