1 /* ***** BEGIN LICENSE BLOCK *****
2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3  *
4  * Copyright (C) 2002-2017 Németh László
5  *
6  * The contents of this file are subject to the Mozilla Public License Version
7  * 1.1 (the "License"); you may not use this file except in compliance with
8  * the License. You may obtain a copy of the License at
9  * http://www.mozilla.org/MPL/
10  *
11  * Software distributed under the License is distributed on an "AS IS" basis,
12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13  * for the specific language governing rights and limitations under the
14  * License.
15  *
16  * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17  *
18  * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19  * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20  * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21  * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22  * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23  *
24  * Alternatively, the contents of this file may be used under the terms of
25  * either the GNU General Public License Version 2 or later (the "GPL"), or
26  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27  * in which case the provisions of the GPL or the LGPL are applicable instead
28  * of those above. If you wish to allow use of your version of this file only
29  * under the terms of either the GPL or the LGPL, and not to allow others to
30  * use your version of this file under the terms of the MPL, indicate your
31  * decision by deleting the provisions above and replace them with the notice
32  * and other provisions required by the GPL or the LGPL. If you do not delete
33  * the provisions above, a recipient may use your version of this file under
34  * the terms of any one of the MPL, the GPL or the LGPL.
35  *
36  * ***** END LICENSE BLOCK ***** */
37 /*
38  * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
39  * And Contributors.  All rights reserved.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  *
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  *
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  *
52  * 3. All modifications to the source code must be clearly marked as
53  *    such.  Binary redistributions based on modified source code
54  *    must be clearly marked as modified versions in the documentation
55  *    and/or other materials provided with the distribution.
56  *
57  * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
58  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
60  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
61  * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
62  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
63  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
64  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68  * SUCH DAMAGE.
69  */
70 
71 #include <stdlib.h>
72 #include <string.h>
73 #include <stdio.h>
74 #include <ctype.h>
75 
76 #include "affentry.hxx"
77 #include "csutil.hxx"
78 
~AffEntry()79 AffEntry::~AffEntry() {
80   if (opts & aeLONGCOND)
81     free(c.l.conds2);
82   if (morphcode && !(opts & aeALIASM))
83     free(morphcode);
84   if (contclass && !(opts & aeALIASF))
85     free(contclass);
86 }
87 
PfxEntry(AffixMgr * pmgr)88 PfxEntry::PfxEntry(AffixMgr* pmgr)
89     // register affix manager
90     : pmyMgr(pmgr),
91       next(NULL),
92       nexteq(NULL),
93       nextne(NULL),
94       flgnxt(NULL) {
95 }
96 
97 // add prefix to this word assuming conditions hold
add(const char * word,size_t len)98 std::string PfxEntry::add(const char* word, size_t len) {
99   std::string result;
100   if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&
101       (len >= numconds) && test_condition(word) &&
102       (!strip.size() || (strncmp(word, strip.c_str(), strip.size()) == 0))) {
103     /* we have a match so add prefix */
104     result.assign(appnd);
105     result.append(word + strip.size());
106   }
107   return result;
108 }
109 
nextchar(char * p)110 inline char* PfxEntry::nextchar(char* p) {
111   if (p) {
112     p++;
113     if (opts & aeLONGCOND) {
114       // jump to the 2nd part of the condition
115       if (p == c.conds + MAXCONDLEN_1)
116         return c.l.conds2;
117       // end of the MAXCONDLEN length condition
118     } else if (p == c.conds + MAXCONDLEN)
119       return NULL;
120     return *p ? p : NULL;
121   }
122   return NULL;
123 }
124 
test_condition(const char * st)125 inline int PfxEntry::test_condition(const char* st) {
126   const char* pos = NULL;  // group with pos input position
127   bool neg = false;        // complementer
128   bool ingroup = false;    // character in the group
129   if (numconds == 0)
130     return 1;
131   char* p = c.conds;
132   while (1) {
133     switch (*p) {
134       case '\0':
135         return 1;
136       case '[': {
137         neg = false;
138         ingroup = false;
139         p = nextchar(p);
140         pos = st;
141         break;
142       }
143       case '^': {
144         p = nextchar(p);
145         neg = true;
146         break;
147       }
148       case ']': {
149         if ((neg && ingroup) || (!neg && !ingroup))
150           return 0;
151         pos = NULL;
152         p = nextchar(p);
153         // skip the next character
154         if (!ingroup && *st)
155           for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++)
156             ;
157         if (*st == '\0' && p)
158           return 0;  // word <= condition
159         break;
160       }
161       case '.':
162         if (!pos) {  // dots are not metacharacters in groups: [.]
163           p = nextchar(p);
164           // skip the next character
165           for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++)
166             ;
167           if (*st == '\0' && p)
168             return 0;  // word <= condition
169           break;
170         }
171       /* FALLTHROUGH */
172       default: {
173         if (*st == *p) {
174           st++;
175           p = nextchar(p);
176           if ((opts & aeUTF8) && (*(st - 1) & 0x80)) {  // multibyte
177             while (p && (*p & 0xc0) == 0x80) {          // character
178               if (*p != *st) {
179                 if (!pos)
180                   return 0;
181                 st = pos;
182                 break;
183               }
184               p = nextchar(p);
185               st++;
186             }
187             if (pos && st != pos) {
188               ingroup = true;
189               while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
190               }
191             }
192           } else if (pos) {
193             ingroup = true;
194             while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
195             }
196           }
197         } else if (pos) {  // group
198           p = nextchar(p);
199         } else
200           return 0;
201       }
202     }
203     if (!p)
204       return 1;
205   }
206 }
207 
208 // check if this prefix entry matches
checkword(const char * word,int len,char in_compound,const FLAG needflag)209 struct hentry* PfxEntry::checkword(const char* word,
210                                    int len,
211                                    char in_compound,
212                                    const FLAG needflag) {
213   struct hentry* he;  // hash entry of root word or NULL
214 
215   // on entry prefix is 0 length or already matches the beginning of the word.
216   // So if the remaining root word has positive length
217   // and if there are enough chars in root word and added back strip chars
218   // to meet the number of characters conditions, then test it
219 
220   int tmpl = len - appnd.size(); // length of tmpword
221 
222   if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) {
223     // generate new root word by removing prefix and adding
224     // back any characters that would have been stripped
225 
226     std::string tmpword(strip);
227     tmpword.append(word + appnd.size());
228 
229     // now make sure all of the conditions on characters
230     // are met.  Please see the appendix at the end of
231     // this file for more info on exactly what is being
232     // tested
233 
234     // if all conditions are met then check if resulting
235     // root word in the dictionary
236 
237     if (test_condition(tmpword.c_str())) {
238       tmpl += strip.size();
239       if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) {
240         do {
241           if (TESTAFF(he->astr, aflag, he->alen) &&
242               // forbid single prefixes with needaffix flag
243               !TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
244               // needflag
245               ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
246                (contclass && TESTAFF(contclass, needflag, contclasslen))))
247             return he;
248           he = he->next_homonym;  // check homonyms
249         } while (he);
250       }
251 
252       // prefix matched but no root word was found
253       // if aeXPRODUCT is allowed, try again but now
254       // ross checked combined with a suffix
255 
256       // if ((opts & aeXPRODUCT) && in_compound) {
257       if ((opts & aeXPRODUCT)) {
258         he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, aeXPRODUCT, this,
259                                   FLAG_NULL, needflag, in_compound);
260         if (he)
261           return he;
262       }
263     }
264   }
265   return NULL;
266 }
267 
268 // check if this prefix entry matches
check_twosfx(const char * word,int len,char in_compound,const FLAG needflag)269 struct hentry* PfxEntry::check_twosfx(const char* word,
270                                       int len,
271                                       char in_compound,
272                                       const FLAG needflag) {
273   // on entry prefix is 0 length or already matches the beginning of the word.
274   // So if the remaining root word has positive length
275   // and if there are enough chars in root word and added back strip chars
276   // to meet the number of characters conditions, then test it
277 
278   int tmpl = len - appnd.size(); // length of tmpword
279 
280   if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
281       (tmpl + strip.size() >= numconds)) {
282     // generate new root word by removing prefix and adding
283     // back any characters that would have been stripped
284 
285     std::string tmpword(strip);
286     tmpword.append(word + appnd.size());
287 
288     // now make sure all of the conditions on characters
289     // are met.  Please see the appendix at the end of
290     // this file for more info on exactly what is being
291     // tested
292 
293     // if all conditions are met then check if resulting
294     // root word in the dictionary
295 
296     if (test_condition(tmpword.c_str())) {
297       tmpl += strip.size();
298 
299       // prefix matched but no root word was found
300       // if aeXPRODUCT is allowed, try again but now
301       // cross checked combined with a suffix
302 
303       if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
304         // hash entry of root word or NULL
305         struct hentry* he = pmyMgr->suffix_check_twosfx(tmpword.c_str(), tmpl, aeXPRODUCT, this,
306                                                         needflag);
307         if (he)
308           return he;
309       }
310     }
311   }
312   return NULL;
313 }
314 
315 // check if this prefix entry matches
check_twosfx_morph(const char * word,int len,char in_compound,const FLAG needflag)316 std::string PfxEntry::check_twosfx_morph(const char* word,
317                                          int len,
318                                          char in_compound,
319                                          const FLAG needflag) {
320   std::string result;
321   // on entry prefix is 0 length or already matches the beginning of the word.
322   // So if the remaining root word has positive length
323   // and if there are enough chars in root word and added back strip chars
324   // to meet the number of characters conditions, then test it
325   int tmpl = len - appnd.size(); // length of tmpword
326 
327   if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
328       (tmpl + strip.size() >= numconds)) {
329     // generate new root word by removing prefix and adding
330     // back any characters that would have been stripped
331 
332     std::string tmpword(strip);
333     tmpword.append(word + appnd.size());
334 
335     // now make sure all of the conditions on characters
336     // are met.  Please see the appendix at the end of
337     // this file for more info on exactly what is being
338     // tested
339 
340     // if all conditions are met then check if resulting
341     // root word in the dictionary
342 
343     if (test_condition(tmpword.c_str())) {
344       tmpl += strip.size();
345 
346       // prefix matched but no root word was found
347       // if aeXPRODUCT is allowed, try again but now
348       // ross checked combined with a suffix
349 
350       if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
351         result = pmyMgr->suffix_check_twosfx_morph(tmpword.c_str(), tmpl,
352                                                    aeXPRODUCT,
353                                                    this, needflag);
354       }
355     }
356   }
357   return result;
358 }
359 
360 // check if this prefix entry matches
check_morph(const char * word,int len,char in_compound,const FLAG needflag)361 std::string PfxEntry::check_morph(const char* word,
362                                   int len,
363                                   char in_compound,
364                                   const FLAG needflag) {
365   std::string result;
366 
367   // on entry prefix is 0 length or already matches the beginning of the word.
368   // So if the remaining root word has positive length
369   // and if there are enough chars in root word and added back strip chars
370   // to meet the number of characters conditions, then test it
371 
372   int tmpl = len - appnd.size(); // length of tmpword
373 
374   if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
375       (tmpl + strip.size() >= numconds)) {
376     // generate new root word by removing prefix and adding
377     // back any characters that would have been stripped
378 
379     std::string tmpword(strip);
380     tmpword.append(word + appnd.size());
381 
382     // now make sure all of the conditions on characters
383     // are met.  Please see the appendix at the end of
384     // this file for more info on exactly what is being
385     // tested
386 
387     // if all conditions are met then check if resulting
388     // root word in the dictionary
389 
390     if (test_condition(tmpword.c_str())) {
391       tmpl += strip.size();
392       struct hentry* he;  // hash entry of root word or NULL
393       if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) {
394         do {
395           if (TESTAFF(he->astr, aflag, he->alen) &&
396               // forbid single prefixes with needaffix flag
397               !TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
398               // needflag
399               ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
400                (contclass && TESTAFF(contclass, needflag, contclasslen)))) {
401             if (morphcode) {
402               result.push_back(MSEP_FLD);
403               result.append(morphcode);
404             } else
405               result.append(getKey());
406             if (!HENTRY_FIND(he, MORPH_STEM)) {
407               result.push_back(MSEP_FLD);
408               result.append(MORPH_STEM);
409               result.append(HENTRY_WORD(he));
410             }
411             // store the pointer of the hash entry
412             if (HENTRY_DATA(he)) {
413               result.push_back(MSEP_FLD);
414               result.append(HENTRY_DATA2(he));
415             } else {
416               // return with debug information
417               char* flag = pmyMgr->encode_flag(getFlag());
418               result.push_back(MSEP_FLD);
419               result.append(MORPH_FLAG);
420               result.append(flag);
421               free(flag);
422             }
423             result.push_back(MSEP_REC);
424           }
425           he = he->next_homonym;
426         } while (he);
427       }
428 
429       // prefix matched but no root word was found
430       // if aeXPRODUCT is allowed, try again but now
431       // ross checked combined with a suffix
432 
433       if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
434         std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, aeXPRODUCT, this,
435                                                     FLAG_NULL, needflag);
436         if (!st.empty()) {
437           result.append(st);
438         }
439       }
440     }
441   }
442 
443   return result;
444 }
445 
SfxEntry(AffixMgr * pmgr)446 SfxEntry::SfxEntry(AffixMgr* pmgr)
447     : pmyMgr(pmgr)  // register affix manager
448       ,
449       next(NULL),
450       nexteq(NULL),
451       nextne(NULL),
452       flgnxt(NULL),
453       l_morph(NULL),
454       r_morph(NULL),
455       eq_morph(NULL) {
456 }
457 
458 // add suffix to this word assuming conditions hold
add(const char * word,size_t len)459 std::string SfxEntry::add(const char* word, size_t len) {
460   std::string result;
461   /* make sure all conditions match */
462   if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&
463       (len >= numconds) && test_condition(word + len, word) &&
464       (!strip.size() ||
465        (strcmp(word + len - strip.size(), strip.c_str()) == 0))) {
466     result.assign(word);
467     /* we have a match so add suffix */
468     result.replace(len - strip.size(), std::string::npos, appnd);
469   }
470   return result;
471 }
472 
nextchar(char * p)473 inline char* SfxEntry::nextchar(char* p) {
474   if (p) {
475     p++;
476     if (opts & aeLONGCOND) {
477       // jump to the 2nd part of the condition
478       if (p == c.l.conds1 + MAXCONDLEN_1)
479         return c.l.conds2;
480       // end of the MAXCONDLEN length condition
481     } else if (p == c.conds + MAXCONDLEN)
482       return NULL;
483     return *p ? p : NULL;
484   }
485   return NULL;
486 }
487 
test_condition(const char * st,const char * beg)488 inline int SfxEntry::test_condition(const char* st, const char* beg) {
489   const char* pos = NULL;  // group with pos input position
490   bool neg = false;        // complementer
491   bool ingroup = false;    // character in the group
492   if (numconds == 0)
493     return 1;
494   char* p = c.conds;
495   st--;
496   int i = 1;
497   while (1) {
498     switch (*p) {
499       case '\0':
500         return 1;
501       case '[':
502         p = nextchar(p);
503         pos = st;
504         break;
505       case '^':
506         p = nextchar(p);
507         neg = true;
508         break;
509       case ']':
510         if (!neg && !ingroup)
511           return 0;
512         i++;
513         // skip the next character
514         if (!ingroup) {
515           for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--)
516             ;
517           st--;
518         }
519         pos = NULL;
520         neg = false;
521         ingroup = false;
522         p = nextchar(p);
523         if (st < beg && p)
524           return 0;  // word <= condition
525         break;
526       case '.':
527         if (!pos) {
528           // dots are not metacharacters in groups: [.]
529           p = nextchar(p);
530           // skip the next character
531           for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80;
532                st--)
533             ;
534           if (st < beg) {  // word <= condition
535             if (p)
536               return 0;
537             else
538               return 1;
539           }
540           if ((opts & aeUTF8) && (*st & 0x80)) {  // head of the UTF-8 character
541             st--;
542             if (st < beg) {  // word <= condition
543               if (p)
544                 return 0;
545               else
546                 return 1;
547             }
548           }
549           break;
550         }
551       /* FALLTHROUGH */
552       default: {
553         if (*st == *p) {
554           p = nextchar(p);
555           if ((opts & aeUTF8) && (*st & 0x80)) {
556             st--;
557             while (p && (st >= beg)) {
558               if (*p != *st) {
559                 if (!pos)
560                   return 0;
561                 st = pos;
562                 break;
563               }
564               // first byte of the UTF-8 multibyte character
565               if ((*p & 0xc0) != 0x80)
566                 break;
567               p = nextchar(p);
568               st--;
569             }
570             if (pos && st != pos) {
571               if (neg)
572                 return 0;
573               else if (i == numconds)
574                 return 1;
575               ingroup = true;
576               while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
577               }
578               st--;
579             }
580             if (p && *p != ']')
581               p = nextchar(p);
582           } else if (pos) {
583             if (neg)
584               return 0;
585             else if (i == numconds)
586               return 1;
587             ingroup = true;
588             while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
589             }
590             //			if (p && *p != ']') p = nextchar(p);
591             st--;
592           }
593           if (!pos) {
594             i++;
595             st--;
596           }
597           if (st < beg && p && *p != ']')
598             return 0;      // word <= condition
599         } else if (pos) {  // group
600           p = nextchar(p);
601         } else
602           return 0;
603       }
604     }
605     if (!p)
606       return 1;
607   }
608 }
609 
610 // see if this suffix is present in the word
checkword(const char * word,int len,int optflags,PfxEntry * ppfx,const FLAG cclass,const FLAG needflag,const FLAG badflag)611 struct hentry* SfxEntry::checkword(const char* word,
612                                    int len,
613                                    int optflags,
614                                    PfxEntry* ppfx,
615                                    const FLAG cclass,
616                                    const FLAG needflag,
617                                    const FLAG badflag) {
618   struct hentry* he;  // hash entry pointer
619   PfxEntry* ep = ppfx;
620 
621   // if this suffix is being cross checked with a prefix
622   // but it does not support cross products skip it
623 
624   if (((optflags & aeXPRODUCT) != 0) && ((opts & aeXPRODUCT) == 0))
625     return NULL;
626 
627   // upon entry suffix is 0 length or already matches the end of the word.
628   // So if the remaining root word has positive length
629   // and if there are enough chars in root word and added back strip chars
630   // to meet the number of characters conditions, then test it
631 
632   int tmpl = len - appnd.size(); // length of tmpword
633   // the second condition is not enough for UTF-8 strings
634   // it checked in test_condition()
635 
636   if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
637       (tmpl + strip.size() >= numconds)) {
638     // generate new root word by removing suffix and adding
639     // back any characters that would have been stripped or
640     // or null terminating the shorter string
641 
642     std::string tmpstring(word, tmpl);
643     if (strip.size()) {
644       tmpstring.append(strip);
645     }
646 
647     const char* tmpword = tmpstring.c_str();
648     const char* endword = tmpword + tmpstring.size();
649 
650     // now make sure all of the conditions on characters
651     // are met.  Please see the appendix at the end of
652     // this file for more info on exactly what is being
653     // tested
654 
655     // if all conditions are met then check if resulting
656     // root word in the dictionary
657 
658     if (test_condition(endword, tmpword)) {
659 #ifdef SZOSZABLYA_POSSIBLE_ROOTS
660       fprintf(stdout, "%s %s %c\n", word, tmpword, aflag);
661 #endif
662       if ((he = pmyMgr->lookup(tmpword)) != NULL) {
663         do {
664           // check conditional suffix (enabled by prefix)
665           if ((TESTAFF(he->astr, aflag, he->alen) ||
666                (ep && ep->getCont() &&
667                 TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
668               (((optflags & aeXPRODUCT) == 0) ||
669                (ep && TESTAFF(he->astr, ep->getFlag(), he->alen)) ||
670                // enabled by prefix
671                ((contclass) &&
672                 (ep && TESTAFF(contclass, ep->getFlag(), contclasslen)))) &&
673               // handle cont. class
674               ((!cclass) ||
675                ((contclass) && TESTAFF(contclass, cclass, contclasslen))) &&
676               // check only in compound homonyms (bad flags)
677               (!badflag || !TESTAFF(he->astr, badflag, he->alen)) &&
678               // handle required flag
679               ((!needflag) ||
680                (TESTAFF(he->astr, needflag, he->alen) ||
681                 ((contclass) && TESTAFF(contclass, needflag, contclasslen)))))
682             return he;
683           he = he->next_homonym;  // check homonyms
684         } while (he);
685       }
686     }
687   }
688   return NULL;
689 }
690 
691 // see if two-level suffix is present in the word
check_twosfx(const char * word,int len,int optflags,PfxEntry * ppfx,const FLAG needflag)692 struct hentry* SfxEntry::check_twosfx(const char* word,
693                                       int len,
694                                       int optflags,
695                                       PfxEntry* ppfx,
696                                       const FLAG needflag) {
697   PfxEntry* ep = ppfx;
698 
699   // if this suffix is being cross checked with a prefix
700   // but it does not support cross products skip it
701 
702   if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
703     return NULL;
704 
705   // upon entry suffix is 0 length or already matches the end of the word.
706   // So if the remaining root word has positive length
707   // and if there are enough chars in root word and added back strip chars
708   // to meet the number of characters conditions, then test it
709 
710   int tmpl = len - appnd.size(); // length of tmpword
711 
712   if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
713       (tmpl + strip.size() >= numconds)) {
714     // generate new root word by removing suffix and adding
715     // back any characters that would have been stripped or
716     // or null terminating the shorter string
717 
718     std::string tmpword(word);
719     tmpword.resize(tmpl);
720     tmpword.append(strip);
721     tmpl += strip.size();
722 
723     const char* beg = tmpword.c_str();
724     const char* end = beg + tmpl;
725 
726     // now make sure all of the conditions on characters
727     // are met.  Please see the appendix at the end of
728     // this file for more info on exactly what is being
729     // tested
730 
731     // if all conditions are met then recall suffix_check
732 
733     if (test_condition(end, beg)) {
734       struct hentry* he;  // hash entry pointer
735       if (ppfx) {
736         // handle conditional suffix
737         if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
738           he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL,
739                                     (FLAG)aflag, needflag, IN_CPD_NOT);
740         else
741           he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, optflags, ppfx,
742                                     (FLAG)aflag, needflag, IN_CPD_NOT);
743       } else {
744         he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL,
745                                   (FLAG)aflag, needflag, IN_CPD_NOT);
746       }
747       if (he)
748         return he;
749     }
750   }
751   return NULL;
752 }
753 
754 // see if two-level suffix is present in the word
check_twosfx_morph(const char * word,int len,int optflags,PfxEntry * ppfx,const FLAG needflag)755 std::string SfxEntry::check_twosfx_morph(const char* word,
756                                          int len,
757                                          int optflags,
758                                          PfxEntry* ppfx,
759                                          const FLAG needflag) {
760   PfxEntry* ep = ppfx;
761 
762   std::string result;
763 
764   // if this suffix is being cross checked with a prefix
765   // but it does not support cross products skip it
766 
767   if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
768     return result;
769 
770   // upon entry suffix is 0 length or already matches the end of the word.
771   // So if the remaining root word has positive length
772   // and if there are enough chars in root word and added back strip chars
773   // to meet the number of characters conditions, then test it
774 
775   int tmpl = len - appnd.size(); // length of tmpword
776 
777   if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
778       (tmpl + strip.size() >= numconds)) {
779     // generate new root word by removing suffix and adding
780     // back any characters that would have been stripped or
781     // or null terminating the shorter string
782 
783     std::string tmpword(word);
784     tmpword.resize(tmpl);
785     tmpword.append(strip);
786     tmpl += strip.size();
787 
788     const char* beg = tmpword.c_str();
789     const char* end = beg + tmpl;
790 
791     // now make sure all of the conditions on characters
792     // are met.  Please see the appendix at the end of
793     // this file for more info on exactly what is being
794     // tested
795 
796     // if all conditions are met then recall suffix_check
797 
798     if (test_condition(end, beg)) {
799       if (ppfx) {
800         // handle conditional suffix
801         if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) {
802           std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag,
803                                                       needflag);
804           if (!st.empty()) {
805             if (ppfx->getMorph()) {
806               result.append(ppfx->getMorph());
807               result.push_back(MSEP_FLD);
808             }
809             result.append(st);
810             mychomp(result);
811           }
812         } else {
813           std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, optflags, ppfx, aflag,
814                                                       needflag);
815           if (!st.empty()) {
816             result.append(st);
817             mychomp(result);
818           }
819         }
820       } else {
821         std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag, needflag);
822         if (!st.empty()) {
823           result.append(st);
824           mychomp(result);
825         }
826       }
827     }
828   }
829   return result;
830 }
831 
832 // get next homonym with same affix
get_next_homonym(struct hentry * he,int optflags,PfxEntry * ppfx,const FLAG cclass,const FLAG needflag)833 struct hentry* SfxEntry::get_next_homonym(struct hentry* he,
834                                           int optflags,
835                                           PfxEntry* ppfx,
836                                           const FLAG cclass,
837                                           const FLAG needflag) {
838   PfxEntry* ep = ppfx;
839   FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL;
840 
841   while (he->next_homonym) {
842     he = he->next_homonym;
843     if ((TESTAFF(he->astr, aflag, he->alen) ||
844          (ep && ep->getCont() &&
845           TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
846         ((optflags & aeXPRODUCT) == 0 || TESTAFF(he->astr, eFlag, he->alen) ||
847          // handle conditional suffix
848          ((contclass) && TESTAFF(contclass, eFlag, contclasslen))) &&
849         // handle cont. class
850         ((!cclass) ||
851          ((contclass) && TESTAFF(contclass, cclass, contclasslen))) &&
852         // handle required flag
853         ((!needflag) ||
854          (TESTAFF(he->astr, needflag, he->alen) ||
855           ((contclass) && TESTAFF(contclass, needflag, contclasslen)))))
856       return he;
857   }
858   return NULL;
859 }
860 
initReverseWord()861 void SfxEntry::initReverseWord() {
862   rappnd = appnd;
863   reverseword(rappnd);
864 }
865