1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 *
4 * Copyright (C) 2002-2017 Németh László
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17 *
18 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23 *
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
35 *
36 * ***** END LICENSE BLOCK ***** */
37 /*
38 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
39 * And Contributors. All rights reserved.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 *
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 *
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 *
52 * 3. All modifications to the source code must be clearly marked as
53 * such. Binary redistributions based on modified source code
54 * must be clearly marked as modified versions in the documentation
55 * and/or other materials provided with the distribution.
56 *
57 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
58 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
60 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
61 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
62 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
63 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
64 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 */
70
71 #include <stdlib.h>
72 #include <string.h>
73 #include <stdio.h>
74 #include <ctype.h>
75
76 #include "affentry.hxx"
77 #include "csutil.hxx"
78
~AffEntry()79 AffEntry::~AffEntry() {
80 if (opts & aeLONGCOND)
81 free(c.l.conds2);
82 if (morphcode && !(opts & aeALIASM))
83 free(morphcode);
84 if (contclass && !(opts & aeALIASF))
85 free(contclass);
86 }
87
PfxEntry(AffixMgr * pmgr)88 PfxEntry::PfxEntry(AffixMgr* pmgr)
89 // register affix manager
90 : pmyMgr(pmgr),
91 next(NULL),
92 nexteq(NULL),
93 nextne(NULL),
94 flgnxt(NULL) {
95 }
96
97 // add prefix to this word assuming conditions hold
add(const char * word,size_t len)98 std::string PfxEntry::add(const char* word, size_t len) {
99 std::string result;
100 if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&
101 (len >= numconds) && test_condition(word) &&
102 (!strip.size() || (strncmp(word, strip.c_str(), strip.size()) == 0))) {
103 /* we have a match so add prefix */
104 result.assign(appnd);
105 result.append(word + strip.size());
106 }
107 return result;
108 }
109
nextchar(char * p)110 inline char* PfxEntry::nextchar(char* p) {
111 if (p) {
112 p++;
113 if (opts & aeLONGCOND) {
114 // jump to the 2nd part of the condition
115 if (p == c.conds + MAXCONDLEN_1)
116 return c.l.conds2;
117 // end of the MAXCONDLEN length condition
118 } else if (p == c.conds + MAXCONDLEN)
119 return NULL;
120 return *p ? p : NULL;
121 }
122 return NULL;
123 }
124
test_condition(const char * st)125 inline int PfxEntry::test_condition(const char* st) {
126 const char* pos = NULL; // group with pos input position
127 bool neg = false; // complementer
128 bool ingroup = false; // character in the group
129 if (numconds == 0)
130 return 1;
131 char* p = c.conds;
132 while (1) {
133 switch (*p) {
134 case '\0':
135 return 1;
136 case '[': {
137 neg = false;
138 ingroup = false;
139 p = nextchar(p);
140 pos = st;
141 break;
142 }
143 case '^': {
144 p = nextchar(p);
145 neg = true;
146 break;
147 }
148 case ']': {
149 if ((neg && ingroup) || (!neg && !ingroup))
150 return 0;
151 pos = NULL;
152 p = nextchar(p);
153 // skip the next character
154 if (!ingroup && *st)
155 for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++)
156 ;
157 if (*st == '\0' && p)
158 return 0; // word <= condition
159 break;
160 }
161 case '.':
162 if (!pos) { // dots are not metacharacters in groups: [.]
163 p = nextchar(p);
164 // skip the next character
165 for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++)
166 ;
167 if (*st == '\0' && p)
168 return 0; // word <= condition
169 break;
170 }
171 /* FALLTHROUGH */
172 default: {
173 if (*st == *p) {
174 st++;
175 p = nextchar(p);
176 if ((opts & aeUTF8) && (*(st - 1) & 0x80)) { // multibyte
177 while (p && (*p & 0xc0) == 0x80) { // character
178 if (*p != *st) {
179 if (!pos)
180 return 0;
181 st = pos;
182 break;
183 }
184 p = nextchar(p);
185 st++;
186 }
187 if (pos && st != pos) {
188 ingroup = true;
189 while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
190 }
191 }
192 } else if (pos) {
193 ingroup = true;
194 while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
195 }
196 }
197 } else if (pos) { // group
198 p = nextchar(p);
199 } else
200 return 0;
201 }
202 }
203 if (!p)
204 return 1;
205 }
206 }
207
208 // check if this prefix entry matches
checkword(const char * word,int len,char in_compound,const FLAG needflag)209 struct hentry* PfxEntry::checkword(const char* word,
210 int len,
211 char in_compound,
212 const FLAG needflag) {
213 struct hentry* he; // hash entry of root word or NULL
214
215 // on entry prefix is 0 length or already matches the beginning of the word.
216 // So if the remaining root word has positive length
217 // and if there are enough chars in root word and added back strip chars
218 // to meet the number of characters conditions, then test it
219
220 int tmpl = len - appnd.size(); // length of tmpword
221
222 if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) {
223 // generate new root word by removing prefix and adding
224 // back any characters that would have been stripped
225
226 std::string tmpword(strip);
227 tmpword.append(word + appnd.size());
228
229 // now make sure all of the conditions on characters
230 // are met. Please see the appendix at the end of
231 // this file for more info on exactly what is being
232 // tested
233
234 // if all conditions are met then check if resulting
235 // root word in the dictionary
236
237 if (test_condition(tmpword.c_str())) {
238 tmpl += strip.size();
239 if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) {
240 do {
241 if (TESTAFF(he->astr, aflag, he->alen) &&
242 // forbid single prefixes with needaffix flag
243 !TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
244 // needflag
245 ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
246 (contclass && TESTAFF(contclass, needflag, contclasslen))))
247 return he;
248 he = he->next_homonym; // check homonyms
249 } while (he);
250 }
251
252 // prefix matched but no root word was found
253 // if aeXPRODUCT is allowed, try again but now
254 // ross checked combined with a suffix
255
256 // if ((opts & aeXPRODUCT) && in_compound) {
257 if ((opts & aeXPRODUCT)) {
258 he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, aeXPRODUCT, this,
259 FLAG_NULL, needflag, in_compound);
260 if (he)
261 return he;
262 }
263 }
264 }
265 return NULL;
266 }
267
268 // check if this prefix entry matches
check_twosfx(const char * word,int len,char in_compound,const FLAG needflag)269 struct hentry* PfxEntry::check_twosfx(const char* word,
270 int len,
271 char in_compound,
272 const FLAG needflag) {
273 // on entry prefix is 0 length or already matches the beginning of the word.
274 // So if the remaining root word has positive length
275 // and if there are enough chars in root word and added back strip chars
276 // to meet the number of characters conditions, then test it
277
278 int tmpl = len - appnd.size(); // length of tmpword
279
280 if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
281 (tmpl + strip.size() >= numconds)) {
282 // generate new root word by removing prefix and adding
283 // back any characters that would have been stripped
284
285 std::string tmpword(strip);
286 tmpword.append(word + appnd.size());
287
288 // now make sure all of the conditions on characters
289 // are met. Please see the appendix at the end of
290 // this file for more info on exactly what is being
291 // tested
292
293 // if all conditions are met then check if resulting
294 // root word in the dictionary
295
296 if (test_condition(tmpword.c_str())) {
297 tmpl += strip.size();
298
299 // prefix matched but no root word was found
300 // if aeXPRODUCT is allowed, try again but now
301 // cross checked combined with a suffix
302
303 if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
304 // hash entry of root word or NULL
305 struct hentry* he = pmyMgr->suffix_check_twosfx(tmpword.c_str(), tmpl, aeXPRODUCT, this,
306 needflag);
307 if (he)
308 return he;
309 }
310 }
311 }
312 return NULL;
313 }
314
315 // check if this prefix entry matches
check_twosfx_morph(const char * word,int len,char in_compound,const FLAG needflag)316 std::string PfxEntry::check_twosfx_morph(const char* word,
317 int len,
318 char in_compound,
319 const FLAG needflag) {
320 std::string result;
321 // on entry prefix is 0 length or already matches the beginning of the word.
322 // So if the remaining root word has positive length
323 // and if there are enough chars in root word and added back strip chars
324 // to meet the number of characters conditions, then test it
325 int tmpl = len - appnd.size(); // length of tmpword
326
327 if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
328 (tmpl + strip.size() >= numconds)) {
329 // generate new root word by removing prefix and adding
330 // back any characters that would have been stripped
331
332 std::string tmpword(strip);
333 tmpword.append(word + appnd.size());
334
335 // now make sure all of the conditions on characters
336 // are met. Please see the appendix at the end of
337 // this file for more info on exactly what is being
338 // tested
339
340 // if all conditions are met then check if resulting
341 // root word in the dictionary
342
343 if (test_condition(tmpword.c_str())) {
344 tmpl += strip.size();
345
346 // prefix matched but no root word was found
347 // if aeXPRODUCT is allowed, try again but now
348 // ross checked combined with a suffix
349
350 if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
351 result = pmyMgr->suffix_check_twosfx_morph(tmpword.c_str(), tmpl,
352 aeXPRODUCT,
353 this, needflag);
354 }
355 }
356 }
357 return result;
358 }
359
360 // check if this prefix entry matches
check_morph(const char * word,int len,char in_compound,const FLAG needflag)361 std::string PfxEntry::check_morph(const char* word,
362 int len,
363 char in_compound,
364 const FLAG needflag) {
365 std::string result;
366
367 // on entry prefix is 0 length or already matches the beginning of the word.
368 // So if the remaining root word has positive length
369 // and if there are enough chars in root word and added back strip chars
370 // to meet the number of characters conditions, then test it
371
372 int tmpl = len - appnd.size(); // length of tmpword
373
374 if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
375 (tmpl + strip.size() >= numconds)) {
376 // generate new root word by removing prefix and adding
377 // back any characters that would have been stripped
378
379 std::string tmpword(strip);
380 tmpword.append(word + appnd.size());
381
382 // now make sure all of the conditions on characters
383 // are met. Please see the appendix at the end of
384 // this file for more info on exactly what is being
385 // tested
386
387 // if all conditions are met then check if resulting
388 // root word in the dictionary
389
390 if (test_condition(tmpword.c_str())) {
391 tmpl += strip.size();
392 struct hentry* he; // hash entry of root word or NULL
393 if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) {
394 do {
395 if (TESTAFF(he->astr, aflag, he->alen) &&
396 // forbid single prefixes with needaffix flag
397 !TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
398 // needflag
399 ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
400 (contclass && TESTAFF(contclass, needflag, contclasslen)))) {
401 if (morphcode) {
402 result.push_back(MSEP_FLD);
403 result.append(morphcode);
404 } else
405 result.append(getKey());
406 if (!HENTRY_FIND(he, MORPH_STEM)) {
407 result.push_back(MSEP_FLD);
408 result.append(MORPH_STEM);
409 result.append(HENTRY_WORD(he));
410 }
411 // store the pointer of the hash entry
412 if (HENTRY_DATA(he)) {
413 result.push_back(MSEP_FLD);
414 result.append(HENTRY_DATA2(he));
415 } else {
416 // return with debug information
417 char* flag = pmyMgr->encode_flag(getFlag());
418 result.push_back(MSEP_FLD);
419 result.append(MORPH_FLAG);
420 result.append(flag);
421 free(flag);
422 }
423 result.push_back(MSEP_REC);
424 }
425 he = he->next_homonym;
426 } while (he);
427 }
428
429 // prefix matched but no root word was found
430 // if aeXPRODUCT is allowed, try again but now
431 // ross checked combined with a suffix
432
433 if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
434 std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, aeXPRODUCT, this,
435 FLAG_NULL, needflag);
436 if (!st.empty()) {
437 result.append(st);
438 }
439 }
440 }
441 }
442
443 return result;
444 }
445
SfxEntry(AffixMgr * pmgr)446 SfxEntry::SfxEntry(AffixMgr* pmgr)
447 : pmyMgr(pmgr) // register affix manager
448 ,
449 next(NULL),
450 nexteq(NULL),
451 nextne(NULL),
452 flgnxt(NULL),
453 l_morph(NULL),
454 r_morph(NULL),
455 eq_morph(NULL) {
456 }
457
458 // add suffix to this word assuming conditions hold
add(const char * word,size_t len)459 std::string SfxEntry::add(const char* word, size_t len) {
460 std::string result;
461 /* make sure all conditions match */
462 if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&
463 (len >= numconds) && test_condition(word + len, word) &&
464 (!strip.size() ||
465 (strcmp(word + len - strip.size(), strip.c_str()) == 0))) {
466 result.assign(word);
467 /* we have a match so add suffix */
468 result.replace(len - strip.size(), std::string::npos, appnd);
469 }
470 return result;
471 }
472
nextchar(char * p)473 inline char* SfxEntry::nextchar(char* p) {
474 if (p) {
475 p++;
476 if (opts & aeLONGCOND) {
477 // jump to the 2nd part of the condition
478 if (p == c.l.conds1 + MAXCONDLEN_1)
479 return c.l.conds2;
480 // end of the MAXCONDLEN length condition
481 } else if (p == c.conds + MAXCONDLEN)
482 return NULL;
483 return *p ? p : NULL;
484 }
485 return NULL;
486 }
487
test_condition(const char * st,const char * beg)488 inline int SfxEntry::test_condition(const char* st, const char* beg) {
489 const char* pos = NULL; // group with pos input position
490 bool neg = false; // complementer
491 bool ingroup = false; // character in the group
492 if (numconds == 0)
493 return 1;
494 char* p = c.conds;
495 st--;
496 int i = 1;
497 while (1) {
498 switch (*p) {
499 case '\0':
500 return 1;
501 case '[':
502 p = nextchar(p);
503 pos = st;
504 break;
505 case '^':
506 p = nextchar(p);
507 neg = true;
508 break;
509 case ']':
510 if (!neg && !ingroup)
511 return 0;
512 i++;
513 // skip the next character
514 if (!ingroup) {
515 for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--)
516 ;
517 st--;
518 }
519 pos = NULL;
520 neg = false;
521 ingroup = false;
522 p = nextchar(p);
523 if (st < beg && p)
524 return 0; // word <= condition
525 break;
526 case '.':
527 if (!pos) {
528 // dots are not metacharacters in groups: [.]
529 p = nextchar(p);
530 // skip the next character
531 for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80;
532 st--)
533 ;
534 if (st < beg) { // word <= condition
535 if (p)
536 return 0;
537 else
538 return 1;
539 }
540 if ((opts & aeUTF8) && (*st & 0x80)) { // head of the UTF-8 character
541 st--;
542 if (st < beg) { // word <= condition
543 if (p)
544 return 0;
545 else
546 return 1;
547 }
548 }
549 break;
550 }
551 /* FALLTHROUGH */
552 default: {
553 if (*st == *p) {
554 p = nextchar(p);
555 if ((opts & aeUTF8) && (*st & 0x80)) {
556 st--;
557 while (p && (st >= beg)) {
558 if (*p != *st) {
559 if (!pos)
560 return 0;
561 st = pos;
562 break;
563 }
564 // first byte of the UTF-8 multibyte character
565 if ((*p & 0xc0) != 0x80)
566 break;
567 p = nextchar(p);
568 st--;
569 }
570 if (pos && st != pos) {
571 if (neg)
572 return 0;
573 else if (i == numconds)
574 return 1;
575 ingroup = true;
576 while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
577 }
578 st--;
579 }
580 if (p && *p != ']')
581 p = nextchar(p);
582 } else if (pos) {
583 if (neg)
584 return 0;
585 else if (i == numconds)
586 return 1;
587 ingroup = true;
588 while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
589 }
590 // if (p && *p != ']') p = nextchar(p);
591 st--;
592 }
593 if (!pos) {
594 i++;
595 st--;
596 }
597 if (st < beg && p && *p != ']')
598 return 0; // word <= condition
599 } else if (pos) { // group
600 p = nextchar(p);
601 } else
602 return 0;
603 }
604 }
605 if (!p)
606 return 1;
607 }
608 }
609
610 // see if this suffix is present in the word
checkword(const char * word,int len,int optflags,PfxEntry * ppfx,const FLAG cclass,const FLAG needflag,const FLAG badflag)611 struct hentry* SfxEntry::checkword(const char* word,
612 int len,
613 int optflags,
614 PfxEntry* ppfx,
615 const FLAG cclass,
616 const FLAG needflag,
617 const FLAG badflag) {
618 struct hentry* he; // hash entry pointer
619 PfxEntry* ep = ppfx;
620
621 // if this suffix is being cross checked with a prefix
622 // but it does not support cross products skip it
623
624 if (((optflags & aeXPRODUCT) != 0) && ((opts & aeXPRODUCT) == 0))
625 return NULL;
626
627 // upon entry suffix is 0 length or already matches the end of the word.
628 // So if the remaining root word has positive length
629 // and if there are enough chars in root word and added back strip chars
630 // to meet the number of characters conditions, then test it
631
632 int tmpl = len - appnd.size(); // length of tmpword
633 // the second condition is not enough for UTF-8 strings
634 // it checked in test_condition()
635
636 if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
637 (tmpl + strip.size() >= numconds)) {
638 // generate new root word by removing suffix and adding
639 // back any characters that would have been stripped or
640 // or null terminating the shorter string
641
642 std::string tmpstring(word, tmpl);
643 if (strip.size()) {
644 tmpstring.append(strip);
645 }
646
647 const char* tmpword = tmpstring.c_str();
648 const char* endword = tmpword + tmpstring.size();
649
650 // now make sure all of the conditions on characters
651 // are met. Please see the appendix at the end of
652 // this file for more info on exactly what is being
653 // tested
654
655 // if all conditions are met then check if resulting
656 // root word in the dictionary
657
658 if (test_condition(endword, tmpword)) {
659 #ifdef SZOSZABLYA_POSSIBLE_ROOTS
660 fprintf(stdout, "%s %s %c\n", word, tmpword, aflag);
661 #endif
662 if ((he = pmyMgr->lookup(tmpword)) != NULL) {
663 do {
664 // check conditional suffix (enabled by prefix)
665 if ((TESTAFF(he->astr, aflag, he->alen) ||
666 (ep && ep->getCont() &&
667 TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
668 (((optflags & aeXPRODUCT) == 0) ||
669 (ep && TESTAFF(he->astr, ep->getFlag(), he->alen)) ||
670 // enabled by prefix
671 ((contclass) &&
672 (ep && TESTAFF(contclass, ep->getFlag(), contclasslen)))) &&
673 // handle cont. class
674 ((!cclass) ||
675 ((contclass) && TESTAFF(contclass, cclass, contclasslen))) &&
676 // check only in compound homonyms (bad flags)
677 (!badflag || !TESTAFF(he->astr, badflag, he->alen)) &&
678 // handle required flag
679 ((!needflag) ||
680 (TESTAFF(he->astr, needflag, he->alen) ||
681 ((contclass) && TESTAFF(contclass, needflag, contclasslen)))))
682 return he;
683 he = he->next_homonym; // check homonyms
684 } while (he);
685 }
686 }
687 }
688 return NULL;
689 }
690
691 // see if two-level suffix is present in the word
check_twosfx(const char * word,int len,int optflags,PfxEntry * ppfx,const FLAG needflag)692 struct hentry* SfxEntry::check_twosfx(const char* word,
693 int len,
694 int optflags,
695 PfxEntry* ppfx,
696 const FLAG needflag) {
697 PfxEntry* ep = ppfx;
698
699 // if this suffix is being cross checked with a prefix
700 // but it does not support cross products skip it
701
702 if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
703 return NULL;
704
705 // upon entry suffix is 0 length or already matches the end of the word.
706 // So if the remaining root word has positive length
707 // and if there are enough chars in root word and added back strip chars
708 // to meet the number of characters conditions, then test it
709
710 int tmpl = len - appnd.size(); // length of tmpword
711
712 if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
713 (tmpl + strip.size() >= numconds)) {
714 // generate new root word by removing suffix and adding
715 // back any characters that would have been stripped or
716 // or null terminating the shorter string
717
718 std::string tmpword(word);
719 tmpword.resize(tmpl);
720 tmpword.append(strip);
721 tmpl += strip.size();
722
723 const char* beg = tmpword.c_str();
724 const char* end = beg + tmpl;
725
726 // now make sure all of the conditions on characters
727 // are met. Please see the appendix at the end of
728 // this file for more info on exactly what is being
729 // tested
730
731 // if all conditions are met then recall suffix_check
732
733 if (test_condition(end, beg)) {
734 struct hentry* he; // hash entry pointer
735 if (ppfx) {
736 // handle conditional suffix
737 if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
738 he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL,
739 (FLAG)aflag, needflag, IN_CPD_NOT);
740 else
741 he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, optflags, ppfx,
742 (FLAG)aflag, needflag, IN_CPD_NOT);
743 } else {
744 he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL,
745 (FLAG)aflag, needflag, IN_CPD_NOT);
746 }
747 if (he)
748 return he;
749 }
750 }
751 return NULL;
752 }
753
754 // see if two-level suffix is present in the word
check_twosfx_morph(const char * word,int len,int optflags,PfxEntry * ppfx,const FLAG needflag)755 std::string SfxEntry::check_twosfx_morph(const char* word,
756 int len,
757 int optflags,
758 PfxEntry* ppfx,
759 const FLAG needflag) {
760 PfxEntry* ep = ppfx;
761
762 std::string result;
763
764 // if this suffix is being cross checked with a prefix
765 // but it does not support cross products skip it
766
767 if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
768 return result;
769
770 // upon entry suffix is 0 length or already matches the end of the word.
771 // So if the remaining root word has positive length
772 // and if there are enough chars in root word and added back strip chars
773 // to meet the number of characters conditions, then test it
774
775 int tmpl = len - appnd.size(); // length of tmpword
776
777 if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
778 (tmpl + strip.size() >= numconds)) {
779 // generate new root word by removing suffix and adding
780 // back any characters that would have been stripped or
781 // or null terminating the shorter string
782
783 std::string tmpword(word);
784 tmpword.resize(tmpl);
785 tmpword.append(strip);
786 tmpl += strip.size();
787
788 const char* beg = tmpword.c_str();
789 const char* end = beg + tmpl;
790
791 // now make sure all of the conditions on characters
792 // are met. Please see the appendix at the end of
793 // this file for more info on exactly what is being
794 // tested
795
796 // if all conditions are met then recall suffix_check
797
798 if (test_condition(end, beg)) {
799 if (ppfx) {
800 // handle conditional suffix
801 if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) {
802 std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag,
803 needflag);
804 if (!st.empty()) {
805 if (ppfx->getMorph()) {
806 result.append(ppfx->getMorph());
807 result.push_back(MSEP_FLD);
808 }
809 result.append(st);
810 mychomp(result);
811 }
812 } else {
813 std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, optflags, ppfx, aflag,
814 needflag);
815 if (!st.empty()) {
816 result.append(st);
817 mychomp(result);
818 }
819 }
820 } else {
821 std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag, needflag);
822 if (!st.empty()) {
823 result.append(st);
824 mychomp(result);
825 }
826 }
827 }
828 }
829 return result;
830 }
831
832 // get next homonym with same affix
get_next_homonym(struct hentry * he,int optflags,PfxEntry * ppfx,const FLAG cclass,const FLAG needflag)833 struct hentry* SfxEntry::get_next_homonym(struct hentry* he,
834 int optflags,
835 PfxEntry* ppfx,
836 const FLAG cclass,
837 const FLAG needflag) {
838 PfxEntry* ep = ppfx;
839 FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL;
840
841 while (he->next_homonym) {
842 he = he->next_homonym;
843 if ((TESTAFF(he->astr, aflag, he->alen) ||
844 (ep && ep->getCont() &&
845 TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
846 ((optflags & aeXPRODUCT) == 0 || TESTAFF(he->astr, eFlag, he->alen) ||
847 // handle conditional suffix
848 ((contclass) && TESTAFF(contclass, eFlag, contclasslen))) &&
849 // handle cont. class
850 ((!cclass) ||
851 ((contclass) && TESTAFF(contclass, cclass, contclasslen))) &&
852 // handle required flag
853 ((!needflag) ||
854 (TESTAFF(he->astr, needflag, he->alen) ||
855 ((contclass) && TESTAFF(contclass, needflag, contclasslen)))))
856 return he;
857 }
858 return NULL;
859 }
860
initReverseWord()861 void SfxEntry::initReverseWord() {
862 rappnd = appnd;
863 reverseword(rappnd);
864 }
865