1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 *
4 * Copyright (C) 2002-2017 Németh László
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17 *
18 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23 *
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
35 *
36 * ***** END LICENSE BLOCK ***** */
37
38 /* Munch a word list and generate a smaller root word list with affixes*/
39
40 #include <ctype.h>
41 #include <string.h>
42 #include <string>
43 #include <unistd.h>
44 #include <stdlib.h>
45 #include <stdint.h>
46 #include <stdio.h>
47 #include <stddef.h>
48 #include <sys/types.h>
49 #include <sys/stat.h>
50 #include <fcntl.h>
51 #include <limits>
52
53 #include "munch.h"
54
main(int argc,char ** argv)55 int main(int argc, char** argv) {
56 int i, j, k, n;
57 int rl, p, nwl;
58 int al;
59
60 FILE* wrdlst;
61 FILE* afflst;
62
63 char *nword, *wf, *af;
64 char as[(MAX_PREFIXES + MAX_SUFFIXES)];
65 char* ap;
66
67 struct hentry* ep;
68 struct hentry* ep1;
69 struct affent* pfxp;
70 struct affent* sfxp;
71
72 (void)argc;
73
74 /* first parse the command line options */
75 /* arg1 - wordlist, arg2 - affix file */
76
77 if (argv[1]) {
78 wf = mystrdup(argv[1]);
79 } else {
80 fprintf(stderr, "correct syntax is:\n");
81 fprintf(stderr, "munch word_list_file affix_file\n");
82 exit(1);
83 }
84 if (argv[2]) {
85 af = mystrdup(argv[2]);
86 } else {
87 fprintf(stderr, "correct syntax is:\n");
88 fprintf(stderr, "munch word_list_file affix_file\n");
89 exit(1);
90 }
91
92 /* open the affix file */
93 afflst = fopen(af, "r");
94 if (!afflst) {
95 fprintf(stderr, "Error - could not open affix description file\n");
96 exit(1);
97 }
98
99 /* step one is to parse the affix file building up the internal
100 affix data structures */
101
102 numpfx = 0;
103 numsfx = 0;
104
105 if (parse_aff_file(afflst)) {
106 fprintf(stderr, "Error - in affix file loading\n");
107 exit(1);
108 }
109 fclose(afflst);
110
111 fprintf(stderr, "parsed in %d prefixes and %d suffixes\n", numpfx, numsfx);
112
113 /* affix file is now parsed so create hash table of wordlist on the fly */
114
115 /* open the wordlist */
116 wrdlst = fopen(wf, "r");
117 if (!wrdlst) {
118 fprintf(stderr, "Error - could not open word list file\n");
119 exit(1);
120 }
121
122 if (load_tables(wrdlst)) {
123 fprintf(stderr, "Error building hash tables\n");
124 exit(1);
125 }
126 fclose(wrdlst);
127
128 for (i = 0; i < tablesize; i++) {
129 ep = &tableptr[i];
130 if (ep->word == NULL)
131 continue;
132 for (; ep != NULL; ep = ep->next) {
133 numroots = 0;
134 aff_chk(ep->word, strlen(ep->word));
135 if (numroots) {
136 /* now there might be a number of combinations */
137 /* of prefixes and suffixes that might match this */
138 /* word. So how to choose? As a first shot look */
139 /* for the shortest remaining root word to */
140 /* to maximize the combinatorial power */
141
142 /* but be careful, do not REQUIRE a specific combination */
143 /* of a prefix and a suffix to generate the word since */
144 /* that violates the rule that the root word with just */
145 /* the prefix or just the suffix must also exist in the */
146 /* wordlist as well */
147
148 /* in fact because of the cross product issue, this not a */
149 /* simple choice since some combinations of previous */
150 /* prefixes and new suffixes may not be valid. */
151 /* The only way to know is to simply try them all */
152
153 rl = 1000;
154 p = -1;
155
156 for (j = 0; j < numroots; j++) {
157 /* first collect the root word info and build up */
158 /* the potential new affix string */
159 nword = (roots[j].hashent)->word;
160 nwl = strlen(nword);
161 *as = '\0';
162 ap = as;
163 if (roots[j].prefix)
164 *ap++ = (roots[j].prefix)->achar;
165 if (roots[j].suffix)
166 *ap++ = (roots[j].suffix)->achar;
167 if ((roots[j].hashent)->affstr) {
168 strcpy(ap, (roots[j].hashent)->affstr);
169 } else {
170 *ap = '\0';
171 }
172 al = strlen(as);
173
174 /* now expand the potential affix string to generate */
175 /* all legal words and make sure they all exist in the */
176 /* word list */
177 numwords = 0;
178 wlist[numwords].word = mystrdup(nword);
179 wlist[numwords].pallow = 0;
180 numwords++;
181 n = 0;
182 if (al)
183 expand_rootword(nword, nwl, as);
184 for (k = 0; k < numwords; k++) {
185 if (lookup(wlist[k].word))
186 n++;
187 free(wlist[k].word);
188 wlist[k].word = NULL;
189 wlist[k].pallow = 0;
190 }
191
192 /* if all exist in word list then okay */
193 if (n == numwords) {
194 if (nwl < rl) {
195 rl = nwl;
196 p = j;
197 }
198 }
199 }
200 if (p != -1) {
201 ep1 = roots[p].hashent;
202 pfxp = roots[p].prefix;
203 sfxp = roots[p].suffix;
204 ep1->keep = 1;
205 if (pfxp != NULL)
206 add_affix_char(ep1, pfxp->achar);
207 if (sfxp != NULL)
208 add_affix_char(ep1, sfxp->achar);
209 } else {
210 ep->keep = 1;
211 }
212 } else {
213 ep->keep = 1;
214 }
215 }
216 }
217
218 /* now output only the words to keep along with affixes info */
219 /* first count how many words that is */
220 k = 0;
221 for (i = 0; i < tablesize; i++) {
222 ep = &tableptr[i];
223 if (ep->word == NULL)
224 continue;
225 for (; ep != NULL; ep = ep->next) {
226 if (ep->keep > 0)
227 k++;
228 }
229 }
230 fprintf(stdout, "%d\n", k);
231
232 for (i = 0; i < tablesize; i++) {
233 ep = &tableptr[i];
234 if (ep->word == NULL)
235 continue;
236 for (; ep != NULL; ep = ep->next) {
237 if (ep->keep > 0) {
238 if (ep->affstr != NULL) {
239 fprintf(stdout, "%s/%s\n", ep->word, ep->affstr);
240 } else {
241 fprintf(stdout, "%s\n", ep->word);
242 }
243 }
244 }
245 }
246 return 0;
247 }
248
parse_aff_file(FILE * afflst)249 int parse_aff_file(FILE* afflst) {
250 int i, j;
251 int numents = 0;
252 char achar = '\0';
253 short ff = 0;
254 struct affent* ptr = NULL;
255 struct affent* nptr = NULL;
256 char* line = (char*)malloc(MAX_LN_LEN);
257
258 while (fgets(line, MAX_LN_LEN, afflst)) {
259 mychomp(line);
260 char ft = ' ';
261 fprintf(stderr, "parsing line: %s\n", line);
262 if (strncmp(line, "PFX", 3) == 0)
263 ft = 'P';
264 if (strncmp(line, "SFX", 3) == 0)
265 ft = 'S';
266 if (ft != ' ') {
267 char* tp = line;
268 char* piece;
269 i = 0;
270 ff = 0;
271 while ((piece = mystrsep(&tp, ' '))) {
272 if (*piece != '\0') {
273 switch (i) {
274 case 0:
275 break;
276 case 1: {
277 achar = *piece;
278 break;
279 }
280 case 2: {
281 if (*piece == 'Y')
282 ff = XPRODUCT;
283 break;
284 }
285 case 3: {
286 numents = atoi(piece);
287 if ((numents <= 0) || ((std::numeric_limits<size_t>::max() /
288 sizeof(struct affent)) < static_cast<size_t>(numents))) {
289 fprintf(stderr, "Error: too many entries: %d\n", numents);
290 numents = 0;
291 } else {
292 ptr = (struct affent*)malloc(numents * sizeof(struct affent));
293 ptr->achar = achar;
294 ptr->xpflg = ff;
295 fprintf(stderr, "parsing %c entries %d\n", achar, numents);
296 }
297 break;
298 }
299 default:
300 break;
301 }
302 i++;
303 }
304 free(piece);
305 }
306 /* now parse all of the sub entries*/
307 nptr = ptr;
308 for (j = 0; j < numents; j++) {
309 if (!fgets(line, MAX_LN_LEN, afflst))
310 return 1;
311 mychomp(line);
312 tp = line;
313 i = 0;
314 while ((piece = mystrsep(&tp, ' '))) {
315 if (*piece != '\0') {
316 switch (i) {
317 case 0: {
318 if (nptr != ptr) {
319 nptr->achar = ptr->achar;
320 nptr->xpflg = ptr->xpflg;
321 }
322 break;
323 }
324 case 1:
325 break;
326 case 2: {
327 nptr->strip = mystrdup(piece);
328 nptr->stripl = strlen(nptr->strip);
329 if (strcmp(nptr->strip, "0") == 0) {
330 free(nptr->strip);
331 nptr->strip = mystrdup("");
332 nptr->stripl = 0;
333 }
334 break;
335 }
336 case 3: {
337 nptr->appnd = mystrdup(piece);
338 nptr->appndl = strlen(nptr->appnd);
339 if (strcmp(nptr->appnd, "0") == 0) {
340 free(nptr->appnd);
341 nptr->appnd = mystrdup("");
342 nptr->appndl = 0;
343 }
344 break;
345 }
346 case 4: {
347 encodeit(nptr, piece);
348 }
349 fprintf(stderr, " affix: %s %d, strip: %s %d\n", nptr->appnd,
350 nptr->appndl, nptr->strip, nptr->stripl);
351 // no break
352 default:
353 break;
354 }
355 i++;
356 }
357 free(piece);
358 }
359 nptr++;
360 }
361 if (ft == 'P') {
362 if (numpfx < MAX_PREFIXES) {
363 ptable[numpfx].aep = ptr;
364 ptable[numpfx].num = numents;
365 fprintf(stderr, "ptable %d num is %d\n", numpfx, ptable[numpfx].num);
366 numpfx++;
367 } else {
368 fprintf(stderr, "prefix buffer ptable is full\n");
369 }
370 } else {
371 if (numsfx < MAX_SUFFIXES) {
372 stable[numsfx].aep = ptr;
373 stable[numsfx].num = numents;
374 fprintf(stderr, "stable %d num is %d\n", numsfx, stable[numsfx].num);
375 numsfx++;
376 } else {
377 fprintf(stderr, "suffix buffer stable is full\n");
378 }
379 }
380 ptr = NULL;
381 nptr = NULL;
382 numents = 0;
383 achar = '\0';
384 }
385 }
386 free(line);
387 return 0;
388 }
389
encodeit(struct affent * ptr,char * cs)390 void encodeit(struct affent* ptr, char* cs) {
391 int nc;
392 int neg;
393 int grp;
394 int n;
395 int ec;
396 int nm;
397 int i, j, k;
398 unsigned char mbr[MAX_WD_LEN];
399
400 /* now clear the conditions array */
401 for (i = 0; i < SET_SIZE; i++)
402 ptr->conds[i] = (unsigned char)0;
403
404 /* now parse the string to create the conds array */
405 nc = strlen(cs);
406 neg = 0; /* complement indicator */
407 grp = 0; /* group indicator */
408 n = 0; /* number of conditions */
409 ec = 0; /* end condition indicator */
410 nm = 0; /* number of member in group */
411 i = 0;
412 if (strcmp(cs, ".") == 0) {
413 ptr->numconds = 0;
414 return;
415 }
416 while (i < nc) {
417 unsigned char c = *((unsigned char*)(cs + i));
418 if (c == '[') {
419 grp = 1;
420 c = 0;
421 }
422 if ((grp == 1) && (c == '^')) {
423 neg = 1;
424 c = 0;
425 }
426 if (c == ']') {
427 ec = 1;
428 c = 0;
429 }
430 if ((grp == 1) && (c != 0)) {
431 *(mbr + nm) = c;
432 nm++;
433 c = 0;
434 }
435 if (c != 0) {
436 ec = 1;
437 }
438 if (ec) {
439 if (grp == 1) {
440 if (neg == 0) {
441 for (j = 0; j < nm; j++) {
442 k = (unsigned int)mbr[j];
443 ptr->conds[k] = ptr->conds[k] | (1 << n);
444 }
445 } else {
446 for (j = 0; j < SET_SIZE; j++)
447 ptr->conds[j] = ptr->conds[j] | (1 << n);
448 for (j = 0; j < nm; j++) {
449 k = (unsigned int)mbr[j];
450 ptr->conds[k] = ptr->conds[k] & ~(1 << n);
451 }
452 }
453 neg = 0;
454 grp = 0;
455 nm = 0;
456 } else {
457 /* not a group so just set the proper bit for this char */
458 /* but first handle special case of . inside condition */
459 if (c == '.') {
460 /* wild card character so set them all */
461 for (j = 0; j < SET_SIZE; j++)
462 ptr->conds[j] = ptr->conds[j] | (1 << n);
463 } else {
464 ptr->conds[(unsigned int)c] = ptr->conds[(unsigned int)c] | (1 << n);
465 }
466 }
467 n++;
468 ec = 0;
469 }
470 i++;
471 }
472 ptr->numconds = n;
473 return;
474 }
475
476 /* search for a prefix */
pfx_chk(const char * word,int len,struct affent * ep,int num)477 void pfx_chk(const char* word, int len, struct affent* ep, int num) {
478 struct affent* aent;
479 int cond;
480 struct hentry* hent;
481 int i;
482
483 for (aent = ep, i = num; i > 0; aent++, i--) {
484 int tlen = len - aent->appndl;
485
486 if (tlen > 0 &&
487 (aent->appndl == 0 || strncmp(aent->appnd, word, aent->appndl) == 0) &&
488 tlen + aent->stripl >= aent->numconds) {
489 std::string tword(aent->strip);
490 tword.append(word + aent->appndl);
491
492 /* now go through the conds and make sure they all match */
493 unsigned char* cp = (unsigned char*)tword.c_str();
494 for (cond = 0; cond < aent->numconds; cond++) {
495 if ((aent->conds[*cp++] & (1 << cond)) == 0)
496 break;
497 }
498
499 if (cond >= aent->numconds) {
500 if ((hent = lookup(tword.c_str())) != NULL) {
501 if (numroots < MAX_ROOTS) {
502 roots[numroots].hashent = hent;
503 roots[numroots].prefix = aent;
504 roots[numroots].suffix = NULL;
505 numroots++;
506 }
507 }
508 }
509 }
510 }
511 }
512
suf_chk(const char * word,int len,struct affent * ep,int num,struct affent * pfxent,int cpflag)513 void suf_chk(const char* word,
514 int len,
515 struct affent* ep,
516 int num,
517 struct affent* pfxent,
518 int cpflag) {
519 struct affent* aent;
520 int cond;
521 struct hentry* hent;
522 int i;
523
524 for (aent = ep, i = num; i > 0; aent++, i--) {
525 if ((cpflag & XPRODUCT) != 0 && (aent->xpflg & XPRODUCT) == 0)
526 continue;
527
528 int tlen = len - aent->appndl;
529 if (tlen > 0 &&
530 (aent->appndl == 0 || strcmp(aent->appnd, (word + tlen)) == 0) &&
531 tlen + aent->stripl >= aent->numconds) {
532 std::string tword(word);
533 tword.resize(tlen);
534 tword.append(aent->strip);
535 unsigned char* cp = (unsigned char*)(tword.c_str() + tword.size());
536
537 for (cond = aent->numconds; --cond >= 0;) {
538 if ((aent->conds[*--cp] & (1 << cond)) == 0)
539 break;
540 }
541 if (cond < 0) {
542 if ((hent = lookup(tword.c_str())) != NULL) {
543 if (numroots < MAX_ROOTS) {
544 roots[numroots].hashent = hent;
545 roots[numroots].prefix = pfxent;
546 roots[numroots].suffix = aent;
547 numroots++;
548 }
549 }
550 }
551 }
552 }
553 }
554
aff_chk(const char * word,int len)555 void aff_chk(const char* word, int len) {
556 int i;
557 int nh = 0;
558
559 if (len < 4)
560 return;
561
562 for (i = 0; i < numpfx; i++) {
563 pfx_chk(word, len, ptable[i].aep, ptable[i].num);
564 }
565
566 nh = numroots;
567
568 if (nh > 0) {
569 for (int j = 0; j < nh; j++) {
570 if (roots[j].prefix->xpflg & XPRODUCT) {
571 char* nword = mystrdup((roots[j].hashent)->word);
572 int nwl = strlen(nword);
573 for (i = 0; i < numsfx; i++) {
574 suf_chk(nword, nwl, stable[i].aep, stable[i].num, roots[j].prefix,
575 XPRODUCT);
576 }
577 free(nword);
578 }
579 }
580 }
581 for (i = 0; i < numsfx; i++) {
582 suf_chk(word, len, stable[i].aep, stable[i].num, NULL, 0);
583 }
584 }
585
586 /* lookup a root word in the hashtable */
587
lookup(const char * word)588 struct hentry* lookup(const char* word) {
589 struct hentry* dp;
590 dp = &tableptr[hash(word)];
591 if (dp->word == NULL)
592 return NULL;
593 for (; dp != NULL; dp = dp->next) {
594 if (strcmp(word, dp->word) == 0)
595 return dp;
596 }
597 return NULL;
598 }
599
600 /* add a word to the hash table */
601
add_word(char * word)602 int add_word(char* word) {
603 int i;
604 struct hentry* dp;
605 struct hentry* hp = (struct hentry*)malloc(sizeof(struct hentry));
606
607 hp->word = word;
608 hp->affstr = NULL;
609 hp->keep = 0;
610 hp->next = NULL;
611
612 i = hash(word);
613 dp = &tableptr[i];
614
615 if (dp->word == NULL) {
616 *dp = *hp;
617 free(hp);
618 } else {
619 while (dp->next != NULL)
620 dp = dp->next;
621 dp->next = hp;
622 }
623 return 0;
624 }
625
626 /* load a word list and build a hash table on the fly */
627
load_tables(FILE * wdlst)628 int load_tables(FILE* wdlst) {
629 char ts[MAX_LN_LEN];
630 int nExtra = 5;
631
632 /* first read the first line of file to get hash table size */
633 if (!fgets(ts, MAX_LN_LEN - 1, wdlst))
634 return 2;
635 mychomp(ts);
636 tablesize = atoi(ts);
637
638 if (tablesize <= 0 ||
639 (tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) / (int)sizeof(struct hentry*))) {
640 return 3;
641 }
642
643 tablesize += nExtra;
644 if ((tablesize % 2) == 0)
645 tablesize++;
646
647 /* allocate the hash table */
648 tableptr = (struct hentry*)calloc(tablesize, sizeof(struct hentry));
649 if (!tableptr)
650 return 3;
651
652 /* loop thorugh all words on much list and add to hash
653 * table and store away word and affix strings in tmpfile
654 */
655
656 while (fgets(ts, MAX_LN_LEN - 1, wdlst)) {
657 mychomp(ts);
658 char* ap = mystrdup(ts);
659 add_word(ap);
660 }
661 return 0;
662 }
663
664 /* the hash function is a simple load and rotate
665 * algorithm borrowed
666 */
667
hash(const char * word)668 int hash(const char* word) {
669 int i;
670 long hv = 0;
671 for (i = 0; i < 4 && *word != 0; i++)
672 hv = (hv << 8) | (*word++);
673 while (*word != 0) {
674 ROTATE(hv, ROTATE_LEN);
675 hv ^= (*word++);
676 }
677 return (unsigned long)hv % tablesize;
678 }
679
add_affix_char(struct hentry * ep,char ac)680 void add_affix_char(struct hentry* ep, char ac) {
681 int al;
682 int i;
683 char* tmp;
684 if (ep->affstr == NULL) {
685 ep->affstr = (char*)malloc(2);
686 *(ep->affstr) = ac;
687 *((ep->affstr) + 1) = '\0';
688 return;
689 }
690 al = strlen(ep->affstr);
691 for (i = 0; i < al; i++)
692 if (ac == (ep->affstr)[i])
693 return;
694 tmp = (char*)calloc(al + 2, 1);
695 memcpy(tmp, ep->affstr, (al + 1));
696 *(tmp + al) = ac;
697 *(tmp + al + 1) = '\0';
698 free(ep->affstr);
699 ep->affstr = tmp;
700 return;
701 }
702
703 /* add a prefix to word */
pfx_add(const char * word,int len,struct affent * ep,int num)704 void pfx_add(const char* word, int len, struct affent* ep, int num) {
705 struct affent* aent;
706 int cond;
707 unsigned char* cp;
708 int i;
709 char* pp;
710 char tword[MAX_WD_LEN];
711
712 for (aent = ep, i = num; i > 0; aent++, i--) {
713 /* now make sure all conditions match */
714 if ((len > aent->stripl) && (len >= aent->numconds)) {
715 cp = (unsigned char*)word;
716 for (cond = 0; cond < aent->numconds; cond++) {
717 if ((aent->conds[*cp++] & (1 << cond)) == 0)
718 break;
719 }
720 if (cond >= aent->numconds) {
721 /* we have a match so add prefix */
722 int tlen = 0;
723 if (aent->appndl) {
724 strncpy(tword, aent->appnd, MAX_WD_LEN - 1);
725 tword[MAX_WD_LEN - 1] = '\0';
726 tlen += aent->appndl;
727 }
728 pp = tword + tlen;
729 strcpy(pp, (word + aent->stripl));
730
731 if (numwords < MAX_WORDS) {
732 wlist[numwords].word = mystrdup(tword);
733 wlist[numwords].pallow = 0;
734 numwords++;
735 }
736 }
737 }
738 }
739 }
740
741 /* add a suffix to a word */
suf_add(const char * word,int len,struct affent * ep,int num)742 void suf_add(const char* word, int len, struct affent* ep, int num) {
743 struct affent* aent;
744 int cond;
745 unsigned char* cp;
746 int i;
747 char tword[MAX_WD_LEN];
748 char* pp;
749
750 for (aent = ep, i = num; i > 0; aent++, i--) {
751 /* if conditions hold on root word
752 * then strip off strip string and add suffix
753 */
754
755 if ((len > aent->stripl) && (len >= aent->numconds)) {
756 cp = (unsigned char*)(word + len);
757 for (cond = aent->numconds; --cond >= 0;) {
758 if ((aent->conds[*--cp] & (1 << cond)) == 0)
759 break;
760 }
761 if (cond < 0) {
762 /* we have a matching condition */
763 int tlen = len;
764 strncpy(tword, word, MAX_WD_LEN - 1);
765 tword[MAX_WD_LEN - 1] = '\0';
766 if (aent->stripl) {
767 tlen -= aent->stripl;
768 }
769 pp = (tword + tlen);
770 if (aent->appndl) {
771 strcpy(pp, aent->appnd);
772 } else
773 *pp = '\0';
774
775 if (numwords < MAX_WORDS) {
776 wlist[numwords].word = mystrdup(tword);
777 wlist[numwords].pallow = (aent->xpflg & XPRODUCT);
778 numwords++;
779 }
780 }
781 }
782 }
783 }
784
expand_rootword(const char * ts,int wl,const char * ap)785 int expand_rootword(const char* ts, int wl, const char* ap) {
786 int i;
787 int nh = 0;
788
789 for (i = 0; i < numsfx; i++) {
790 if (strchr(ap, (stable[i].aep)->achar)) {
791 suf_add(ts, wl, stable[i].aep, stable[i].num);
792 }
793 }
794
795 nh = numwords;
796
797 if (nh > 1) {
798 for (int j = 1; j < nh; j++) {
799 if (wlist[j].pallow) {
800 for (i = 0; i < numpfx; i++) {
801 if (strchr(ap, (ptable[i].aep)->achar)) {
802 if ((ptable[i].aep)->xpflg & XPRODUCT) {
803 int nwl = strlen(wlist[j].word);
804 pfx_add(wlist[j].word, nwl, ptable[i].aep, ptable[i].num);
805 }
806 }
807 }
808 }
809 }
810 }
811
812 for (i = 0; i < numpfx; i++) {
813 if (strchr(ap, (ptable[i].aep)->achar)) {
814 pfx_add(ts, wl, ptable[i].aep, ptable[i].num);
815 }
816 }
817 return 0;
818 }
819
820 /* strip strings into token based on single char delimiter
821 * acts like strsep() but only uses a delim char and not
822 * a delim string
823 */
/*
 * Return a heap copy of the next token of *stringp, where tokens are
 * separated by the single character delim, and advance *stringp past the
 * token (and past the delimiter, if one was found).  Adjacent delimiters
 * yield empty tokens.  Returns NULL when *stringp is at the end of the
 * string or the copy cannot be allocated.  The caller frees the token.
 */
char* mystrsep(char** stringp, const char delim) {
  char* start = *stringp;
  int remaining = strlen(start);
  if (remaining <= 0)
    return NULL;

  char* hit = (char*)memchr(start, (int)((unsigned char)delim), remaining);
  ptrdiff_t toklen = hit ? (hit - start) : (ptrdiff_t)remaining;

  /* with a delimiter the cursor moves on even if the copy below fails */
  if (hit)
    *stringp = hit + 1;

  char* token = (char*)malloc(toklen + 1);
  if (token) {
    memcpy(token, start, toklen);
    token[toklen] = '\0';
    /* last token: park the cursor on the terminating NUL */
    if (!hit)
      *stringp = start + remaining;
  }
  return token;
}
850
/*
 * Return a heap copy of s, terminator included.  Returns NULL when s is
 * NULL or the allocation fails.  The caller frees the copy.
 */
char* mystrdup(const char* s) {
  if (!s)
    return NULL;

  int nbytes = strlen(s) + 1;
  char* copy = (char*)malloc(nbytes);
  if (copy)
    memcpy(copy, s, nbytes);
  return copy;
}
861
/*
 * Remove one trailing line ending from s in place: "\n", "\r\n" or a
 * lone "\r".  A string that does not end in a line ending - such as the
 * last line of a file without a final newline - is left untouched.
 */
void mychomp(char* s) {
  size_t k = strlen(s);
  if (k > 0 && s[k - 1] == '\n')
    s[--k] = '\0';
  if (k > 0 && s[k - 1] == '\r')
    s[--k] = '\0';
}
869