1 /* ***** BEGIN LICENSE BLOCK *****
2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3  *
4  * Copyright (C) 2002-2017 Németh László
5  *
6  * The contents of this file are subject to the Mozilla Public License Version
7  * 1.1 (the "License"); you may not use this file except in compliance with
8  * the License. You may obtain a copy of the License at
9  * http://www.mozilla.org/MPL/
10  *
11  * Software distributed under the License is distributed on an "AS IS" basis,
12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13  * for the specific language governing rights and limitations under the
14  * License.
15  *
16  * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17  *
18  * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19  * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20  * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21  * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22  * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23  *
24  * Alternatively, the contents of this file may be used under the terms of
25  * either the GNU General Public License Version 2 or later (the "GPL"), or
26  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27  * in which case the provisions of the GPL or the LGPL are applicable instead
28  * of those above. If you wish to allow use of your version of this file only
29  * under the terms of either the GPL or the LGPL, and not to allow others to
30  * use your version of this file under the terms of the MPL, indicate your
31  * decision by deleting the provisions above and replace them with the notice
32  * and other provisions required by the GPL or the LGPL. If you do not delete
33  * the provisions above, a recipient may use your version of this file under
34  * the terms of any one of the MPL, the GPL or the LGPL.
35  *
36  * ***** END LICENSE BLOCK ***** */
37 
38 /* Munch a word list and generate a smaller root word list with affixes*/
39 
40 #include <ctype.h>
41 #include <string.h>
42 #include <string>
43 #include <unistd.h>
44 #include <stdlib.h>
45 #include <stdint.h>
46 #include <stdio.h>
47 #include <stddef.h>
48 #include <sys/types.h>
49 #include <sys/stat.h>
50 #include <fcntl.h>
51 #include <limits>
52 
53 #include "munch.h"
54 
main(int argc,char ** argv)55 int main(int argc, char** argv) {
56   int i, j, k, n;
57   int rl, p, nwl;
58   int al;
59 
60   FILE* wrdlst;
61   FILE* afflst;
62 
63   char *nword, *wf, *af;
64   char as[(MAX_PREFIXES + MAX_SUFFIXES)];
65   char* ap;
66 
67   struct hentry* ep;
68   struct hentry* ep1;
69   struct affent* pfxp;
70   struct affent* sfxp;
71 
72   (void)argc;
73 
74   /* first parse the command line options */
75   /* arg1 - wordlist, arg2 - affix file */
76 
77   if (argv[1]) {
78     wf = mystrdup(argv[1]);
79   } else {
80     fprintf(stderr, "correct syntax is:\n");
81     fprintf(stderr, "munch word_list_file affix_file\n");
82     exit(1);
83   }
84   if (argv[2]) {
85     af = mystrdup(argv[2]);
86   } else {
87     fprintf(stderr, "correct syntax is:\n");
88     fprintf(stderr, "munch word_list_file affix_file\n");
89     exit(1);
90   }
91 
92   /* open the affix file */
93   afflst = fopen(af, "r");
94   if (!afflst) {
95     fprintf(stderr, "Error - could not open affix description file\n");
96     exit(1);
97   }
98 
99   /* step one is to parse the affix file building up the internal
100      affix data structures */
101 
102   numpfx = 0;
103   numsfx = 0;
104 
105   if (parse_aff_file(afflst)) {
106     fprintf(stderr, "Error - in affix file loading\n");
107     exit(1);
108   }
109   fclose(afflst);
110 
111   fprintf(stderr, "parsed in %d prefixes and %d suffixes\n", numpfx, numsfx);
112 
113   /* affix file is now parsed so create hash table of wordlist on the fly */
114 
115   /* open the wordlist */
116   wrdlst = fopen(wf, "r");
117   if (!wrdlst) {
118     fprintf(stderr, "Error - could not open word list file\n");
119     exit(1);
120   }
121 
122   if (load_tables(wrdlst)) {
123     fprintf(stderr, "Error building hash tables\n");
124     exit(1);
125   }
126   fclose(wrdlst);
127 
128   for (i = 0; i < tablesize; i++) {
129     ep = &tableptr[i];
130     if (ep->word == NULL)
131       continue;
132     for (; ep != NULL; ep = ep->next) {
133       numroots = 0;
134       aff_chk(ep->word, strlen(ep->word));
135       if (numroots) {
136         /* now there might be a number of combinations */
137         /* of prefixes and suffixes that might match this */
138         /* word.  So how to choose?  As a first shot look */
139         /* for the shortest remaining root word to */
140         /* to maximize the combinatorial power */
141 
142         /* but be careful, do not REQUIRE a specific combination */
143         /* of a prefix and a suffix to generate the word since */
144         /* that violates the rule that the root word with just */
145         /* the prefix or just the suffix must also exist in the */
146         /* wordlist as well */
147 
148         /* in fact because of the cross product issue, this not a  */
149         /* simple choice since some combinations of previous */
150         /* prefixes and new suffixes may not be valid. */
151         /*  The only way to know is to simply try them all */
152 
153         rl = 1000;
154         p = -1;
155 
156         for (j = 0; j < numroots; j++) {
157           /* first collect the root word info and build up */
158           /* the potential new affix string */
159           nword = (roots[j].hashent)->word;
160           nwl = strlen(nword);
161           *as = '\0';
162           ap = as;
163           if (roots[j].prefix)
164             *ap++ = (roots[j].prefix)->achar;
165           if (roots[j].suffix)
166             *ap++ = (roots[j].suffix)->achar;
167           if ((roots[j].hashent)->affstr) {
168             strcpy(ap, (roots[j].hashent)->affstr);
169           } else {
170             *ap = '\0';
171           }
172           al = strlen(as);
173 
174           /* now expand the potential affix string to generate */
175           /* all legal words and make sure they all exist in the */
176           /* word list */
177           numwords = 0;
178           wlist[numwords].word = mystrdup(nword);
179           wlist[numwords].pallow = 0;
180           numwords++;
181           n = 0;
182           if (al)
183             expand_rootword(nword, nwl, as);
184           for (k = 0; k < numwords; k++) {
185             if (lookup(wlist[k].word))
186               n++;
187             free(wlist[k].word);
188             wlist[k].word = NULL;
189             wlist[k].pallow = 0;
190           }
191 
192           /* if all exist in word list then okay */
193           if (n == numwords) {
194             if (nwl < rl) {
195               rl = nwl;
196               p = j;
197             }
198           }
199         }
200         if (p != -1) {
201           ep1 = roots[p].hashent;
202           pfxp = roots[p].prefix;
203           sfxp = roots[p].suffix;
204           ep1->keep = 1;
205           if (pfxp != NULL)
206             add_affix_char(ep1, pfxp->achar);
207           if (sfxp != NULL)
208             add_affix_char(ep1, sfxp->achar);
209         } else {
210           ep->keep = 1;
211         }
212       } else {
213         ep->keep = 1;
214       }
215     }
216   }
217 
218   /* now output only the words to keep along with affixes info */
219   /* first count how many words that is */
220   k = 0;
221   for (i = 0; i < tablesize; i++) {
222     ep = &tableptr[i];
223     if (ep->word == NULL)
224       continue;
225     for (; ep != NULL; ep = ep->next) {
226       if (ep->keep > 0)
227         k++;
228     }
229   }
230   fprintf(stdout, "%d\n", k);
231 
232   for (i = 0; i < tablesize; i++) {
233     ep = &tableptr[i];
234     if (ep->word == NULL)
235       continue;
236     for (; ep != NULL; ep = ep->next) {
237       if (ep->keep > 0) {
238         if (ep->affstr != NULL) {
239           fprintf(stdout, "%s/%s\n", ep->word, ep->affstr);
240         } else {
241           fprintf(stdout, "%s\n", ep->word);
242         }
243       }
244     }
245   }
246   return 0;
247 }
248 
parse_aff_file(FILE * afflst)249 int parse_aff_file(FILE* afflst) {
250   int i, j;
251   int numents = 0;
252   char achar = '\0';
253   short ff = 0;
254   struct affent* ptr = NULL;
255   struct affent* nptr = NULL;
256   char* line = (char*)malloc(MAX_LN_LEN);
257 
258   while (fgets(line, MAX_LN_LEN, afflst)) {
259     mychomp(line);
260     char ft = ' ';
261     fprintf(stderr, "parsing line: %s\n", line);
262     if (strncmp(line, "PFX", 3) == 0)
263       ft = 'P';
264     if (strncmp(line, "SFX", 3) == 0)
265       ft = 'S';
266     if (ft != ' ') {
267       char* tp = line;
268       char* piece;
269       i = 0;
270       ff = 0;
271       while ((piece = mystrsep(&tp, ' '))) {
272         if (*piece != '\0') {
273           switch (i) {
274             case 0:
275               break;
276             case 1: {
277               achar = *piece;
278               break;
279             }
280             case 2: {
281               if (*piece == 'Y')
282                 ff = XPRODUCT;
283               break;
284             }
285             case 3: {
286               numents = atoi(piece);
287               if ((numents <= 0) || ((std::numeric_limits<size_t>::max() /
288                                       sizeof(struct affent)) < static_cast<size_t>(numents))) {
289                 fprintf(stderr, "Error: too many entries: %d\n", numents);
290                 numents = 0;
291               } else {
292                 ptr = (struct affent*)malloc(numents * sizeof(struct affent));
293                 ptr->achar = achar;
294                 ptr->xpflg = ff;
295                 fprintf(stderr, "parsing %c entries %d\n", achar, numents);
296               }
297               break;
298             }
299             default:
300               break;
301           }
302           i++;
303         }
304         free(piece);
305       }
306       /* now parse all of the sub entries*/
307       nptr = ptr;
308       for (j = 0; j < numents; j++) {
309         if (!fgets(line, MAX_LN_LEN, afflst))
310           return 1;
311         mychomp(line);
312         tp = line;
313         i = 0;
314         while ((piece = mystrsep(&tp, ' '))) {
315           if (*piece != '\0') {
316             switch (i) {
317               case 0: {
318                 if (nptr != ptr) {
319                   nptr->achar = ptr->achar;
320                   nptr->xpflg = ptr->xpflg;
321                 }
322                 break;
323               }
324               case 1:
325                 break;
326               case 2: {
327                 nptr->strip = mystrdup(piece);
328                 nptr->stripl = strlen(nptr->strip);
329                 if (strcmp(nptr->strip, "0") == 0) {
330                   free(nptr->strip);
331                   nptr->strip = mystrdup("");
332                   nptr->stripl = 0;
333                 }
334                 break;
335               }
336               case 3: {
337                 nptr->appnd = mystrdup(piece);
338                 nptr->appndl = strlen(nptr->appnd);
339                 if (strcmp(nptr->appnd, "0") == 0) {
340                   free(nptr->appnd);
341                   nptr->appnd = mystrdup("");
342                   nptr->appndl = 0;
343                 }
344                 break;
345               }
346               case 4: {
347                 encodeit(nptr, piece);
348               }
349                 fprintf(stderr, "   affix: %s %d, strip: %s %d\n", nptr->appnd,
350                         nptr->appndl, nptr->strip, nptr->stripl);
351                 // no break
352               default:
353                 break;
354             }
355             i++;
356           }
357           free(piece);
358         }
359         nptr++;
360       }
361       if (ft == 'P') {
362         if (numpfx < MAX_PREFIXES) {
363           ptable[numpfx].aep = ptr;
364           ptable[numpfx].num = numents;
365           fprintf(stderr, "ptable %d num is %d\n", numpfx, ptable[numpfx].num);
366           numpfx++;
367         } else {
368           fprintf(stderr, "prefix buffer ptable is full\n");
369         }
370       } else {
371         if (numsfx < MAX_SUFFIXES) {
372           stable[numsfx].aep = ptr;
373           stable[numsfx].num = numents;
374           fprintf(stderr, "stable %d num is %d\n", numsfx, stable[numsfx].num);
375           numsfx++;
376         } else {
377           fprintf(stderr, "suffix buffer stable is full\n");
378         }
379       }
380       ptr = NULL;
381       nptr = NULL;
382       numents = 0;
383       achar = '\0';
384     }
385   }
386   free(line);
387   return 0;
388 }
389 
encodeit(struct affent * ptr,char * cs)390 void encodeit(struct affent* ptr, char* cs) {
391   int nc;
392   int neg;
393   int grp;
394   int n;
395   int ec;
396   int nm;
397   int i, j, k;
398   unsigned char mbr[MAX_WD_LEN];
399 
400   /* now clear the conditions array */
401   for (i = 0; i < SET_SIZE; i++)
402     ptr->conds[i] = (unsigned char)0;
403 
404   /* now parse the string to create the conds array */
405   nc = strlen(cs);
406   neg = 0; /* complement indicator */
407   grp = 0; /* group indicator */
408   n = 0;   /* number of conditions */
409   ec = 0;  /* end condition indicator */
410   nm = 0;  /* number of member in group */
411   i = 0;
412   if (strcmp(cs, ".") == 0) {
413     ptr->numconds = 0;
414     return;
415   }
416   while (i < nc) {
417     unsigned char c = *((unsigned char*)(cs + i));
418     if (c == '[') {
419       grp = 1;
420       c = 0;
421     }
422     if ((grp == 1) && (c == '^')) {
423       neg = 1;
424       c = 0;
425     }
426     if (c == ']') {
427       ec = 1;
428       c = 0;
429     }
430     if ((grp == 1) && (c != 0)) {
431       *(mbr + nm) = c;
432       nm++;
433       c = 0;
434     }
435     if (c != 0) {
436       ec = 1;
437     }
438     if (ec) {
439       if (grp == 1) {
440         if (neg == 0) {
441           for (j = 0; j < nm; j++) {
442             k = (unsigned int)mbr[j];
443             ptr->conds[k] = ptr->conds[k] | (1 << n);
444           }
445         } else {
446           for (j = 0; j < SET_SIZE; j++)
447             ptr->conds[j] = ptr->conds[j] | (1 << n);
448           for (j = 0; j < nm; j++) {
449             k = (unsigned int)mbr[j];
450             ptr->conds[k] = ptr->conds[k] & ~(1 << n);
451           }
452         }
453         neg = 0;
454         grp = 0;
455         nm = 0;
456       } else {
457         /* not a group so just set the proper bit for this char */
458         /* but first handle special case of . inside condition */
459         if (c == '.') {
460           /* wild card character so set them all */
461           for (j = 0; j < SET_SIZE; j++)
462             ptr->conds[j] = ptr->conds[j] | (1 << n);
463         } else {
464           ptr->conds[(unsigned int)c] = ptr->conds[(unsigned int)c] | (1 << n);
465         }
466       }
467       n++;
468       ec = 0;
469     }
470     i++;
471   }
472   ptr->numconds = n;
473   return;
474 }
475 
476 /* search for a prefix */
pfx_chk(const char * word,int len,struct affent * ep,int num)477 void pfx_chk(const char* word, int len, struct affent* ep, int num) {
478   struct affent* aent;
479   int cond;
480   struct hentry* hent;
481   int i;
482 
483   for (aent = ep, i = num; i > 0; aent++, i--) {
484     int tlen = len - aent->appndl;
485 
486     if (tlen > 0 &&
487         (aent->appndl == 0 || strncmp(aent->appnd, word, aent->appndl) == 0) &&
488         tlen + aent->stripl >= aent->numconds) {
489       std::string tword(aent->strip);
490       tword.append(word + aent->appndl);
491 
492       /* now go through the conds and make sure they all match */
493       unsigned char* cp = (unsigned char*)tword.c_str();
494       for (cond = 0; cond < aent->numconds; cond++) {
495         if ((aent->conds[*cp++] & (1 << cond)) == 0)
496           break;
497       }
498 
499       if (cond >= aent->numconds) {
500         if ((hent = lookup(tword.c_str())) != NULL) {
501           if (numroots < MAX_ROOTS) {
502             roots[numroots].hashent = hent;
503             roots[numroots].prefix = aent;
504             roots[numroots].suffix = NULL;
505             numroots++;
506           }
507         }
508       }
509     }
510   }
511 }
512 
suf_chk(const char * word,int len,struct affent * ep,int num,struct affent * pfxent,int cpflag)513 void suf_chk(const char* word,
514              int len,
515              struct affent* ep,
516              int num,
517              struct affent* pfxent,
518              int cpflag) {
519   struct affent* aent;
520   int cond;
521   struct hentry* hent;
522   int i;
523 
524   for (aent = ep, i = num; i > 0; aent++, i--) {
525     if ((cpflag & XPRODUCT) != 0 && (aent->xpflg & XPRODUCT) == 0)
526       continue;
527 
528     int tlen = len - aent->appndl;
529     if (tlen > 0 &&
530         (aent->appndl == 0 || strcmp(aent->appnd, (word + tlen)) == 0) &&
531         tlen + aent->stripl >= aent->numconds) {
532       std::string tword(word);
533       tword.resize(tlen);
534       tword.append(aent->strip);
535       unsigned char* cp = (unsigned char*)(tword.c_str() + tword.size());
536 
537       for (cond = aent->numconds; --cond >= 0;) {
538         if ((aent->conds[*--cp] & (1 << cond)) == 0)
539           break;
540       }
541       if (cond < 0) {
542         if ((hent = lookup(tword.c_str())) != NULL) {
543           if (numroots < MAX_ROOTS) {
544             roots[numroots].hashent = hent;
545             roots[numroots].prefix = pfxent;
546             roots[numroots].suffix = aent;
547             numroots++;
548           }
549         }
550       }
551     }
552   }
553 }
554 
aff_chk(const char * word,int len)555 void aff_chk(const char* word, int len) {
556   int i;
557   int nh = 0;
558 
559   if (len < 4)
560     return;
561 
562   for (i = 0; i < numpfx; i++) {
563     pfx_chk(word, len, ptable[i].aep, ptable[i].num);
564   }
565 
566   nh = numroots;
567 
568   if (nh > 0) {
569     for (int j = 0; j < nh; j++) {
570       if (roots[j].prefix->xpflg & XPRODUCT) {
571         char* nword = mystrdup((roots[j].hashent)->word);
572         int nwl = strlen(nword);
573         for (i = 0; i < numsfx; i++) {
574           suf_chk(nword, nwl, stable[i].aep, stable[i].num, roots[j].prefix,
575                   XPRODUCT);
576         }
577         free(nword);
578       }
579     }
580   }
581   for (i = 0; i < numsfx; i++) {
582     suf_chk(word, len, stable[i].aep, stable[i].num, NULL, 0);
583   }
584 }
585 
586 /* lookup a root word in the hashtable */
587 
lookup(const char * word)588 struct hentry* lookup(const char* word) {
589   struct hentry* dp;
590   dp = &tableptr[hash(word)];
591   if (dp->word == NULL)
592     return NULL;
593   for (; dp != NULL; dp = dp->next) {
594     if (strcmp(word, dp->word) == 0)
595       return dp;
596   }
597   return NULL;
598 }
599 
600 /* add a word to the hash table */
601 
add_word(char * word)602 int add_word(char* word) {
603   int i;
604   struct hentry* dp;
605   struct hentry* hp = (struct hentry*)malloc(sizeof(struct hentry));
606 
607   hp->word = word;
608   hp->affstr = NULL;
609   hp->keep = 0;
610   hp->next = NULL;
611 
612   i = hash(word);
613   dp = &tableptr[i];
614 
615   if (dp->word == NULL) {
616     *dp = *hp;
617     free(hp);
618   } else {
619     while (dp->next != NULL)
620       dp = dp->next;
621     dp->next = hp;
622   }
623   return 0;
624 }
625 
626 /* load a word list and build a hash table on the fly */
627 
load_tables(FILE * wdlst)628 int load_tables(FILE* wdlst) {
629   char ts[MAX_LN_LEN];
630   int nExtra = 5;
631 
632   /* first read the first line of file to get hash table size */
633   if (!fgets(ts, MAX_LN_LEN - 1, wdlst))
634     return 2;
635   mychomp(ts);
636   tablesize = atoi(ts);
637 
638   if (tablesize <= 0 ||
639       (tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) / (int)sizeof(struct hentry*))) {
640     return 3;
641   }
642 
643   tablesize += nExtra;
644   if ((tablesize % 2) == 0)
645     tablesize++;
646 
647   /* allocate the hash table */
648   tableptr = (struct hentry*)calloc(tablesize, sizeof(struct hentry));
649   if (!tableptr)
650     return 3;
651 
652   /* loop thorugh all words on much list and add to hash
653    * table and store away word and affix strings in tmpfile
654    */
655 
656   while (fgets(ts, MAX_LN_LEN - 1, wdlst)) {
657     mychomp(ts);
658     char* ap = mystrdup(ts);
659     add_word(ap);
660   }
661   return 0;
662 }
663 
664 /* the hash function is a simple load and rotate
665  * algorithm borrowed
666  */
667 
hash(const char * word)668 int hash(const char* word) {
669   int i;
670   long hv = 0;
671   for (i = 0; i < 4 && *word != 0; i++)
672     hv = (hv << 8) | (*word++);
673   while (*word != 0) {
674     ROTATE(hv, ROTATE_LEN);
675     hv ^= (*word++);
676   }
677   return (unsigned long)hv % tablesize;
678 }
679 
add_affix_char(struct hentry * ep,char ac)680 void add_affix_char(struct hentry* ep, char ac) {
681   int al;
682   int i;
683   char* tmp;
684   if (ep->affstr == NULL) {
685     ep->affstr = (char*)malloc(2);
686     *(ep->affstr) = ac;
687     *((ep->affstr) + 1) = '\0';
688     return;
689   }
690   al = strlen(ep->affstr);
691   for (i = 0; i < al; i++)
692     if (ac == (ep->affstr)[i])
693       return;
694   tmp = (char*)calloc(al + 2, 1);
695   memcpy(tmp, ep->affstr, (al + 1));
696   *(tmp + al) = ac;
697   *(tmp + al + 1) = '\0';
698   free(ep->affstr);
699   ep->affstr = tmp;
700   return;
701 }
702 
703 /* add a prefix to word */
pfx_add(const char * word,int len,struct affent * ep,int num)704 void pfx_add(const char* word, int len, struct affent* ep, int num) {
705   struct affent* aent;
706   int cond;
707   unsigned char* cp;
708   int i;
709   char* pp;
710   char tword[MAX_WD_LEN];
711 
712   for (aent = ep, i = num; i > 0; aent++, i--) {
713     /* now make sure all conditions match */
714     if ((len > aent->stripl) && (len >= aent->numconds)) {
715       cp = (unsigned char*)word;
716       for (cond = 0; cond < aent->numconds; cond++) {
717         if ((aent->conds[*cp++] & (1 << cond)) == 0)
718           break;
719       }
720       if (cond >= aent->numconds) {
721         /* we have a match so add prefix */
722         int tlen = 0;
723         if (aent->appndl) {
724           strncpy(tword, aent->appnd, MAX_WD_LEN - 1);
725           tword[MAX_WD_LEN - 1] = '\0';
726           tlen += aent->appndl;
727         }
728         pp = tword + tlen;
729         strcpy(pp, (word + aent->stripl));
730 
731         if (numwords < MAX_WORDS) {
732           wlist[numwords].word = mystrdup(tword);
733           wlist[numwords].pallow = 0;
734           numwords++;
735         }
736       }
737     }
738   }
739 }
740 
741 /* add a suffix to a word */
suf_add(const char * word,int len,struct affent * ep,int num)742 void suf_add(const char* word, int len, struct affent* ep, int num) {
743   struct affent* aent;
744   int cond;
745   unsigned char* cp;
746   int i;
747   char tword[MAX_WD_LEN];
748   char* pp;
749 
750   for (aent = ep, i = num; i > 0; aent++, i--) {
751     /* if conditions hold on root word
752      * then strip off strip string and add suffix
753      */
754 
755     if ((len > aent->stripl) && (len >= aent->numconds)) {
756       cp = (unsigned char*)(word + len);
757       for (cond = aent->numconds; --cond >= 0;) {
758         if ((aent->conds[*--cp] & (1 << cond)) == 0)
759           break;
760       }
761       if (cond < 0) {
762         /* we have a matching condition */
763         int tlen = len;
764         strncpy(tword, word, MAX_WD_LEN - 1);
765         tword[MAX_WD_LEN - 1] = '\0';
766         if (aent->stripl) {
767           tlen -= aent->stripl;
768         }
769         pp = (tword + tlen);
770         if (aent->appndl) {
771           strcpy(pp, aent->appnd);
772         } else
773           *pp = '\0';
774 
775         if (numwords < MAX_WORDS) {
776           wlist[numwords].word = mystrdup(tword);
777           wlist[numwords].pallow = (aent->xpflg & XPRODUCT);
778           numwords++;
779         }
780       }
781     }
782   }
783 }
784 
expand_rootword(const char * ts,int wl,const char * ap)785 int expand_rootword(const char* ts, int wl, const char* ap) {
786   int i;
787   int nh = 0;
788 
789   for (i = 0; i < numsfx; i++) {
790     if (strchr(ap, (stable[i].aep)->achar)) {
791       suf_add(ts, wl, stable[i].aep, stable[i].num);
792     }
793   }
794 
795   nh = numwords;
796 
797   if (nh > 1) {
798     for (int j = 1; j < nh; j++) {
799       if (wlist[j].pallow) {
800         for (i = 0; i < numpfx; i++) {
801           if (strchr(ap, (ptable[i].aep)->achar)) {
802             if ((ptable[i].aep)->xpflg & XPRODUCT) {
803               int nwl = strlen(wlist[j].word);
804               pfx_add(wlist[j].word, nwl, ptable[i].aep, ptable[i].num);
805             }
806           }
807         }
808       }
809     }
810   }
811 
812   for (i = 0; i < numpfx; i++) {
813     if (strchr(ap, (ptable[i].aep)->achar)) {
814       pfx_add(ts, wl, ptable[i].aep, ptable[i].num);
815     }
816   }
817   return 0;
818 }
819 
/* Split a string into tokens based on a single delimiter character.
 * Acts like strsep() but takes a delimiter char instead of a delimiter
 * string, and returns a freshly allocated copy of the token instead of
 * modifying the input.
 *
 * `*stringp` is advanced past the token (and past the delimiter that ended
 * it).  Two delimiters in a row yield an empty token.  The caller frees the
 * returned string.
 *
 * Returns NULL when `stringp` or `*stringp` is NULL, when nothing is left
 * to read, or when memory runs out; in all of these cases `*stringp` is
 * left unchanged.
 */
char* mystrsep(char** stringp, const char delim) {
  if (stringp == NULL || *stringp == NULL)
    return NULL;

  char* mp = *stringp;
  size_t n = strlen(mp);
  if (n == 0)
    return NULL;

  /* the token ends at the first delimiter, or at the end of the string */
  char* dp = (char*)memchr(mp, (int)((unsigned char)delim), n);
  size_t nc = dp ? (size_t)(dp - mp) : n;

  char* rv = (char*)malloc(nc + 1);
  if (rv == NULL)
    return NULL;

  memcpy(rv, mp, nc);
  rv[nc] = '\0';
  *stringp = dp ? dp + 1 : mp + n;
  return rv;
}
850 
/* Return a newly allocated copy of string `s`.
 *
 * The caller frees the copy.  Returns NULL when `s` is NULL or memory runs
 * out.  The length is kept in a size_t so that it cannot be truncated.
 */
char* mystrdup(const char* s) {
  if (s == NULL)
    return NULL;

  size_t sl = strlen(s) + 1; /* includes the terminator */
  char* d = (char*)malloc(sl);
  if (d != NULL)
    memcpy(d, s, sl);
  return d;
}
861 
/* Remove the line ending from the end of `s` in place.
 *
 * Strips one trailing '\n' or '\r' and then a '\r' in front of it, so
 * "\n", "\r\n" and a lone "\r" are all handled.  A string that does not end
 * in a line ending - such as the last line of a file without a final
 * newline - is left untouched.  A NULL `s` is ignored.
 */
void mychomp(char* s) {
  if (s == NULL)
    return;

  size_t k = strlen(s);
  if (k > 0 && (s[k - 1] == '\n' || s[k - 1] == '\r'))
    s[--k] = '\0';
  if (k > 0 && s[k - 1] == '\r')
    s[k - 1] = '\0';
}
869