1 #include "license.hunspell"
2 #include "license.myspell"
3 
4 #include <stdlib.h>
5 #include <string.h>
6 #include <stdio.h>
7 
8 #include "hunspell.hxx"
9 #include "hunspell.h"
10 #ifndef MOZILLA_CLIENT
11 #    include "config.h"
12 #endif
13 #include "csutil.hxx"
14 
15 #include <string>
16 
Hunspell(const char * affpath,const char * dpath,const char * key)17 Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
18 {
19     encoding = NULL;
20     csconv = NULL;
21     utf8 = 0;
22     complexprefixes = 0;
23     affixpath = mystrdup(affpath);
24     maxdic = 0;
25 
26     /* first set up the hash manager */
27     pHMgr[0] = new HashMgr(dpath, affpath, key);
28     if (pHMgr[0]) maxdic = 1;
29 
30     /* next set up the affix manager */
31     /* it needs access to the hash manager lookup methods */
32     pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
33 
34     /* get the preferred try string and the dictionary */
35     /* encoding from the Affix Manager for that dictionary */
36     char * try_string = pAMgr->get_try_string();
37     encoding = pAMgr->get_encoding();
38     langnum = pAMgr->get_langnum();
39     utf8 = pAMgr->get_utf8();
40     if (!utf8)
41         csconv = get_current_cs(encoding);
42     complexprefixes = pAMgr->get_complexprefixes();
43     wordbreak = pAMgr->get_breaktable();
44 
45     /* and finally set up the suggestion manager */
46     pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
47     if (try_string) free(try_string);
48 }
49 
~Hunspell()50 Hunspell::~Hunspell()
51 {
52     if (pSMgr) delete pSMgr;
53     if (pAMgr) delete pAMgr;
54     for (int i = 0; i < maxdic; i++) delete pHMgr[i];
55     maxdic = 0;
56     pSMgr = NULL;
57     pAMgr = NULL;
58 #ifdef MOZILLA_CLIENT
59     delete [] csconv;
60 #endif
61     csconv= NULL;
62     if (encoding) free(encoding);
63     encoding = NULL;
64     if (affixpath) free(affixpath);
65     affixpath = NULL;
66 }
67 
68 // load extra dictionaries
add_dic(const char * dpath,const char * key)69 int Hunspell::add_dic(const char * dpath, const char * key) {
70     if (maxdic == MAXDIC || !affixpath) return 1;
71     pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);
72     if (pHMgr[maxdic]) maxdic++; else return 1;
73     return 0;
74 }
75 
76 // make a copy of src at destination while removing all leading
77 // blanks and removing any trailing periods after recording
78 // their presence with the abbreviation flag
79 // also since already going through character by character,
80 // set the capitalization type
81 // return the length of the "cleaned" (and UTF-8 encoded) word
82 
cleanword2(char * dest,const char * src,w_char * dest_utf,int * nc,int * pcaptype,int * pabbrev)83 int Hunspell::cleanword2(char * dest, const char * src,
84     w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)
85 {
86    unsigned char * p = (unsigned char *) dest;
87    const unsigned char * q = (const unsigned char * ) src;
88 
89    // first skip over any leading blanks
90    while ((*q != '\0') && (*q == ' ')) q++;
91 
92    // now strip off any trailing periods (recording their presence)
93    *pabbrev = 0;
94    int nl = strlen((const char *)q);
95    while ((nl > 0) && (*(q+nl-1)=='.')) {
96        nl--;
97        (*pabbrev)++;
98    }
99 
100    // if no characters are left it can't be capitalized
101    if (nl <= 0) {
102        *pcaptype = NOCAP;
103        *p = '\0';
104        return 0;
105    }
106 
107    strncpy(dest, (char *) q, nl);
108    *(dest + nl) = '\0';
109    nl = strlen(dest);
110    if (utf8) {
111       *nc = u8_u16(dest_utf, MAXWORDLEN, dest);
112       // don't check too long words
113       if (*nc >= MAXWORDLEN) return 0;
114       if (*nc == -1) { // big Unicode character (non BMP area)
115          *pcaptype = NOCAP;
116          return nl;
117       }
118      *pcaptype = get_captype_utf8(dest_utf, *nc, langnum);
119    } else {
120      *pcaptype = get_captype(dest, nl, csconv);
121      *nc = nl;
122    }
123    return nl;
124 }
125 
cleanword(char * dest,const char * src,int * pcaptype,int * pabbrev)126 int Hunspell::cleanword(char * dest, const char * src,
127     int * pcaptype, int * pabbrev)
128 {
129    unsigned char * p = (unsigned char *) dest;
130    const unsigned char * q = (const unsigned char * ) src;
131    int firstcap = 0;
132 
133    // first skip over any leading blanks
134    while ((*q != '\0') && (*q == ' ')) q++;
135 
136    // now strip off any trailing periods (recording their presence)
137    *pabbrev = 0;
138    int nl = strlen((const char *)q);
139    while ((nl > 0) && (*(q+nl-1)=='.')) {
140        nl--;
141        (*pabbrev)++;
142    }
143 
144    // if no characters are left it can't be capitalized
145    if (nl <= 0) {
146        *pcaptype = NOCAP;
147        *p = '\0';
148        return 0;
149    }
150 
151    // now determine the capitalization type of the first nl letters
152    int ncap = 0;
153    int nneutral = 0;
154    int nc = 0;
155 
156    if (!utf8) {
157       while (nl > 0) {
158          nc++;
159          if (csconv[(*q)].ccase) ncap++;
160          if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
161          *p++ = *q++;
162          nl--;
163       }
164       // remember to terminate the destination string
165       *p = '\0';
166       firstcap = csconv[(unsigned char)(*dest)].ccase;
167    } else {
168       unsigned short idx;
169       w_char t[MAXWORDLEN];
170       nc = u8_u16(t, MAXWORDLEN, src);
171       for (int i = 0; i < nc; i++) {
172          idx = (t[i].h << 8) + t[i].l;
173          unsigned short low = unicodetolower(idx, langnum);
174          if (idx != low) ncap++;
175          if (unicodetoupper(idx, langnum) == low) nneutral++;
176       }
177       u16_u8(dest, MAXWORDUTF8LEN, t, nc);
178       if (ncap) {
179          idx = (t[0].h << 8) + t[0].l;
180          firstcap = (idx != unicodetolower(idx, langnum));
181       }
182    }
183 
184    // now finally set the captype
185    if (ncap == 0) {
186         *pcaptype = NOCAP;
187    } else if ((ncap == 1) && firstcap) {
188         *pcaptype = INITCAP;
189    } else if ((ncap == nc) || ((ncap + nneutral) == nc)){
190         *pcaptype = ALLCAP;
191    } else if ((ncap > 1) && firstcap) {
192         *pcaptype = HUHINITCAP;
193    } else {
194         *pcaptype = HUHCAP;
195    }
196    return strlen(dest);
197 }
198 
mkallcap(char * p)199 void Hunspell::mkallcap(char * p)
200 {
201   if (utf8) {
202       w_char u[MAXWORDLEN];
203       int nc = u8_u16(u, MAXWORDLEN, p);
204       unsigned short idx;
205       for (int i = 0; i < nc; i++) {
206          idx = (u[i].h << 8) + u[i].l;
207          if (idx != unicodetoupper(idx, langnum)) {
208             u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
209             u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
210          }
211       }
212       u16_u8(p, MAXWORDUTF8LEN, u, nc);
213   } else {
214     while (*p != '\0') {
215         *p = csconv[((unsigned char) *p)].cupper;
216         p++;
217     }
218   }
219 }
220 
mkallcap2(char * p,w_char * u,int nc)221 int Hunspell::mkallcap2(char * p, w_char * u, int nc)
222 {
223   if (utf8) {
224       unsigned short idx;
225       for (int i = 0; i < nc; i++) {
226          idx = (u[i].h << 8) + u[i].l;
227          unsigned short up = unicodetoupper(idx, langnum);
228          if (idx != up) {
229             u[i].h = (unsigned char) (up >> 8);
230             u[i].l = (unsigned char) (up & 0x00FF);
231          }
232       }
233       u16_u8(p, MAXWORDUTF8LEN, u, nc);
234       return strlen(p);
235   } else {
236     while (*p != '\0') {
237         *p = csconv[((unsigned char) *p)].cupper;
238         p++;
239     }
240   }
241   return nc;
242 }
243 
244 
mkallsmall(char * p)245 void Hunspell::mkallsmall(char * p)
246 {
247     while (*p != '\0') {
248         *p = csconv[((unsigned char) *p)].clower;
249         p++;
250     }
251 }
252 
mkallsmall2(char * p,w_char * u,int nc)253 int Hunspell::mkallsmall2(char * p, w_char * u, int nc)
254 {
255   if (utf8) {
256       unsigned short idx;
257       for (int i = 0; i < nc; i++) {
258          idx = (u[i].h << 8) + u[i].l;
259          unsigned short low = unicodetolower(idx, langnum);
260          if (idx != low) {
261             u[i].h = (unsigned char) (low >> 8);
262             u[i].l = (unsigned char) (low & 0x00FF);
263          }
264       }
265       u16_u8(p, MAXWORDUTF8LEN, u, nc);
266       return strlen(p);
267   } else {
268     while (*p != '\0') {
269         *p = csconv[((unsigned char) *p)].clower;
270         p++;
271     }
272   }
273   return nc;
274 }
275 
276 // convert UTF-8 sharp S codes to latin 1
sharps_u8_l1(char * dest,char * source)277 char * Hunspell::sharps_u8_l1(char * dest, char * source) {
278     char * p = dest;
279     *p = *source;
280     for (p++, source++; *(source - 1); p++, source++) {
281         *p = *source;
282         if (*source == '\x9F') *--p = '\xDF';
283     }
284     return dest;
285 }
286 
287 // recursive search for right ss - sharp s permutations
spellsharps(char * base,char * pos,int n,int repnum,char * tmp,int * info,char ** root)288 hentry * Hunspell::spellsharps(char * base, char * pos, int n,
289         int repnum, char * tmp, int * info, char **root) {
290     pos = strstr(pos, "ss");
291     if (pos && (n < MAXSHARPS)) {
292         *pos = '\xC3';
293         *(pos + 1) = '\x9F';
294         hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);
295         if (h) return h;
296         *pos = 's';
297         *(pos + 1) = 's';
298         h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);
299         if (h) return h;
300     } else if (repnum > 0) {
301         if (utf8) return checkword(base, info, root);
302         return checkword(sharps_u8_l1(tmp, base), info, root);
303     }
304     return NULL;
305 }
306 
is_keepcase(const hentry * rv)307 int Hunspell::is_keepcase(const hentry * rv) {
308     return pAMgr && rv->astr && pAMgr->get_keepcase() &&
309         TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
310 }
311 
312 /* insert a word to the beginning of the suggestion array and return ns */
insert_sug(char *** slst,char * word,int ns)313 int Hunspell::insert_sug(char ***slst, char * word, int ns) {
314     char * dup = mystrdup(word);
315     if (!dup) return ns;
316     if (ns == MAXSUGGESTION) {
317         ns--;
318         free((*slst)[ns]);
319     }
320     for (int k = ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
321     (*slst)[0] = dup;
322     return ns + 1;
323 }
324 
spell(const char * word,int * info,char ** root)325 int Hunspell::spell(const char * word, int * info, char ** root)
326 {
327   struct hentry * rv=NULL;
328   // need larger vector. For example, Turkish capital letter I converted a
329   // 2-byte UTF-8 character (dotless i) by mkallsmall.
330   char cw[MAXWORDUTF8LEN];
331   char wspace[MAXWORDUTF8LEN];
332   w_char unicw[MAXWORDLEN];
333 
334   int info2 = 0;
335   if (!info) info = &info2; else *info = 0;
336 
337   // Hunspell supports XML input of the simplified API (see manual)
338   if (strcmp(word, SPELL_XML) == 0) return 1;
339   int nc = strlen(word);
340   int wl2 = 0;
341   if (utf8) {
342     if (nc >= MAXWORDUTF8LEN) return 0;
343   } else {
344     if (nc >= MAXWORDLEN) return 0;
345   }
346   int captype = 0;
347   int abbv = 0;
348   int wl = 0;
349 
350   // input conversion
351   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
352   if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
353   else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
354 
355   if (wl == 0 || maxdic == 0) return 1;
356   if (root) *root = NULL;
357 
358   // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
359   enum { NBEGIN, NNUM, NSEP };
360   int nstate = NBEGIN;
361   int i;
362 
363   for (i = 0; (i < wl); i++) {
364     if ((cw[i] <= '9') && (cw[i] >= '0')) {
365         nstate = NNUM;
366     } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) {
367         if ((nstate == NSEP) || (i == 0)) break;
368         nstate = NSEP;
369     } else break;
370   }
371   if ((i == wl) && (nstate == NNUM)) return 1;
372 
373   switch(captype) {
374      case HUHCAP:
375             /* FALLTHROUGH */
376      case HUHINITCAP:
377             *info += SPELL_ORIGCAP;
378             /* FALLTHROUGH */
379      case NOCAP:
380             rv = checkword(cw, info, root);
381             if ((abbv) && !(rv)) {
382                 memcpy(wspace,cw,wl);
383                 *(wspace+wl) = '.';
384                 *(wspace+wl+1) = '\0';
385                 rv = checkword(wspace, info, root);
386             }
387             break;
388      case ALLCAP: {
389             *info += SPELL_ORIGCAP;
390             rv = checkword(cw, info, root);
391             if (rv) break;
392             if (abbv) {
393                 memcpy(wspace,cw,wl);
394                 *(wspace+wl) = '.';
395                 *(wspace+wl+1) = '\0';
396                 rv = checkword(wspace, info, root);
397                 if (rv) break;
398             }
399             // Spec. prefix handling for Catalan, French, Italian:
400 	    // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
401             if (pAMgr && strchr(cw, '\'')) {
402                 wl = mkallsmall2(cw, unicw, nc);
403         	//There are no really sane circumstances where this could fail,
404         	//but anyway...
405         	if (char * apostrophe = strchr(cw, '\'')) {
406                     if (utf8) {
407             	        w_char tmpword[MAXWORDLEN];
408             	        *apostrophe = '\0';
409             	        wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
410             	        *apostrophe = '\'';
411 		        if (wl2 >= 0 && wl2 < nc) {
412 		            mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
413 			    rv = checkword(cw, info, root);
414 			    if (rv) break;
415 		        }
416                     } else {
417 		        mkinitcap2(apostrophe + 1, unicw, nc);
418 		        rv = checkword(cw, info, root);
419 		        if (rv) break;
420 		    }
421 		}
422 		mkinitcap2(cw, unicw, nc);
423 		rv = checkword(cw, info, root);
424 		if (rv) break;
425             }
426             if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
427                 char tmpword[MAXWORDUTF8LEN];
428                 wl = mkallsmall2(cw, unicw, nc);
429                 memcpy(wspace,cw,(wl+1));
430                 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
431                 if (!rv) {
432                     wl2 = mkinitcap2(cw, unicw, nc);
433                     rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);
434                 }
435                 if ((abbv) && !(rv)) {
436                     *(wspace+wl) = '.';
437                     *(wspace+wl+1) = '\0';
438                     rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
439                     if (!rv) {
440                         memcpy(wspace, cw, wl2);
441                         *(wspace+wl2) = '.';
442                         *(wspace+wl2+1) = '\0';
443                         rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
444                     }
445                 }
446                 if (rv) break;
447             }
448         }
449      case INITCAP: {
450              *info += SPELL_ORIGCAP;
451              wl = mkallsmall2(cw, unicw, nc);
452              memcpy(wspace,cw,(wl+1));
453              wl2 = mkinitcap2(cw, unicw, nc);
454              if (captype == INITCAP) *info += SPELL_INITCAP;
455              rv = checkword(cw, info, root);
456              if (captype == INITCAP) *info -= SPELL_INITCAP;
457              // forbid bad capitalization
458              // (for example, ijs -> Ijs instead of IJs in Dutch)
459              // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
460              if (*info & SPELL_FORBIDDEN) {
461                 rv = NULL;
462                 break;
463              }
464              if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
465              if (rv) break;
466 
467              rv = checkword(wspace, info, root);
468              if (abbv && !rv) {
469 
470                  *(wspace+wl) = '.';
471                  *(wspace+wl+1) = '\0';
472                  rv = checkword(wspace, info, root);
473                  if (!rv) {
474                     memcpy(wspace, cw, wl2);
475                     *(wspace+wl2) = '.';
476                     *(wspace+wl2+1) = '\0';
477     	    	    if (captype == INITCAP) *info += SPELL_INITCAP;
478                     rv = checkword(wspace, info, root);
479     	    	    if (captype == INITCAP) *info -= SPELL_INITCAP;
480                     if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
481                     break;
482                  }
483              }
484              if (rv && is_keepcase(rv) &&
485                 ((captype == ALLCAP) ||
486                    // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed
487                    // in INITCAP form, too.
488                    !(pAMgr->get_checksharps() &&
489                       ((utf8 && strstr(wspace, "\xC3\x9F")) ||
490                       (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;
491              break;
492            }
493   }
494 
495   if (rv) {
496       if (pAMgr && pAMgr->get_warn() && rv->astr &&
497           TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
498               *info += SPELL_WARN;
499 	      if (pAMgr->get_forbidwarn()) return 0;
500               return HUNSPELL_OK_WARN;
501       }
502       return HUNSPELL_OK;
503   }
504 
505   // recursive breaking at break points
506   if (wordbreak) {
507     char * s;
508     char r;
509     int nbr = 0;
510     wl = strlen(cw);
511     int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
512 
513     // calculate break points for recursion limit
514     for (int j = 0; j < numbreak; j++) {
515       s = cw;
516       do {
517       	s = (char *) strstr(s, wordbreak[j]);
518       	if (s) {
519 		nbr++;
520 		s++;
521 	}
522       } while (s);
523     }
524     if (nbr >= 10) return 0;
525 
526     // check boundary patterns (^begin and end$)
527     for (int j = 0; j < numbreak; j++) {
528       int plen = strlen(wordbreak[j]);
529       if (plen == 1 || plen > wl) continue;
530       if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0
531         && spell(cw + plen - 1)) return 1;
532       if (wordbreak[j][plen - 1] == '$' &&
533         strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {
534 	    r = cw[wl - plen + 1];
535 	    cw[wl - plen + 1] = '\0';
536     	    if (spell(cw)) return 1;
537 	    cw[wl - plen + 1] = r;
538 	}
539     }
540 
541     // other patterns
542     for (int j = 0; j < numbreak; j++) {
543       int plen = strlen(wordbreak[j]);
544       s=(char *) strstr(cw, wordbreak[j]);
545       if (s && (s > cw) && (s < cw + wl - plen)) {
546 	if (!spell(s + plen)) continue;
547         r = *s;
548         *s = '\0';
549         // examine 2 sides of the break point
550         if (spell(cw)) return 1;
551         *s = r;
552 
553         // LANG_hu: spec. dash rule
554 	if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {
555 	  r = s[1];
556 	  s[1] = '\0';
557           if (spell(cw)) return 1; // check the first part with dash
558           s[1] = r;
559 	}
560         // end of LANG speficic region
561 
562       }
563     }
564   }
565 
566   return 0;
567 }
568 
checkword(const char * w,int * info,char ** root)569 struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
570 {
571   struct hentry * he = NULL;
572   int len, i;
573   char w2[MAXWORDUTF8LEN];
574   const char * word;
575 
576   char * ignoredchars = pAMgr->get_ignore();
577   if (ignoredchars != NULL) {
578      strcpy(w2, w);
579      if (utf8) {
580         int ignoredchars_utf16_len;
581         unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);
582         remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);
583      } else {
584         remove_ignored_chars(w2,ignoredchars);
585      }
586      word = w2;
587   } else word = w;
588 
589   len = strlen(word);
590 
591   if (!len)
592       return NULL;
593 
594   // word reversing wrapper for complex prefixes
595   if (complexprefixes) {
596     if (word != w2) {
597       strcpy(w2, word);
598       word = w2;
599     }
600     if (utf8) reverseword_utf(w2); else reverseword(w2);
601   }
602 
603   // look word in hash table
604   for (i = 0; (i < maxdic) && !he; i ++) {
605   he = (pHMgr[i])->lookup(word);
606 
607   // check forbidden and onlyincompound words
608   if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
609     if (info) *info += SPELL_FORBIDDEN;
610     // LANG_hu section: set dash information for suggestions
611     if (langnum == LANG_hu) {
612         if (pAMgr->get_compoundflag() &&
613             TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
614                 if (info) *info += SPELL_COMPOUND;
615         }
616     }
617     return NULL;
618   }
619 
620   // he = next not needaffix, onlyincompound homonym or onlyupcase word
621   while (he && (he->astr) &&
622     ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
623        (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
624        (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
625     )) he = he->next_homonym;
626   }
627 
628   // check with affixes
629   if (!he && pAMgr) {
630      // try stripping off affixes */
631      he = pAMgr->affix_check(word, len, 0);
632 
633      // check compound restriction and onlyupcase
634      if (he && he->astr && (
635         (pAMgr->get_onlyincompound() &&
636     	    TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
637         (info && (*info & SPELL_INITCAP) &&
638     	    TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
639     	    he = NULL;
640      }
641 
642      if (he) {
643         if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
644             if (info) *info += SPELL_FORBIDDEN;
645             return NULL;
646         }
647         if (root) {
648             *root = mystrdup(he->word);
649             if (*root && complexprefixes) {
650                 if (utf8) reverseword_utf(*root); else reverseword(*root);
651             }
652         }
653      // try check compound word
654      } else if (pAMgr->get_compound()) {
655           he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info);
656           // LANG_hu section: `moving rule' with last dash
657           if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {
658              char * dup = mystrdup(word);
659              if (!dup) return NULL;
660              dup[len-1] = '\0';
661              he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0, info);
662              free(dup);
663           }
664           // end of LANG speficic region
665           if (he) {
666                 if (root) {
667                     *root = mystrdup(he->word);
668                     if (*root && complexprefixes) {
669                         if (utf8) reverseword_utf(*root); else reverseword(*root);
670                     }
671                 }
672                 if (info) *info += SPELL_COMPOUND;
673           }
674      }
675 
676   }
677 
678   return he;
679 }
680 
suggest(char *** slst,const char * word)681 int Hunspell::suggest(char*** slst, const char * word)
682 {
683   int onlycmpdsug = 0;
684   char cw[MAXWORDUTF8LEN];
685   char wspace[MAXWORDUTF8LEN];
686   if (!pSMgr || maxdic == 0) return 0;
687   w_char unicw[MAXWORDLEN];
688   *slst = NULL;
689   // process XML input of the simplified API (see manual)
690   if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
691      return spellml(slst, word);
692   }
693   int nc = strlen(word);
694   if (utf8) {
695     if (nc >= MAXWORDUTF8LEN) return 0;
696   } else {
697     if (nc >= MAXWORDLEN) return 0;
698   }
699   int captype = 0;
700   int abbv = 0;
701   int wl = 0;
702 
703   // input conversion
704   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
705   if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
706   else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
707 
708   if (wl == 0) return 0;
709   int ns = 0;
710   int capwords = 0;
711 
712   // check capitalized form for FORCEUCASE
713   if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
714     int info = SPELL_ORIGCAP;
715     char ** wlst;
716     if (checkword(cw, &info, NULL)) {
717         if (*slst) {
718             wlst = *slst;
719         } else {
720             wlst = (char **) malloc(MAXSUGGESTION * sizeof(char *));
721             if (wlst == NULL) return -1;
722             *slst = wlst;
723             for (int i = 0; i < MAXSUGGESTION; i++) {
724                 wlst[i] = NULL;
725             }
726         }
727         wlst[0] = mystrdup(cw);
728         mkinitcap(wlst[0]);
729         return 1;
730     }
731   }
732 
733   switch(captype) {
734      case NOCAP:   {
735                      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
736                      break;
737                    }
738 
739      case INITCAP: {
740                      capwords = 1;
741                      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
742                      if (ns == -1) break;
743                      memcpy(wspace,cw,(wl+1));
744                      mkallsmall2(wspace, unicw, nc);
745                      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
746                      break;
747                    }
748      case HUHINITCAP:
749                     capwords = 1;
750      case HUHCAP: {
751                      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
752                      if (ns != -1) {
753                         int prevns;
754     		        // something.The -> something. The
755                         char * dot = strchr(cw, '.');
756 		        if (dot && (dot > cw)) {
757 		            int captype_;
758 		            if (utf8)
759                             {
760 		               w_char w_[MAXWORDLEN];
761 			       int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);
762 		               captype_ = get_captype_utf8(w_, wl_, langnum);
763 		            } else captype_ = get_captype(dot+1, strlen(dot+1), csconv);
764 		    	    if (captype_ == INITCAP)
765                             {
766                         	char * st = mystrdup(cw);
767                         	if (st)
768                         	{
769                                     char *newst = (char *) realloc(st, wl + 2);
770                                     if (newst == NULL)
771                                         free(st);
772                                     st = newst;
773                         	}
774 				if (st)
775                                 {
776                         	    st[(dot - cw) + 1] = ' ';
777                         	    strcpy(st + (dot - cw) + 2, dot + 1);
778                     		    ns = insert_sug(slst, st, ns);
779 				    free(st);
780 				}
781 		    	    }
782 		        }
783                         if (captype == HUHINITCAP) {
784                             // TheOpenOffice.org -> The OpenOffice.org
785                             memcpy(wspace,cw,(wl+1));
786                             mkinitsmall2(wspace, unicw, nc);
787                             ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
788                         }
789                         memcpy(wspace,cw,(wl+1));
790                         mkallsmall2(wspace, unicw, nc);
791                         if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
792                         prevns = ns;
793                         ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
794                         if (captype == HUHINITCAP) {
795                             mkinitcap2(wspace, unicw, nc);
796                             if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
797                             ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
798                         }
799                         // aNew -> "a New" (instead of "a new")
800                         for (int j = prevns; j < ns; j++) {
801                            char * space = strchr((*slst)[j],' ');
802                            if (space) {
803                                 int slen = strlen(space + 1);
804                                 // different case after space (need capitalisation)
805                                 if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {
806                                     w_char w[MAXWORDLEN];
807                                     int wc = 0;
808                                     char * r = (*slst)[j];
809                                     if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);
810                                     mkinitcap2(space + 1, w, wc);
811                                     // set as first suggestion
812                                     for (int k = j; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
813                                     (*slst)[0] = r;
814                                 }
815                            }
816                         }
817                      }
818                      break;
819                    }
820 
821      case ALLCAP: {
822                      memcpy(wspace, cw, (wl+1));
823                      mkallsmall2(wspace, unicw, nc);
824                      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
825                      if (ns == -1) break;
826                      if (pAMgr && pAMgr->get_keepcase() && spell(wspace))
827                         ns = insert_sug(slst, wspace, ns);
828                      mkinitcap2(wspace, unicw, nc);
829                      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
830                      for (int j=0; j < ns; j++) {
831                         mkallcap((*slst)[j]);
832                         if (pAMgr && pAMgr->get_checksharps()) {
833                             char * pos;
834                             if (utf8) {
835                                 pos = strstr((*slst)[j], "\xC3\x9F");
836                                 while (pos) {
837                                     *pos = 'S';
838                                     *(pos+1) = 'S';
839                                     pos = strstr(pos+2, "\xC3\x9F");
840                                 }
841                             } else {
842                                 pos = strchr((*slst)[j], '\xDF');
843                                 while (pos) {
844                                     (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);
845                                     mystrrep((*slst)[j], "\xDF", "SS");
846                                     pos = strchr((*slst)[j], '\xDF');
847                                 }
848                             }
849                         }
850                      }
851                      break;
852                    }
853   }
854 
855  // LANG_hu section: replace '-' with ' ' in Hungarian
856   if (langnum == LANG_hu) {
857       for (int j=0; j < ns; j++) {
858           char * pos = strchr((*slst)[j],'-');
859           if (pos) {
860               int info;
861               char w[MAXWORDUTF8LEN];
862               *pos = '\0';
863               strcpy(w, (*slst)[j]);
864               strcat(w, pos + 1);
865               (void)spell(w, &info, NULL);
866               if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
867                   *pos = ' ';
868               } else *pos = '-';
869           }
870       }
871   }
872   // END OF LANG_hu section
873 
874   // try ngram approach since found nothing or only compound words
875   if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && (*slst)) {
876       switch(captype) {
877           case NOCAP: {
878               ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
879               break;
880           }
881 	  case HUHINITCAP:
882               capwords = 1;
883           case HUHCAP: {
884               memcpy(wspace,cw,(wl+1));
885               mkallsmall2(wspace, unicw, nc);
886               ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
887 	      break;
888           }
889          case INITCAP: {
890               capwords = 1;
891               memcpy(wspace,cw,(wl+1));
892               mkallsmall2(wspace, unicw, nc);
893               ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
894               break;
895           }
896           case ALLCAP: {
897               memcpy(wspace,cw,(wl+1));
898               mkallsmall2(wspace, unicw, nc);
899 	      int oldns = ns;
900               ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
901               for (int j = oldns; j < ns; j++)
902                   mkallcap((*slst)[j]);
903               break;
904          }
905       }
906   }
907 
908   // try dash suggestion (Afo-American -> Afro-American)
909   if (char * pos = strchr(cw, '-')) {
910      char * ppos = cw;
911      int nodashsug = 1;
912      char ** nlst = NULL;
913      int nn = 0;
914      int last = 0;
915      if (*slst) {
916         for (int j = 0; j < ns && nodashsug == 1; j++) {
917            if (strchr((*slst)[j], '-')) nodashsug = 0;
918         }
919      }
920      while (nodashsug && !last) {
921 	if (*pos == '\0') last = 1; else *pos = '\0';
922         if (!spell(ppos)) {
923           nn = suggest(&nlst, ppos);
924           for (int j = nn - 1; j >= 0; j--) {
925             strncpy(wspace, cw, ppos - cw);
926             strcpy(wspace + (ppos - cw), nlst[j]);
927             if (!last) {
928             	strcat(wspace, "-");
929 		strcat(wspace, pos + 1);
930 	    }
931             ns = insert_sug(slst, wspace, ns);
932             free(nlst[j]);
933           }
934           if (nlst != NULL) free(nlst);
935           nodashsug = 0;
936         }
937 	if (!last) {
938           *pos = '-';
939           ppos = pos + 1;
940           pos = strchr(ppos, '-');
941         }
942 	if (!pos) pos = cw + strlen(cw);
943      }
944   }
945 
946   // word reversing wrapper for complex prefixes
947   if (complexprefixes) {
948     for (int j = 0; j < ns; j++) {
949       if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
950     }
951   }
952 
953   // capitalize
954   if (capwords) for (int j=0; j < ns; j++) {
955       mkinitcap((*slst)[j]);
956   }
957 
958   // expand suggestions with dot(s)
959   if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
960     for (int j = 0; j < ns; j++) {
961       (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
962       strcat((*slst)[j], word + strlen(word) - abbv);
963     }
964   }
965 
966   // remove bad capitalized and forbidden forms
967   if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
968   switch (captype) {
969     case INITCAP:
970     case ALLCAP: {
971       int l = 0;
972       for (int j=0; j < ns; j++) {
973         if (!strchr((*slst)[j],' ') && !spell((*slst)[j])) {
974           char s[MAXSWUTF8L];
975           w_char w[MAXSWL];
976           int len;
977           if (utf8) {
978             len = u8_u16(w, MAXSWL, (*slst)[j]);
979           } else {
980             strcpy(s, (*slst)[j]);
981             len = strlen(s);
982           }
983           mkallsmall2(s, w, len);
984           free((*slst)[j]);
985           if (spell(s)) {
986             (*slst)[l] = mystrdup(s);
987             if ((*slst)[l]) l++;
988           } else {
989             mkinitcap2(s, w, len);
990             if (spell(s)) {
991               (*slst)[l] = mystrdup(s);
992               if ((*slst)[l]) l++;
993             }
994           }
995         } else {
996           (*slst)[l] = (*slst)[j];
997           l++;
998         }
999       }
1000       ns = l;
1001     }
1002   }
1003   }
1004 
1005   // remove duplications
1006   int l = 0;
1007   for (int j = 0; j < ns; j++) {
1008     (*slst)[l] = (*slst)[j];
1009     for (int k = 0; k < l; k++) {
1010       if (strcmp((*slst)[k], (*slst)[j]) == 0) {
1011         free((*slst)[j]);
1012         l--;
1013         break;
1014       }
1015     }
1016     l++;
1017   }
1018   ns = l;
1019 
1020   // output conversion
1021   rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
1022   for (int j = 0; rl && j < ns; j++) {
1023     if (rl->conv((*slst)[j], wspace)) {
1024       free((*slst)[j]);
1025       (*slst)[j] = mystrdup(wspace);
1026     }
1027   }
1028 
1029   // if suggestions removed by nosuggest, onlyincompound parameters
1030   if (l == 0 && *slst) {
1031     free(*slst);
1032     *slst = NULL;
1033   }
1034   return l;
1035 }
1036 
free_list(char *** slst,int n)1037 void Hunspell::free_list(char *** slst, int n) {
1038         freelist(slst, n);
1039 }
1040 
get_dic_encoding()1041 char * Hunspell::get_dic_encoding()
1042 {
1043   return encoding;
1044 }
1045 
1046 #ifdef HUNSPELL_EXPERIMENTAL
1047 // XXX need UTF-8 support
suggest_auto(char *** slst,const char * word)1048 int Hunspell::suggest_auto(char*** slst, const char * word)
1049 {
1050   char cw[MAXWORDUTF8LEN];
1051   char wspace[MAXWORDUTF8LEN];
1052   if (!pSMgr || maxdic == 0) return 0;
1053   int wl = strlen(word);
1054   if (utf8) {
1055     if (wl >= MAXWORDUTF8LEN) return 0;
1056   } else {
1057     if (wl >= MAXWORDLEN) return 0;
1058   }
1059   int captype = 0;
1060   int abbv = 0;
1061   wl = cleanword(cw, word, &captype, &abbv);
1062   if (wl == 0) return 0;
1063   int ns = 0;
1064   *slst = NULL; // HU, nsug in pSMgr->suggest
1065 
1066   switch(captype) {
1067      case NOCAP:   {
1068                      ns = pSMgr->suggest_auto(slst, cw, ns);
1069                      if (ns>0) break;
1070                      break;
1071                    }
1072 
1073      case INITCAP: {
1074                      memcpy(wspace,cw,(wl+1));
1075                      mkallsmall(wspace);
1076                      ns = pSMgr->suggest_auto(slst, wspace, ns);
1077                      for (int j=0; j < ns; j++)
1078                        mkinitcap((*slst)[j]);
1079                      ns = pSMgr->suggest_auto(slst, cw, ns);
1080                      break;
1081 
1082                    }
1083 
1084      case HUHINITCAP:
1085      case HUHCAP: {
1086                      ns = pSMgr->suggest_auto(slst, cw, ns);
1087                      if (ns == 0) {
1088                         memcpy(wspace,cw,(wl+1));
1089                         mkallsmall(wspace);
1090                         ns = pSMgr->suggest_auto(slst, wspace, ns);
1091                      }
1092                      break;
1093                    }
1094 
1095      case ALLCAP: {
1096                      memcpy(wspace,cw,(wl+1));
1097                      mkallsmall(wspace);
1098                      ns = pSMgr->suggest_auto(slst, wspace, ns);
1099 
1100                      mkinitcap(wspace);
1101                      ns = pSMgr->suggest_auto(slst, wspace, ns);
1102 
1103                      for (int j=0; j < ns; j++)
1104                        mkallcap((*slst)[j]);
1105                      break;
1106                    }
1107   }
1108 
1109   // word reversing wrapper for complex prefixes
1110   if (complexprefixes) {
1111     for (int j = 0; j < ns; j++) {
1112       if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
1113     }
1114   }
1115 
1116   // expand suggestions with dot(s)
1117   if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
1118     for (int j = 0; j < ns; j++) {
1119       (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
1120       strcat((*slst)[j], word + strlen(word) - abbv);
1121     }
1122   }
1123 
1124   // LANG_hu section: replace '-' with ' ' in Hungarian
1125   if (langnum == LANG_hu) {
1126       for (int j=0; j < ns; j++) {
1127           char * pos = strchr((*slst)[j],'-');
1128           if (pos) {
1129               int info;
1130               char w[MAXWORDUTF8LEN];
1131               *pos = '\0';
1132               strcpy(w, (*slst)[j]);
1133               strcat(w, pos + 1);
1134               spell(w, &info, NULL);
1135               if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
1136                   *pos = ' ';
1137               } else *pos = '-';
1138           }
1139       }
1140   }
1141   // END OF LANG_hu section
1142   return ns;
1143 }
1144 #endif
1145 
stem(char *** slst,char ** desc,int n)1146 int Hunspell::stem(char*** slst, char ** desc, int n)
1147 {
1148   char result[MAXLNLEN];
1149   char result2[MAXLNLEN];
1150   *slst = NULL;
1151   if (n == 0) return 0;
1152   *result2 = '\0';
1153   for (int i = 0; i < n; i++) {
1154     *result = '\0';
1155     // add compound word parts (except the last one)
1156     char * s = (char *) desc[i];
1157     char * part = strstr(s, MORPH_PART);
1158     if (part) {
1159         char * nextpart = strstr(part + 1, MORPH_PART);
1160         while (nextpart) {
1161             copy_field(result + strlen(result), part, MORPH_PART);
1162             part = nextpart;
1163             nextpart = strstr(part + 1, MORPH_PART);
1164         }
1165         s = part;
1166     }
1167 
1168     char **pl;
1169     char tok[MAXLNLEN];
1170     strcpy(tok, s);
1171     char * alt = strstr(tok, " | ");
1172     while (alt) {
1173         alt[1] = MSEP_ALT;
1174         alt = strstr(alt, " | ");
1175     }
1176     int pln = line_tok(tok, &pl, MSEP_ALT);
1177     for (int k = 0; k < pln; k++) {
1178         // add derivational suffixes
1179         if (strstr(pl[k], MORPH_DERI_SFX)) {
1180             // remove inflectional suffixes
1181             char * is = strstr(pl[k], MORPH_INFL_SFX);
1182             if (is) *is = '\0';
1183             char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);
1184             if (sg) {
1185                 char ** gen;
1186                 int genl = line_tok(sg, &gen, MSEP_REC);
1187                 free(sg);
1188                 for (int j = 0; j < genl; j++) {
1189                     sprintf(result2 + strlen(result2), "%c%s%s",
1190                             MSEP_REC, result, gen[j]);
1191                 }
1192                 freelist(&gen, genl);
1193             }
1194         } else {
1195             sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);
1196             if (strstr(pl[k], MORPH_SURF_PFX)) {
1197                 copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);
1198             }
1199             copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);
1200         }
1201     }
1202     freelist(&pl, pln);
1203   }
1204   int sln = line_tok(result2, slst, MSEP_REC);
1205   return uniqlist(*slst, sln);
1206 
1207 }
1208 
stem(char *** slst,const char * word)1209 int Hunspell::stem(char*** slst, const char * word)
1210 {
1211   char ** pl;
1212   int pln = analyze(&pl, word);
1213   int pln2 = stem(slst, pl, pln);
1214   freelist(&pl, pln);
1215   return pln2;
1216 }
1217 
1218 #ifdef HUNSPELL_EXPERIMENTAL
suggest_pos_stems(char *** slst,const char * word)1219 int Hunspell::suggest_pos_stems(char*** slst, const char * word)
1220 {
1221   char cw[MAXWORDUTF8LEN];
1222   char wspace[MAXWORDUTF8LEN];
1223   if (! pSMgr || maxdic == 0) return 0;
1224   int wl = strlen(word);
1225   if (utf8) {
1226     if (wl >= MAXWORDUTF8LEN) return 0;
1227   } else {
1228     if (wl >= MAXWORDLEN) return 0;
1229   }
1230   int captype = 0;
1231   int abbv = 0;
1232   wl = cleanword(cw, word, &captype, &abbv);
1233   if (wl == 0) return 0;
1234 
1235   int ns = 0; // ns=0 = normalized input
1236 
1237   *slst = NULL; // HU, nsug in pSMgr->suggest
1238 
1239   switch(captype) {
1240      case HUHCAP:
1241      case NOCAP:   {
1242                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1243 
1244                      if ((abbv) && (ns == 0)) {
1245                          memcpy(wspace,cw,wl);
1246                          *(wspace+wl) = '.';
1247                          *(wspace+wl+1) = '\0';
1248                          ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1249                      }
1250 
1251                      break;
1252                    }
1253 
1254      case INITCAP: {
1255 
1256                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1257 
1258                      if (ns == 0 || ((*slst)[0][0] == '#')) {
1259                         memcpy(wspace,cw,(wl+1));
1260                         mkallsmall(wspace);
1261                         ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1262                      }
1263 
1264                      break;
1265 
1266                    }
1267 
1268      case ALLCAP: {
1269                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1270                      if (ns != 0) break;
1271 
1272                      memcpy(wspace,cw,(wl+1));
1273                      mkallsmall(wspace);
1274                      ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1275 
1276                      if (ns == 0) {
1277                          mkinitcap(wspace);
1278                          ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1279                      }
1280                      break;
1281                    }
1282   }
1283 
1284   return ns;
1285 }
1286 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1287 
get_wordchars()1288 const char * Hunspell::get_wordchars()
1289 {
1290   return pAMgr->get_wordchars();
1291 }
1292 
get_wordchars_utf16(int * len)1293 unsigned short * Hunspell::get_wordchars_utf16(int * len)
1294 {
1295   return pAMgr->get_wordchars_utf16(len);
1296 }
1297 
mkinitcap(char * p)1298 void Hunspell::mkinitcap(char * p)
1299 {
1300   if (!utf8) {
1301     if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
1302   } else {
1303       int len;
1304       w_char u[MAXWORDLEN];
1305       len = u8_u16(u, MAXWORDLEN, p);
1306       unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
1307       u[0].h = (unsigned char) (i >> 8);
1308       u[0].l = (unsigned char) (i & 0x00FF);
1309       u16_u8(p, MAXWORDUTF8LEN, u, len);
1310   }
1311 }
1312 
mkinitcap2(char * p,w_char * u,int nc)1313 int Hunspell::mkinitcap2(char * p, w_char * u, int nc)
1314 {
1315   if (!utf8) {
1316     if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
1317   } else if (nc > 0) {
1318       unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
1319       u[0].h = (unsigned char) (i >> 8);
1320       u[0].l = (unsigned char) (i & 0x00FF);
1321       u16_u8(p, MAXWORDUTF8LEN, u, nc);
1322       return strlen(p);
1323   }
1324   return nc;
1325 }
1326 
mkinitsmall2(char * p,w_char * u,int nc)1327 int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)
1328 {
1329   if (!utf8) {
1330     if (*p != '\0') *p = csconv[((unsigned char)*p)].clower;
1331   } else if (nc > 0) {
1332       unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);
1333       u[0].h = (unsigned char) (i >> 8);
1334       u[0].l = (unsigned char) (i & 0x00FF);
1335       u16_u8(p, MAXWORDUTF8LEN, u, nc);
1336       return strlen(p);
1337   }
1338   return nc;
1339 }
1340 
add(const char * word)1341 int Hunspell::add(const char * word)
1342 {
1343     if (pHMgr[0]) return (pHMgr[0])->add(word);
1344     return 0;
1345 }
1346 
add_with_affix(const char * word,const char * example)1347 int Hunspell::add_with_affix(const char * word, const char * example)
1348 {
1349     if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);
1350     return 0;
1351 }
1352 
remove(const char * word)1353 int Hunspell::remove(const char * word)
1354 {
1355     if (pHMgr[0]) return (pHMgr[0])->remove(word);
1356     return 0;
1357 }
1358 
get_version()1359 const char * Hunspell::get_version()
1360 {
1361   return pAMgr->get_version();
1362 }
1363 
get_csconv()1364 struct cs_info * Hunspell::get_csconv()
1365 {
1366   return csconv;
1367 }
1368 
cat_result(char * result,char * st)1369 void Hunspell::cat_result(char * result, char * st)
1370 {
1371     if (st) {
1372         if (*result) mystrcat(result, "\n", MAXLNLEN);
1373         mystrcat(result, st, MAXLNLEN);
1374         free(st);
1375     }
1376 }
1377 
analyze(char *** slst,const char * word)1378 int Hunspell::analyze(char*** slst, const char * word)
1379 {
1380   char cw[MAXWORDUTF8LEN];
1381   char wspace[MAXWORDUTF8LEN];
1382   w_char unicw[MAXWORDLEN];
1383   int wl2 = 0;
1384   *slst = NULL;
1385   if (! pSMgr || maxdic == 0) return 0;
1386   int nc = strlen(word);
1387   if (utf8) {
1388     if (nc >= MAXWORDUTF8LEN) return 0;
1389   } else {
1390     if (nc >= MAXWORDLEN) return 0;
1391   }
1392   int captype = 0;
1393   int abbv = 0;
1394   int wl = 0;
1395 
1396   // input conversion
1397   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1398   if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
1399   else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
1400 
1401   if (wl == 0) {
1402       if (abbv) {
1403           for (wl = 0; wl < abbv; wl++) cw[wl] = '.';
1404           cw[wl] = '\0';
1405           abbv = 0;
1406       } else return 0;
1407   }
1408 
1409   char result[MAXLNLEN];
1410   char * st = NULL;
1411 
1412   *result = '\0';
1413 
1414   int n = 0;
1415   int n2 = 0;
1416   int n3 = 0;
1417 
1418   // test numbers
1419   // LANG_hu section: set dash information for suggestions
1420   if (langnum == LANG_hu) {
1421   while ((n < wl) &&
1422         (((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {
1423         n++;
1424         if ((cw[n] == '.') || (cw[n] == ',')) {
1425                 if (((n2 == 0) && (n > 3)) ||
1426                         ((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;
1427                 n2++;
1428                 n3 = n;
1429         }
1430   }
1431 
1432   if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;
1433   if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n, NULL, NULL))) {
1434         mystrcat(result, cw, MAXLNLEN);
1435         result[n - 1] = '\0';
1436         if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));
1437         else {
1438                 char sign = cw[n];
1439                 cw[n] = '\0';
1440                 cat_result(result, pSMgr->suggest_morph(cw + n - 1));
1441                 mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE
1442                 cw[n] = sign;
1443                 cat_result(result, pSMgr->suggest_morph(cw + n));
1444         }
1445         return line_tok(result, slst, MSEP_REC);
1446   }
1447   }
1448   // END OF LANG_hu section
1449 
1450   switch(captype) {
1451      case HUHCAP:
1452      case HUHINITCAP:
1453      case NOCAP:  {
1454                     cat_result(result, pSMgr->suggest_morph(cw));
1455                     if (abbv) {
1456                         memcpy(wspace,cw,wl);
1457                         *(wspace+wl) = '.';
1458                         *(wspace+wl+1) = '\0';
1459                         cat_result(result, pSMgr->suggest_morph(wspace));
1460                     }
1461                     break;
1462                 }
1463      case INITCAP: {
1464                      wl = mkallsmall2(cw, unicw, nc);
1465                      memcpy(wspace,cw,(wl+1));
1466                      wl2 = mkinitcap2(cw, unicw, nc);
1467                      cat_result(result, pSMgr->suggest_morph(wspace));
1468                      cat_result(result, pSMgr->suggest_morph(cw));
1469                      if (abbv) {
1470                          *(wspace+wl) = '.';
1471                          *(wspace+wl+1) = '\0';
1472                          cat_result(result, pSMgr->suggest_morph(wspace));
1473 
1474                          memcpy(wspace, cw, wl2);
1475                          *(wspace+wl2) = '.';
1476                          *(wspace+wl2+1) = '\0';
1477 
1478                          cat_result(result, pSMgr->suggest_morph(wspace));
1479                      }
1480                      break;
1481                    }
1482      case ALLCAP: {
1483                      cat_result(result, pSMgr->suggest_morph(cw));
1484                      if (abbv) {
1485                          memcpy(wspace,cw,wl);
1486                          *(wspace+wl) = '.';
1487                          *(wspace+wl+1) = '\0';
1488                          cat_result(result, pSMgr->suggest_morph(cw));
1489                      }
1490                      wl = mkallsmall2(cw, unicw, nc);
1491                      memcpy(wspace,cw,(wl+1));
1492                      wl2 = mkinitcap2(cw, unicw, nc);
1493 
1494                      cat_result(result, pSMgr->suggest_morph(wspace));
1495                      cat_result(result, pSMgr->suggest_morph(cw));
1496                      if (abbv) {
1497                          *(wspace+wl) = '.';
1498                          *(wspace+wl+1) = '\0';
1499                          cat_result(result, pSMgr->suggest_morph(wspace));
1500 
1501                          memcpy(wspace, cw, wl2);
1502                          *(wspace+wl2) = '.';
1503                          *(wspace+wl2+1) = '\0';
1504 
1505                          cat_result(result, pSMgr->suggest_morph(wspace));
1506                      }
1507                      break;
1508                    }
1509   }
1510 
1511   if (*result) {
1512     // word reversing wrapper for complex prefixes
1513     if (complexprefixes) {
1514       if (utf8) reverseword_utf(result); else reverseword(result);
1515     }
1516     return line_tok(result, slst, MSEP_REC);
1517   }
1518 
1519   // compound word with dash (HU) I18n
1520   char * dash = NULL;
1521   int nresult = 0;
1522   // LANG_hu section: set dash information for suggestions
1523   if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');
1524   if ((langnum == LANG_hu) && dash) {
1525       *dash='\0';
1526       // examine 2 sides of the dash
1527       if (dash[1] == '\0') { // base word ending with dash
1528         if (spell(cw)) {
1529 		char * p = pSMgr->suggest_morph(cw);
1530 		if (p) {
1531 		    int ret = line_tok(p, slst, MSEP_REC);
1532 		    free(p);
1533 		    return ret;
1534 		}
1535 
1536 	}
1537       } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
1538         if (spell(cw) && (spell("-e"))) {
1539                         st = pSMgr->suggest_morph(cw);
1540                         if (st) {
1541                                 mystrcat(result, st, MAXLNLEN);
1542                                 free(st);
1543                         }
1544                         mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
1545                         st = pSMgr->suggest_morph("-e");
1546                         if (st) {
1547                                 mystrcat(result, st, MAXLNLEN);
1548                                 free(st);
1549                         }
1550                         return line_tok(result, slst, MSEP_REC);
1551                 }
1552       } else {
1553       // first word ending with dash: word- XXX ???
1554         char r2 = *(dash + 1);
1555         dash[0]='-';
1556         dash[1]='\0';
1557         nresult = spell(cw);
1558         dash[1] = r2;
1559         dash[0]='\0';
1560         if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||
1561                 ((dash[1] > '0') && (dash[1] < '9')))) {
1562                             st = pSMgr->suggest_morph(cw);
1563                             if (st) {
1564                                 mystrcat(result, st, MAXLNLEN);
1565                                     free(st);
1566                                 mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
1567                             }
1568                             st = pSMgr->suggest_morph(dash+1);
1569                             if (st) {
1570                                     mystrcat(result, st, MAXLNLEN);
1571                                     free(st);
1572                             }
1573                             return line_tok(result, slst, MSEP_REC);
1574                         }
1575       }
1576       // affixed number in correct word
1577      if (nresult && (dash > cw) && (((*(dash-1)<='9') &&
1578                         (*(dash-1)>='0')) || (*(dash-1)=='.'))) {
1579          *dash='-';
1580          n = 1;
1581          if (*(dash - n) == '.') n++;
1582          // search first not a number character to left from dash
1583          while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
1584             n++;
1585          }
1586          if ((dash - n) < cw) n--;
1587          // numbers: valami1000000-hoz
1588          // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
1589          // 56-hoz, 6-hoz
1590          for(; n >= 1; n--) {
1591             if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {
1592                     mystrcat(result, cw, MAXLNLEN);
1593                     result[dash - cw - n] = '\0';
1594                         st = pSMgr->suggest_morph(dash - n);
1595                         if (st) {
1596                         mystrcat(result, st, MAXLNLEN);
1597                                 free(st);
1598                         }
1599                         return line_tok(result, slst, MSEP_REC);
1600             }
1601          }
1602      }
1603   }
1604   return 0;
1605 }
1606 
generate(char *** slst,const char * word,char ** pl,int pln)1607 int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)
1608 {
1609   *slst = NULL;
1610   if (!pSMgr || !pln) return 0;
1611   char **pl2;
1612   int pl2n = analyze(&pl2, word);
1613   int captype = 0;
1614   int abbv = 0;
1615   char cw[MAXWORDUTF8LEN];
1616   cleanword(cw, word, &captype, &abbv);
1617   char result[MAXLNLEN];
1618   *result = '\0';
1619 
1620   for (int i = 0; i < pln; i++) {
1621     cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));
1622   }
1623   freelist(&pl2, pl2n);
1624 
1625   if (*result) {
1626     // allcap
1627     if (captype == ALLCAP) mkallcap(result);
1628 
1629     // line split
1630     int linenum = line_tok(result, slst, MSEP_REC);
1631 
1632     // capitalize
1633     if (captype == INITCAP || captype == HUHINITCAP) {
1634         for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);
1635     }
1636 
1637     // temporary filtering of prefix related errors (eg.
1638     // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
1639 
1640     int r = 0;
1641     for (int j=0; j < linenum; j++) {
1642         if (!spell((*slst)[j])) {
1643             free((*slst)[j]);
1644             (*slst)[j] = NULL;
1645         } else {
1646             if (r < j) (*slst)[r] = (*slst)[j];
1647             r++;
1648         }
1649     }
1650     if (r > 0) return r;
1651     free(*slst);
1652     *slst = NULL;
1653   }
1654   return 0;
1655 }
1656 
generate(char *** slst,const char * word,const char * pattern)1657 int Hunspell::generate(char*** slst, const char * word, const char * pattern)
1658 {
1659   char **pl;
1660   int pln = analyze(&pl, pattern);
1661   int n = generate(slst, word, pl, pln);
1662   freelist(&pl, pln);
1663   return uniqlist(*slst, n);
1664 }
1665 
1666 // minimal XML parser functions
get_xml_par(char * dest,const char * par,int max)1667 int Hunspell::get_xml_par(char * dest, const char * par, int max)
1668 {
1669    char * d = dest;
1670    if (!par) return 0;
1671    char end = *par;
1672    char * dmax = dest + max;
1673    if (end == '>') end = '<';
1674    else if (end != '\'' && end != '"') return 0; // bad XML
1675    for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par;
1676    *d = '\0';
1677    mystrrep(dest, "&lt;", "<");
1678    mystrrep(dest, "&amp;", "&");
1679    return (int)(d - dest);
1680 }
1681 
get_langnum() const1682 int Hunspell::get_langnum() const
1683 {
1684    return langnum;
1685 }
1686 
input_conv(const char * word,char * dest)1687 int Hunspell::input_conv(const char * word, char * dest)
1688 {
1689   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1690   return (rl && rl->conv(word, dest));
1691 }
1692 
1693 
1694 // return the beginning of the element (attr == NULL) or the attribute
get_xml_pos(const char * s,const char * attr)1695 const char * Hunspell::get_xml_pos(const char * s, const char * attr)
1696 {
1697   const char * end = strchr(s, '>');
1698   const char * p = s;
1699   if (attr == NULL) return end;
1700   do {
1701     p = strstr(p, attr);
1702     if (!p || p >= end) return 0;
1703   } while (*(p-1) != ' ' &&  *(p-1) != '\n');
1704   return p + strlen(attr);
1705 }
1706 
check_xml_par(const char * q,const char * attr,const char * value)1707 int Hunspell::check_xml_par(const char * q, const char * attr, const char * value) {
1708   char cw[MAXWORDUTF8LEN];
1709   if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) &&
1710     strcmp(cw, value) == 0) return 1;
1711   return 0;
1712 }
1713 
get_xml_list(char *** slst,char * list,const char * tag)1714 int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
1715     int n = 0;
1716     char * p;
1717     if (!list) return 0;
1718     for (p = list; ((p = strstr(p, tag)) != NULL); p++) n++;
1719     if (n == 0) return 0;
1720     *slst = (char **) malloc(sizeof(char *) * n);
1721     if (!*slst) return 0;
1722     for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) {
1723         int l = strlen(p);
1724         (*slst)[n] = (char *) malloc(l + 1);
1725         if (!(*slst)[n]) return n;
1726         if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) {
1727             free((*slst)[n]);
1728             break;
1729         }
1730     }
1731     return n;
1732 }
1733 
namespace
{
    // Replaces every occurrence of `search` in `str` with `replace`, scanning
    // left to right. Text inserted by a replacement is not scanned again, so
    // `replace` may contain `search`. An empty `search` leaves `str`
    // unchanged (it would otherwise match at every position and never end).
    void myrep(std::string& str, const std::string& search, const std::string& replace)
    {
        if (search.empty())
            return;

        size_t pos = 0;
        while ((pos = str.find(search, pos)) != std::string::npos)
        {
           str.replace(pos, search.length(), replace);
           pos += replace.length(); // continue behind the inserted text
        }
    }
}
1746 
spellml(char *** slst,const char * word)1747 int Hunspell::spellml(char*** slst, const char * word)
1748 {
1749   char *q, *q2;
1750   char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];
1751   q = (char *) strstr(word, "<query");
1752   if (!q) return 0; // bad XML input
1753   q2 = strchr(q, '>');
1754   if (!q2) return 0; // bad XML input
1755   q2 = strstr(q2, "<word");
1756   if (!q2) return 0; // bad XML input
1757   if (check_xml_par(q, "type=", "analyze")) {
1758       int n = 0;
1759       if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);
1760       if (n == 0) return 0;
1761       // convert the result to <code><a>ana1</a><a>ana2</a></code> format
1762       std::string r;
1763       r.append("<code>");
1764       for (int i = 0; i < n; i++) {
1765         r.append("<a>");
1766 
1767         std::string entry((*slst)[i]);
1768         free((*slst)[i]);
1769         myrep(entry, "\t", " ");
1770         myrep(entry, "&", "&amp;");
1771         myrep(entry, "<", "&lt;");
1772         r.append(entry);
1773 
1774         r.append("</a>");
1775       }
1776       r.append("</code>");
1777       (*slst)[0] = mystrdup(r.c_str());
1778       return 1;
1779   } else if (check_xml_par(q, "type=", "stem")) {
1780       if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);
1781   } else if (check_xml_par(q, "type=", "generate")) {
1782       int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);
1783       if (n == 0) return 0;
1784       char * q3 = strstr(q2 + 1, "<word");
1785       if (q3) {
1786         if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {
1787             return generate(slst, cw, cw2);
1788         }
1789       } else {
1790         if ((q2 = strstr(q2 + 1, "<code")) != NULL) {
1791           char ** slst2;
1792           if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>")) != 0) {
1793             int n2 = generate(slst, cw, slst2, n);
1794             freelist(&slst2, n);
1795             return uniqlist(*slst, n2);
1796           }
1797           freelist(&slst2, n);
1798         }
1799       }
1800   }
1801   return 0;
1802 }
1803 
1804 
1805 #ifdef HUNSPELL_EXPERIMENTAL
1806 // XXX need UTF-8 support
morph_with_correction(const char * word)1807 char * Hunspell::morph_with_correction(const char * word)
1808 {
1809   char cw[MAXWORDUTF8LEN];
1810   char wspace[MAXWORDUTF8LEN];
1811   if (! pSMgr || maxdic == 0) return NULL;
1812   int wl = strlen(word);
1813   if (utf8) {
1814     if (wl >= MAXWORDUTF8LEN) return NULL;
1815   } else {
1816     if (wl >= MAXWORDLEN) return NULL;
1817   }
1818   int captype = 0;
1819   int abbv = 0;
1820   wl = cleanword(cw, word, &captype, &abbv);
1821   if (wl == 0) return NULL;
1822 
1823   char result[MAXLNLEN];
1824   char * st = NULL;
1825 
1826   *result = '\0';
1827 
1828 
1829   switch(captype) {
1830      case NOCAP:   {
1831                      st = pSMgr->suggest_morph_for_spelling_error(cw);
1832                      if (st) {
1833                         mystrcat(result, st, MAXLNLEN);
1834                         free(st);
1835                      }
1836                      if (abbv) {
1837                          memcpy(wspace,cw,wl);
1838                          *(wspace+wl) = '.';
1839                          *(wspace+wl+1) = '\0';
1840                          st = pSMgr->suggest_morph_for_spelling_error(wspace);
1841                          if (st) {
1842                             if (*result) mystrcat(result, "\n", MAXLNLEN);
1843                             mystrcat(result, st, MAXLNLEN);
1844                             free(st);
1845                                                  }
1846                      }
1847                                          break;
1848                    }
1849      case INITCAP: {
1850                      memcpy(wspace,cw,(wl+1));
1851                      mkallsmall(wspace);
1852                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1853                      if (st) {
1854                         mystrcat(result, st, MAXLNLEN);
1855                         free(st);
1856                      }
1857                      st = pSMgr->suggest_morph_for_spelling_error(cw);
1858                      if (st) {
1859                         if (*result) mystrcat(result, "\n", MAXLNLEN);
1860                         mystrcat(result, st, MAXLNLEN);
1861                         free(st);
1862                      }
1863                      if (abbv) {
1864                          memcpy(wspace,cw,wl);
1865                          *(wspace+wl) = '.';
1866                          *(wspace+wl+1) = '\0';
1867                          mkallsmall(wspace);
1868                          st = pSMgr->suggest_morph_for_spelling_error(wspace);
1869                          if (st) {
1870                             if (*result) mystrcat(result, "\n", MAXLNLEN);
1871                             mystrcat(result, st, MAXLNLEN);
1872                             free(st);
1873                          }
1874                          mkinitcap(wspace);
1875                          st = pSMgr->suggest_morph_for_spelling_error(wspace);
1876                          if (st) {
1877                             if (*result) mystrcat(result, "\n", MAXLNLEN);
1878                             mystrcat(result, st, MAXLNLEN);
1879                             free(st);
1880                          }
1881                      }
1882                      break;
1883                    }
1884      case HUHCAP: {
1885                      st = pSMgr->suggest_morph_for_spelling_error(cw);
1886                      if (st) {
1887                         mystrcat(result, st, MAXLNLEN);
1888                         free(st);
1889                      }
1890                      memcpy(wspace,cw,(wl+1));
1891                      mkallsmall(wspace);
1892                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1893                      if (st) {
1894                         if (*result) mystrcat(result, "\n", MAXLNLEN);
1895                         mystrcat(result, st, MAXLNLEN);
1896                         free(st);
1897                      }
1898                      break;
1899                  }
1900      case ALLCAP: {
1901                      memcpy(wspace,cw,(wl+1));
1902                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1903                      if (st) {
1904                         mystrcat(result, st, MAXLNLEN);
1905                         free(st);
1906                      }
1907                      mkallsmall(wspace);
1908                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1909                      if (st) {
1910                         if (*result) mystrcat(result, "\n", MAXLNLEN);
1911                         mystrcat(result, st, MAXLNLEN);
1912                         free(st);
1913                      }
1914                      mkinitcap(wspace);
1915                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1916                      if (st) {
1917                         if (*result) mystrcat(result, "\n", MAXLNLEN);
1918                         mystrcat(result, st, MAXLNLEN);
1919                         free(st);
1920                      }
1921                      if (abbv) {
1922                         memcpy(wspace,cw,(wl+1));
1923                         *(wspace+wl) = '.';
1924                         *(wspace+wl+1) = '\0';
1925                         if (*result) mystrcat(result, "\n", MAXLNLEN);
1926                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
1927                         if (st) {
1928                             mystrcat(result, st, MAXLNLEN);
1929                             free(st);
1930                         }
1931                         mkallsmall(wspace);
1932                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
1933                         if (st) {
1934                           if (*result) mystrcat(result, "\n", MAXLNLEN);
1935                           mystrcat(result, st, MAXLNLEN);
1936                           free(st);
1937                         }
1938                         mkinitcap(wspace);
1939                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
1940                         if (st) {
1941                           if (*result) mystrcat(result, "\n", MAXLNLEN);
1942                           mystrcat(result, st, MAXLNLEN);
1943                           free(st);
1944                         }
1945                      }
1946                      break;
1947                    }
1948   }
1949 
1950   if (*result) return mystrdup(result);
1951   return NULL;
1952 }
1953 
1954 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1955 
Hunspell_create(const char * affpath,const char * dpath)1956 Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
1957 {
1958         return (Hunhandle*)(new Hunspell(affpath, dpath));
1959 }
1960 
Hunspell_create_key(const char * affpath,const char * dpath,const char * key)1961 Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
1962     const char * key)
1963 {
1964         return (Hunhandle*)(new Hunspell(affpath, dpath, key));
1965 }
1966 
Hunspell_destroy(Hunhandle * pHunspell)1967 void Hunspell_destroy(Hunhandle *pHunspell)
1968 {
1969         delete (Hunspell*)(pHunspell);
1970 }
1971 
Hunspell_spell(Hunhandle * pHunspell,const char * word)1972 int Hunspell_spell(Hunhandle *pHunspell, const char *word)
1973 {
1974         return ((Hunspell*)pHunspell)->spell(word);
1975 }
1976 
Hunspell_get_dic_encoding(Hunhandle * pHunspell)1977 char *Hunspell_get_dic_encoding(Hunhandle *pHunspell)
1978 {
1979         return ((Hunspell*)pHunspell)->get_dic_encoding();
1980 }
1981 
Hunspell_suggest(Hunhandle * pHunspell,char *** slst,const char * word)1982 int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)
1983 {
1984         return ((Hunspell*)pHunspell)->suggest(slst, word);
1985 }
1986 
Hunspell_analyze(Hunhandle * pHunspell,char *** slst,const char * word)1987 int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)
1988 {
1989         return ((Hunspell*)pHunspell)->analyze(slst, word);
1990 }
1991 
Hunspell_stem(Hunhandle * pHunspell,char *** slst,const char * word)1992 int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)
1993 {
1994         return ((Hunspell*)pHunspell)->stem(slst, word);
1995 }
1996 
Hunspell_stem2(Hunhandle * pHunspell,char *** slst,char ** desc,int n)1997 int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n)
1998 {
1999         return ((Hunspell*)pHunspell)->stem(slst, desc, n);
2000 }
2001 
Hunspell_generate(Hunhandle * pHunspell,char *** slst,const char * word,const char * word2)2002 int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
2003     const char * word2)
2004 {
2005         return ((Hunspell*)pHunspell)->generate(slst, word, word2);
2006 }
2007 
Hunspell_generate2(Hunhandle * pHunspell,char *** slst,const char * word,char ** desc,int n)2008 int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
2009     char** desc, int n)
2010 {
2011         return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);
2012 }
2013 
2014   /* functions for run-time modification of the dictionary */
2015 
2016   /* add word to the run-time dictionary */
2017 
Hunspell_add(Hunhandle * pHunspell,const char * word)2018 int Hunspell_add(Hunhandle *pHunspell, const char * word) {
2019         return ((Hunspell*)pHunspell)->add(word);
2020 }
2021 
2022   /* add word to the run-time dictionary with affix flags of
2023    * the example (a dictionary word): Hunspell will recognize
2024    * affixed forms of the new word, too.
2025    */
2026 
Hunspell_add_with_affix(Hunhandle * pHunspell,const char * word,const char * example)2027 int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,
2028         const char * example) {
2029         return ((Hunspell*)pHunspell)->add_with_affix(word, example);
2030 }
2031 
2032   /* remove word from the run-time dictionary */
2033 
Hunspell_remove(Hunhandle * pHunspell,const char * word)2034 int Hunspell_remove(Hunhandle *pHunspell, const char * word) {
2035         return ((Hunspell*)pHunspell)->remove(word);
2036 }
2037 
Hunspell_free_list(Hunhandle *,char *** slst,int n)2038 void Hunspell_free_list(Hunhandle *, char *** slst, int n) {
2039         freelist(slst, n);
2040 }
2041